# TDT Timestamps

There are three types of data:

1. High quality: timestamps were recorded in dat files alongside videos and enable a clear link between frame time and tdt sample.
- 2018-02-20 to 2018-02-23 (end-of-project): 10 sessions
2. Fully missing data: timestamps recorded as zeros in both image and dat files
- 2018-01-29: 3 sessions (+ 1 other video that did not save)
- 2018-02-20: 3 sessions (AM sessions only)
- 2018-02-22: 1 session (F1613 Block J5-30)
3. Partially missing data: timestamps only recorded in videos, thus requiring extra steps to recover timestamps and then clean results.
- 2018-01-31: 4 sessions
- 2018-02-19: 2 sessions

In [2]:
from pathlib import Path
import os, sys

import numpy as np
import pandas as pd
from dotenv import load_dotenv

# The repository root is taken to be three directory levels above the current
# working directory. It is placed first on sys.path, presumably so that the
# project-local packages imported just below (`lib`, `Methods`) resolve; those
# imports must therefore stay after the sys.path.insert call.
github_repo = Path.cwd().parent.parent.parent
sys.path.insert(0, str(github_repo))

from lib import utils
from Methods.video_tracking import loading as vload

# Load variables from a .env file into the process environment (python-dotenv).
# NOTE(review): presumably this is what supplies `local_home`, which is read
# later with os.getenv -- confirm against the project's .env file.
load_dotenv()

True

In [3]:
# Paths
# Directory (inside the repository) that is joined with each video's .dat
# file name when timestamps are loaded.
timestamp_path = github_repo  / 'data/text/tdt_timestamps_4_videos'

# os.getenv returns None when the variable is missing, and Path(None) then
# fails with an unhelpful TypeError; fail early with an explicit message.
local_home = os.getenv("local_home")
if local_home is None:
    raise RuntimeError(
        "Environment variable 'local_home' is not set; "
        "define it (e.g. in the .env file) before running this notebook."
    )

# Location of the parquet file holding the DeepLabCut (DLC) tracking results.
dlc_path = Path(local_home) / 'Task_Switching/head_tracking'
dlc_file = 'DLC_aligned_230218_1258.parquet'
dlc_filepath = dlc_path / dlc_file

In [4]:
def get_videos_btwn_dates(start_dt:str, end_dt:str):
    """ List the videos recorded strictly between two datetimes.

    Queries `task_switch.video_files` for rows whose `session_dt` is later
    than `start_dt` and earlier than `end_dt` (both bounds are exclusive).
    Each row carries the ferret, the block, the video file name
    (`video_name`), the matching timestamp file name (`dat_file`, derived
    from the video name by swapping '.avi' for '.dat' and 'Track' for
    'FrameTDTsamps') and the frame count cast to an integer.

    Rows whose video name contains 'resized' are removed from the result.

    Parameters
    ----------
    start_dt : str
        Lower (exclusive) bound, passed to the query as `start_dt`.
    end_dt : str
        Upper (exclusive) bound, passed to the query as `end_dt`.

    Returns
    -------
    The object returned by `utils.query_postgres` (used here as a pandas
    DataFrame) with the 'resized' rows dropped.
    """

    sql = """ 
        SELECT 
            ferret,
            block,
            filename as video_name,
            REPLACE( REPLACE(filename, '.avi', '.dat'), 'Track', 'FrameTDTsamps') as dat_file,
            CAST(frame_count AS INTEGER)
        FROM task_switch.video_files
        WHERE session_dt > %(start_dt)s
            AND session_dt < %(end_dt)s;
    """

    date_bounds = {'start_dt':start_dt, 'end_dt':end_dt}
    videos = utils.query_postgres(sql, params=date_bounds)

    # Index labels of the rows to discard (videos flagged as 'resized')
    resized_labels = videos.query("video_name.str.contains('resized')").index

    return videos.drop(resized_labels)

In [5]:
def add_timestamp_counts(timestamp_path:Path, df:pd.DataFrame):
    """ Count the timestamps, and the zero-valued timestamps, stored for each video.

    For every row of `df`, the file `timestamp_path / <dat_file>` is read as
    comma-delimited text. The number of entries read is written to the
    `n_timestamps` column and the number of entries equal to 0.0 to the
    `n_zeros` column.

    NOTE(review): an earlier note on this function said that the first value
    in each file is always zero and is discarded. Nothing is discarded here:
    every value read from the file is counted. Confirm whether the files are
    trimmed before they reach this function.

    Parameters
    ----------
    timestamp_path : Path
        Directory containing the timestamp files.
    df : pd.DataFrame
        Table with a `dat_file` column naming the timestamp file of each video.
        It is modified in place.

    Returns
    -------
    pd.DataFrame
        The same `df` object, with `n_timestamps` and `n_zeros` columns
        (float, NaN until filled). Both columns exist even when `df` is empty.
    """

    # Columns must be created by item assignment: `df.n_timestamps = ...` only
    # sets a Python attribute on the object and adds no column, so an empty
    # frame would otherwise be returned without them.
    df['n_timestamps'] = np.nan
    df['n_zeros'] = np.nan

    for idx, video_file in df.iterrows():

        timestamp_filepath = timestamp_path / video_file['dat_file']

        # ndmin=1 keeps a single-value file as a length-1 array instead of a
        # 0-d array, on which len() would raise.
        timestamps = np.loadtxt( timestamp_filepath, delimiter=',', ndmin=1)

        df.loc[idx, 'n_timestamps'] = len(timestamps)
        df.loc[idx, 'n_zeros'] = (timestamps == 0.0).sum()

    return df

In [6]:
def add_tracking_frame_counts( dlc_filepath:Path, df:pd.DataFrame):
    """ Record, for each video, how many rows of tracking data are available.

    For every row of `df`, `vload.load_parquet` is called with the tracking
    file path and that row's `ferret` and `block`; the number of rows of the
    returned object (`shape[0]`) is stored in the `dlc_n_frames` column so it
    can be compared with the video's frame count.

    Parameters
    ----------
    dlc_filepath : Path
        Path handed to `vload.load_parquet`.
    df : pd.DataFrame
        Table with `ferret` and `block` columns. It is modified in place.

    Returns
    -------
    pd.DataFrame
        The same `df` object with a `dlc_n_frames` column (float, NaN until
        filled). The column exists even when `df` is empty.
    """

    # Item assignment is required to create the column; `df.dlc_n_frames = ...`
    # would only set a Python attribute and leave an empty frame without it.
    df['dlc_n_frames'] = np.nan

    for idx, video_file in df.iterrows():

        # NOTE(review): the tracking data is loaded once per row; presumably
        # load_parquet filters by ferret/block -- confirm in Methods.video_tracking.
        dlc_data = vload.load_parquet( dlc_filepath, video_file['ferret'], video_file['block'])
        df.loc[idx, 'dlc_n_frames'] = dlc_data.shape[0]

    return df

In [7]:
def check_consistency(df:pd.DataFrame):
    """ Verify that the three per-video frame counts agree on every row.

    The columns `frame_count`, `n_timestamps` and `dlc_n_frames` are compared
    pairwise; a pair agrees on a row when their difference equals 0.0 (so a
    missing/NaN value counts as a disagreement).

    Parameters
    ----------
    df : pd.DataFrame
        Table containing the `frame_count`, `n_timestamps` and `dlc_n_frames`
        columns.

    Raises
    ------
    AssertionError
        If any pair of columns differs on any row. The message names the two
        columns and the index labels of the offending rows. The error is
        raised explicitly so the check is not stripped when Python runs
        with -O.
    """

    column_pairs = (
        ('frame_count', 'n_timestamps'),
        ('dlc_n_frames', 'n_timestamps'),
        ('dlc_n_frames', 'frame_count'),
    )

    for left, right in column_pairs:

        agrees = (df[left] - df[right]) == 0.0

        if not agrees.all():
            bad_rows = df.index[~agrees].tolist()
            raise AssertionError(
                f"'{left}' and '{right}' disagree for rows {bad_rows}"
            )

    print('All tests passed')

## 1. High quality data

Start by listing videos for which tdt timestamps were recorded in .dat files (the optimal case)

In [8]:
# Two (exclusive) date windows cover the sessions whose .dat files hold
# usable timestamps.
high_quality_windows = [
    ('2018-02-20 12:00:00.00', '2018-02-22 16:00:00.00'),
    ('2018-02-23 00:00:00.00', '2018-02-28 23:00:00.00'),
]

tdt_videos = pd.concat(
    [get_videos_btwn_dates(start, end) for start, end in high_quality_windows]
).reset_index(drop=True)

# Add timestamp counts first, then tracking frame counts, then compare them
tdt_videos = add_tracking_frame_counts(
    dlc_filepath,
    add_timestamp_counts(timestamp_path, tdt_videos),
)

check_consistency(tdt_videos)

tdt_videos

All tests passed


Unnamed: 0,ferret,block,video_name,dat_file,frame_count,n_timestamps,n_zeros,dlc_n_frames
0,1605,J5-37,2018-02-20_Track_15-21-53.avi,2018-02-20_FrameTDTsamps_15-21-53.dat,18136,18136.0,0.0,18136.0
1,1613,J5-27,2018-02-20_Track_16-03-00.avi,2018-02-20_FrameTDTsamps_16-03-00.dat,16396,16396.0,0.0,16396.0
2,1605,J5-33,2018-02-21_Track_08-33-52.avi,2018-02-21_FrameTDTsamps_08-33-52.dat,26030,26030.0,0.0,26030.0
3,1613,J5-26,2018-02-21_Track_09-25-11.avi,2018-02-21_FrameTDTsamps_09-25-11.dat,30678,30678.0,0.0,30678.0
4,1605,J5-35,2018-02-22_Track_09-45-40.avi,2018-02-22_FrameTDTsamps_09-45-40.dat,25526,25526.0,0.0,25526.0
5,1613,J5-29,2018-02-22_Track_10-31-10.avi,2018-02-22_FrameTDTsamps_10-31-10.dat,17010,17010.0,0.0,17010.0
6,1605,J5-36,2018-02-22_Track_15-19-38.avi,2018-02-22_FrameTDTsamps_15-19-38.dat,28075,28075.0,0.0,28075.0
7,1605,J5-39,2018-02-23_Track_09-57-47.avi,2018-02-23_FrameTDTsamps_09-57-47.dat,33810,33810.0,0.0,33810.0
8,1613,J5-31,2018-02-23_Track_10-59-32.avi,2018-02-23_FrameTDTsamps_10-59-32.dat,22191,22191.0,0.0,22191.0
9,1605,J5-40,2018-02-23_Track_15-35-32.avi,2018-02-23_FrameTDTsamps_15-35-32.dat,23709,23709.0,0.0,23709.0


## 2. Partial Datasets

List datasets for which tdt timestamps have been recovered by Optical Character Recognition

In [9]:
# Videos in the (exclusive) window 2018-01-31 to 2018-02-20, annotated with
# their timestamp counts and then with their tracking frame counts.
tdt_partial = add_timestamp_counts(
    timestamp_path,
    get_videos_btwn_dates('2018-01-31 00:00:00.00', '2018-02-20 00:00:00.00'),
)
tdt_partial = add_tracking_frame_counts(dlc_filepath, tdt_partial)

# Raises if frame counts, timestamp counts and tracking counts disagree
check_consistency(tdt_partial)
tdt_partial

All tests passed


Unnamed: 0,ferret,block,video_name,dat_file,frame_count,n_timestamps,n_zeros,dlc_n_frames
0,1605,J5-20,2018-01-31_Track_08-32-38.avi,2018-01-31_FrameTDTsamps_08-32-38.dat,24004,24004.0,0.0,24004.0
1,1613,J5-14,2018-01-31_Track_09-20-28.avi,2018-01-31_FrameTDTsamps_09-20-28.dat,17388,17388.0,0.0,17388.0
2,1613,J5-15,2018-01-31_Track_14-10-30.avi,2018-01-31_FrameTDTsamps_14-10-30.dat,14936,14936.0,0.0,14936.0
3,1613,J5-16,2018-01-31_Track_14-36-12.avi,2018-01-31_FrameTDTsamps_14-36-12.dat,3510,3510.0,0.0,3510.0
4,1605,J5-28,2018-02-19_Track_10-50-46.avi,2018-02-19_FrameTDTsamps_10-50-46.dat,23373,23373.0,0.0,23373.0
5,1613,J5-22,2018-02-19_Track_11-35-10.avi,2018-02-19_FrameTDTsamps_11-35-10.dat,6495,6495.0,0.0,6495.0


## 3. Fully missing data

We can see that one video actually doesn't have any frames, which we need to remove from the analysis.

In [12]:
# Sessions whose timestamps were recorded as zeros, gathered from three
# (exclusive) date windows.
fmis = pd.concat([
    get_videos_btwn_dates('2018-01-29 00:00:00.00', '2018-01-29 20:00:00.00'),
    get_videos_btwn_dates('2018-02-20 00:00:00.00', '2018-02-20 13:00:00.00'),
    get_videos_btwn_dates('2018-02-22 16:00:00.00', '2018-02-22 20:00:00.00')
]).reset_index(drop=True)

# Keep only videos that actually contain frames. The explicit .copy() makes the
# filtered table independent of the original, so the in-place column writes
# done by add_tracking_frame_counts do not trigger SettingWithCopyWarning.
fmis = fmis.query('frame_count > 0').copy()
fmis = add_tracking_frame_counts(dlc_filepath, fmis)

fmis

Unnamed: 0,ferret,block,video_name,dat_file,frame_count
0,1605,J5-15,2018-01-29_Track_10-56-52.avi,2018-01-29_FrameTDTsamps_10-56-52.dat,0
1,1613,J5-11,2018-01-29_Track_12-01-16.avi,2018-01-29_FrameTDTsamps_12-01-16.dat,22876
2,1605,J5-16,2018-01-29_Track_15-07-11.avi,2018-01-29_FrameTDTsamps_15-07-11.dat,14415
3,1613,J5-12,2018-01-29_Track_15-44-24.avi,2018-01-29_FrameTDTsamps_15-44-24.dat,20033
4,1605,J5-32,2018-02-20_Track_10-20-05.avi,2018-02-20_FrameTDTsamps_10-20-05.dat,22501
5,1613,J5-24,2018-02-20_Track_11-04-34.avi,2018-02-20_FrameTDTsamps_11-04-34.dat,7697
6,1613,J5-25,2018-02-20_Track_11-19-54.avi,2018-02-20_FrameTDTsamps_11-19-54.dat,13085
7,1613,J5-30,2018-02-22_Track_16-14-04.avi,2018-02-22_FrameTDTsamps_16-14-04.dat,16781
