In [1]:
import glob
import numpy as np
import matplotlib.pyplot as plt
import h5py
import pandas as pd

In [2]:
PRETRIAL_TIME = 1  # seconds
POSTTRIAL_TIME = 5  # seconds
SAMPLE_RATE = 25000  # Hz

In [3]:
# pair of .mat file and .csv file
# pairs defined in Recordings_list.csv

recording_notes = pd.read_csv(
    "../data/Recordings_list.csv",
    usecols=["mouse_id", "filename", "video_nr"],
    header=0,
    sep=";",
)

def get_name_vid_nums(row):
    name_num = int(row["filename"].replace("Data", ""))
    vid_num = int(row["video_nr"].replace("vid", ""))
    return pd.Series({"trigger_num": name_num, "video_num": vid_num})


recording_notes[["trigger_num", "video_num"]] = recording_notes.apply(get_name_vid_nums, axis=1)
recording_notes



Unnamed: 0,mouse_id,filename,video_nr,trigger_num,video_num
0,1,Data1,vid2,1,2
1,1,Data2,vid3,2,3
2,1,Data3,vid6,3,6
3,1,Data4,vid7,4,7
4,1,Data5,vid8,5,8
...,...,...,...,...,...
77,6,Data78,vid101,78,101
78,6,Data79,vid104,79,104
79,6,Data80,vid105,80,105
80,6,Data81,vid108,81,108


In [4]:
# recording_notes = recording_notes[recording_notes["mouse_id"] == mouse_id]
# recording_notes = recording_notes.reset_index(drop=True)


In [5]:
def get_data_paths(mouse_id, recording_num, recording_notes):
    # FOLDER
    folder_path = "../data/A{mouse_id}/".format(mouse_id=mouse_id)

    # read TRIGGER and POSE combination
    recording_note = recording_notes.iloc[recording_num]
    trigger_num = recording_note["trigger_num"]
    video_num = recording_note["video_num"]

    # TRIGGER PATH
    trigger_path = folder_path + "Data{trigger_num}".format(trigger_num=trigger_num) + ".mat"

    # POSE PATH
    # read all pose files for that mouse
    pose_paths = glob.glob(folder_path + "*.csv")

    # format video num to two digits
    if video_num < 10:
        video_num = "0" + str(video_num)
    else:
        video_num = str(video_num)

    # filter fore pose path that starts with FH00 + video_num
    pose_path = [p for p in pose_paths if str(video_num).zfill(4) in p]
    assert len(pose_path) > 0, "No pose path found"
    assert len(pose_path) < 2, "Multiple pose paths found"
    pose_path = pose_path[0]
    
    return trigger_path, pose_path

In [None]:
# get that trial from recording notes
recording_note = recording_notes.iloc[recording_num]
trigger_num = recording_note["trigger_num"]
video_num = recording_note["video_num"]

# construct paths
folder_path = "../data/A{mouse_id}/".format(mouse_id=mouse_id)
trigger_path = folder_path + "Data{trigger_num}".format(trigger_num=trigger_num) + ".mat"

# read all pose files for that mouse
pose_paths = glob.glob(folder_path + "*.csv")

# format video num to two digits
if video_num < 10:
    video_num = "0" + str(video_num)
else:
    video_num = str(video_num)

# filter fore pose path that starts with FH00 + video_num
pose_path = [p for p in pose_paths if "FH00" + str(video_num) in p]
assert len(pose_path) > 0, "No pose path found"
assert len(pose_path) < 2, "Multiple pose paths found"
pose_path = pose_path[0]


In [None]:

print(trigger_path)
print(pose_path)

### determine trials from triggers

In [None]:
# load trigger data
data = h5py.File(trigger_path, 'r')
data.keys()

<KeysViewHDF5 ['Ch1', 'Ch2', 'Ch3', 'Ch31', 'Ch32', 'Ch4', 'Ch5', 'Ch6', 'file']>

Channel 3: TTLs an die Kamera\
Channel 4: Sound trigger\
Channel 31: Keyboard

In [None]:
ch3 = data["Ch3"]
ch4 = data["Ch4"]
ch31 = data["Ch31"]

key_times = np.array(ch31["times"]).flatten()[1:-1]
start_time = np.array(ch31["times"]).flatten()[0]

ttl = np.array(ch3["values"]).flatten()
ttl_times = np.array(ch3["times"]).flatten()

sound = np.array(ch4["values"]).flatten()
sound_times = np.array(ch4["times"]).flatten()

key_times

array([75.877426, 99.681398])

In [None]:
# ttl triggers
TTL_THRESHOLD = 2 # around 2-10, 2 should be fine
    
frames_idx = np.where(np.diff(ttl) > 2)[0]
frames_idx.shape

(3359,)

In [None]:
# index of key press in ttl/sound index (25000 Hz)
key_idx = np.array([np.argmax(ttl_times > k) for k in key_times])

In [None]:
# get number of first frame for each trial, from 1s before key press to 2s after

trial_start_frame_no = [np.argmax(frames_idx > k - SAMPLE_RATE * PRETRIAL_TIME) for k in key_idx]
trial_end_frame_no = [np.argmax(frames_idx > k + SAMPLE_RATE * POSTTRIAL_TIME) for k in key_idx]

In [None]:
trial_start_frame_no

[1849, 2444]

### fetch pose data for trials

#### Preprocess pose data

In [None]:
# Rearrange csv from Deeplabcut to a more manageable format.
df = pd.read_csv(pose_path, skiprows=1)  # .reset_index(drop=True)
df = df.T.reset_index()
df["index"] = [
    "bodyparts",
    "nose",
    "nose",
    "nose",
    "left_ear",
    "left_ear",
    "left_ear",
    "right_ear",
    "right_ear",
    "right_ear",
]
df = df.set_index(["index", 0])


df.index.names = ["bodyparts", "coords"]
df = df.drop("bodyparts", level=0)
df = df.T
df.index.names = ["frame"]
df

bodyparts,nose,nose,nose,left_ear,left_ear,left_ear,right_ear,right_ear,right_ear
coords,x,y,likelihood,x,y,likelihood,x,y,likelihood
frame,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
1,847.8878784179688,224.12966918945312,0.7783203125,855.67138671875,238.95225524902344,0.7587890625,872.36572265625,233.89950561523438,0.34521484375
2,844.1484985351562,220.18011474609375,0.8359375,851.1298828125,234.35708618164062,0.7138671875,868.912841796875,229.79464721679688,0.395263671875
3,842.4306030273438,215.1500244140625,0.79443359375,847.646728515625,230.30935668945312,0.67333984375,865.7194213867188,225.407958984375,0.32177734375
4,841.1531372070312,210.32666015625,0.74267578125,847.1686401367188,224.93621826171875,0.6748046875,865.2796630859375,220.11978149414062,0.43896484375
5,839.138427734375,205.3473663330078,0.79296875,844.8896484375,220.87632751464844,0.6708984375,862.8052978515625,215.9651336669922,0.4169921875
...,...,...,...,...,...,...,...,...,...
3325,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
3326,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
3327,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
3328,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0


In [None]:
# supress a warning (should be no problem according to stackoverflow)
pd.options.mode.chained_assignment = None  # default='warn'

# iterate trials
for trial, (start_frame, end_frame) in enumerate(zip(trial_start_frame_no, trial_end_frame_no)):
    trial_df = df.iloc[start_frame:end_frame]

    # add trial number to index
    trial_df['trial_number'] = trial
    trial_df.reset_index(inplace=True)
    trial_df.set_index(['trial_number', "frame"], inplace=True)
    

In [None]:
trial_df

Unnamed: 0_level_0,bodyparts,nose,nose,nose,left_ear,left_ear,left_ear,right_ear,right_ear,right_ear
Unnamed: 0_level_1,coords,x,y,likelihood,x,y,likelihood,x,y,likelihood
trial_number,frame,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
1,2445,899.473876953125,359.7204895019531,0.62548828125,906.7261962890625,354.0067138671875,0.321533203125,906.501220703125,356.40057373046875,0.21826171875
1,2446,899.47216796875,360.2377624511719,0.63720703125,905.7684326171875,354.5341491699219,0.33056640625,906.1481323242188,356.96075439453125,0.185302734375
1,2447,899.4775390625,360.23797607421875,0.63720703125,905.7677001953125,354.5341491699219,0.330322265625,906.1455688476562,356.9624938964844,0.1851806640625
1,2448,899.4257202148438,359.28863525390625,0.63330078125,905.8002319335938,354.71942138671875,0.339599609375,906.42236328125,356.93292236328125,0.2008056640625
1,2449,899.0914306640625,360.3277893066406,0.59814453125,906.1694946289062,354.51934814453125,0.323974609375,913.58935546875,351.8052978515625,0.1925048828125
1,...,...,...,...,...,...,...,...,...,...
1,2515,899.2801513671875,360.70745849609375,0.63916015625,907.2897338867188,353.40283203125,0.305908203125,906.9530639648438,356.7421875,0.306884765625
1,2516,714.3673706054688,651.0740966796875,0.32080078125,711.9175415039062,637.98193359375,0.427001953125,714.0379638671875,635.519287109375,0.089111328125
1,2517,715.8302001953125,650.6109008789062,0.306640625,712.1506958007812,638.2376098632812,0.386474609375,714.2783813476562,635.7891845703125,0.08685302734375
1,2518,715.6358032226562,651.22705078125,0.282958984375,712.52099609375,638.048583984375,0.396728515625,717.1887817382812,648.4009399414062,0.09674072265625


# put all trials of a mouse into one DataFrame

have a trial number running for each mouse

### functions

#### data paths

In [7]:
def get_data_paths(mouse_id, recording_num, recording_notes):

    recording_notes = recording_notes[recording_notes["mouse_id"] == mouse_id]
    recording_notes = recording_notes.reset_index(drop=True)
    
    # FOLDER
    folder_path = "../data/A{mouse_id}/".format(mouse_id=mouse_id)

    # read TRIGGER and POSE combination
    recording_note = recording_notes.iloc[recording_num]
    trigger_num = recording_note["trigger_num"]
    video_num = recording_note["video_num"]

    # TRIGGER PATH
    trigger_path = folder_path + "Data{trigger_num}".format(trigger_num=trigger_num) + ".mat"

    # POSE PATH
    # read all pose files for that mouse
    pose_paths = glob.glob(folder_path + "*.csv")

    # format video num to two digits
    if video_num < 10:
        video_num = "0" + str(video_num)
    else:
        video_num = str(video_num)

    # filter fore pose path that starts with FH00 + video_num
    pose_path = [p for p in pose_paths if str(video_num).zfill(4) in p]
    assert len(pose_path) > 0, "No pose path found for video number {}".format(video_num)
    assert len(pose_path) < 2, "Multiple pose paths found for video number {}".format(video_num)
    pose_path = pose_path[0]
    
    return trigger_path, pose_path

#### trials from triggers

In [8]:
def get_trials(trigger_path, pose_path):
    # load trigger data
    data = h5py.File(trigger_path, 'r')
    data.keys()

    ch3 = data["Ch3"]
    ch4 = data["Ch4"]
    ch31 = data["Ch31"]

    key_times = np.array(ch31["times"]).flatten()[1:-1]
    start_time = np.array(ch31["times"]).flatten()[0]

    ttl = np.array(ch3["values"]).flatten()
    ttl_times = np.array(ch3["times"]).flatten()

    sound = np.array(ch4["values"]).flatten()
    sound_times = np.array(ch4["times"]).flatten()

    # ttl triggers
    frames_idx = np.where(np.diff(ttl) > 2)[0]

    # index of key press in ttl/sound index (25000 Hz)
    key_idx = np.array([np.argmax(ttl_times > k) for k in key_times])

    # get number of first frame for each trial, from 1s before key press to 2s after
    trial_start_frame_no = [np.argmax(frames_idx > k - SAMPLE_RATE * PRETRIAL_TIME) for k in key_idx]
    trial_end_frame_no = [np.argmax(frames_idx > k + SAMPLE_RATE * POSTTRIAL_TIME) for k in key_idx]
    
    return trial_start_frame_no, trial_end_frame_no

#### get pose data

In [9]:
def get_pose_data(pose_path):
    # Rearrange csv from Deeplabcut to a more manageable format.
    df = pd.read_csv(pose_path, skiprows=1)  # .reset_index(drop=True)
    df = df.T.reset_index()
    df["index"] = [
        "bodyparts",
        "nose",
        "nose",
        "nose",
        "left_ear",
        "left_ear",
        "left_ear",
        "right_ear",
        "right_ear",
        "right_ear",
    ]
    df = df.set_index(["index", 0])


    df.index.names = ["bodyparts", "coords"]
    df = df.drop("bodyparts", level=0)
    df = df.T
    df.index.names = ["frame"]
    
    return df

#### dataframe with all trials for one mouse

In [10]:
def load_mouse_trials(mouse_id, recording_notes):
    # get number of recordings for that mouse
    num_recordings = recording_notes[recording_notes["mouse_id"] == mouse_id].shape[0]

    # supress a warning (should be no problem according to stackoverflow)
    pd.options.mode.chained_assignment = None
    trial_no = 0
    recording_df_list = []

    # iterate recordings
    for recording_num in range(num_recordings):
        # iterate trials in recording
        trigger_path, pose_path = get_data_paths(mouse_id, recording_num, recording_notes)
        trial_start_frame_no, trial_end_frame_no = get_trials(trigger_path, pose_path)
        df_pose = get_pose_data(pose_path)

        trial_df_list = []

        for start_frame, end_frame in zip(trial_start_frame_no, trial_end_frame_no):

            trial_df = df_pose.iloc[start_frame:end_frame]

            # add trial number to index
            trial_df['trial_number'] = trial_no
            trial_df.reset_index(inplace=True)
            trial_df.set_index(['trial_number', "frame"], inplace=True)

            trial_df_list.append(trial_df)

            trial_no += 1
        try:
            df_recording = pd.concat(trial_df_list)
            recording_df_list.append(df_recording)

        except ValueError:
            print("No trials found for recording number {}".format(recording_num))
            continue

    df_mouse= pd.concat(recording_df_list)
    
    return df_mouse

## run

In [11]:
recording_notes

Unnamed: 0,mouse_id,filename,video_nr,trigger_num,video_num
0,1,Data1,vid2,1,2
1,1,Data2,vid3,2,3
2,1,Data3,vid6,3,6
3,1,Data4,vid7,4,7
4,1,Data5,vid8,5,8
...,...,...,...,...,...
77,6,Data78,vid101,78,101
78,6,Data79,vid104,79,104
79,6,Data80,vid105,80,105
80,6,Data81,vid108,81,108


In [12]:
mouse_id = 6
df_mouse = load_mouse_trials(mouse_id, recording_notes)
df_mouse

Unnamed: 0_level_0,bodyparts,nose,nose,nose,left_ear,left_ear,left_ear,right_ear,right_ear,right_ear
Unnamed: 0_level_1,coords,x,y,likelihood,x,y,likelihood,x,y,likelihood
trial_number,frame,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
0,501,203.2786,348.31708,0.90722656,203.30386,359.14722,0.6357422,209.96613,355.40863,0.859375
0,502,201.36856,343.04532,0.8383789,200.85994,353.9245,0.6796875,208.55817,350.56732,0.8652344
0,503,199.40384,339.09326,0.88623047,197.62718,349.7438,0.6801758,205.92543,346.51584,0.83935547
0,504,195.13237,336.79114,0.8457031,195.08832,347.48026,0.69140625,202.54729,343.0028,0.81347656
0,505,191.6336,334.18167,0.85546875,191.65677,344.21225,0.62939453,198.93848,340.3069,0.8041992
...,...,...,...,...,...,...,...,...,...,...
13,344,505.14648,473.5829,0.2253418,506.43738,476.4575,0.14196777,520.9384,471.24963,0.24804688
13,345,505.14648,473.58344,0.22546387,506.43738,476.4575,0.14196777,520.9391,471.25067,0.24841309
13,346,505.14648,473.58344,0.22546387,506.43738,476.4575,0.14196777,520.9391,471.25067,0.24841309
13,347,504.97723,473.3974,0.22338867,506.2066,476.1638,0.14782715,520.97156,471.53824,0.27075195


In [None]:
df_mouse.to_csv("../data/A{}/trials.csv".format(mouse_id))