In [24]:
import glob
import os

import h5py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


# Suppress pandas' chained-assignment warning (SettingWithCopyWarning) for the
# whole notebook; judged harmless here based on a Stack Overflow answer.
pd.options.mode.chained_assignment = None  # default='warn'

In [25]:
# Acquisition rate of the trigger channels; trial windows below are converted
# from seconds to samples with this value.
SAMPLE_RATE = 25_000  # Hz

# Trial window relative to the experimenter's key press.
PRETRIAL_TIME = 1  # seconds kept before the key press
POSTTRIAL_TIME = 5  # seconds kept after the key press

Channel 3: TTLs to the camera\
Channel 4: Sound trigger\
Channel 31: Keyboard

have a trial number running for each mouse

This script segments raw data from recordings into trials. The following steps are performed:

1. Recording notes are needed to pair DeepLabCut pose data with the corresponding trigger data from Spike2. The recording notes are imported and their human-readable file labels are converted to plain numbers.
2. filepaths are needed for importing corresponding files. a function for converting info from recording notes into file paths is defined.
3. Spike2 data contains information about when a trial starts and ends; this information is extracted from key presses by the experimenter that are logged in the Spike2 data. <br>A function is defined that returns the start frame number and end frame number of each trial. One recording may contain multiple trials.
4. Pose data is imported. A few formatting steps are applied to the dataframe containing the pose data to improve its readability.
5. steps 2. , 3. , 4. are repeated for every recording file of a mouse.
6. the above steps result in one dataframe per mouse, that is then saved into the data folder of that mouse as "trials.csv"

In [26]:
# Set the mouse ID here, then run all cells below to segment the data for that mouse.
mouse_id = 4

### 1. read recording notes

In [27]:
# Load the recording notes.
#
# Each row of Recordings_list.csv pairs one trigger file (.mat) with one pose
# file (.csv) for a given mouse; only the three columns needed for that
# pairing are read.
recording_notes = pd.read_csv(
    "../data/Recordings_list.csv",
    sep=";",
    header=0,
    usecols=["mouse_id", "filename", "video_nr"],
)

def get_name_vid_nums(row):
    """Extract the numeric trigger-file and video IDs from one recording-notes row.

    Parameters
    ----------
    row : mapping
        Must provide ``"filename"`` (e.g. ``"Data12"``) and ``"video_nr"``
        (e.g. ``"vid7"``) as strings.

    Returns
    -------
    pandas.Series
        Entries ``trigger_num`` and ``video_num``, in that order, holding the
        integers left after removing the ``"Data"`` / ``"vid"`` text.
    """
    # (output name, source column, text stripped before the integer conversion)
    fields = (
        ("trigger_num", "filename", "Data"),
        ("video_num", "video_nr", "vid"),
    )
    return pd.Series(
        {out_name: int(row[column].replace(label, "")) for out_name, column, label in fields}
    )


# Parse the numeric IDs row by row, store them as two new columns, and display
# the resulting table.
recording_notes[["trigger_num", "video_num"]] = recording_notes.apply(
    get_name_vid_nums, axis="columns"
)
recording_notes

Unnamed: 0,mouse_id,filename,video_nr,trigger_num,video_num
0,1,Data1,vid2,1,2
1,1,Data2,vid3,2,3
2,1,Data3,vid6,3,6
3,1,Data4,vid7,4,7
4,1,Data5,vid8,5,8
...,...,...,...,...,...
77,6,Data78,vid101,78,101
78,6,Data79,vid104,79,104
79,6,Data80,vid105,80,105
80,6,Data81,vid108,81,108


### 2. file paths

In [28]:
def get_data_paths(mouse_id, recording_num, recording_notes):
    """Return the trigger (.mat) and pose (.csv) file paths for one recording.

    Parameters
    ----------
    mouse_id : int
        Mouse identifier; selects the rows of `recording_notes` and the data
        folder ``../data/A{mouse_id}/``.
    recording_num : int
        Zero-based position of the recording among this mouse's rows in
        `recording_notes`.
    recording_notes : pandas.DataFrame
        Table with at least the columns ``mouse_id``, ``trigger_num`` and
        ``video_num``.

    Returns
    -------
    tuple of (str, str)
        ``(trigger_path, pose_path)``.

    Raises
    ------
    IndexError
        If `recording_num` is out of range for this mouse.
    AssertionError
        If no pose file, or more than one pose file, matches the video number.
    """
    mouse_notes = recording_notes[recording_notes["mouse_id"] == mouse_id]
    mouse_notes = mouse_notes.reset_index(drop=True)

    # FOLDER
    folder_path = "../data/A{mouse_id}/".format(mouse_id=mouse_id)

    # read TRIGGER and POSE combination
    recording_note = mouse_notes.iloc[recording_num]
    trigger_num = recording_note["trigger_num"]
    video_num = recording_note["video_num"]

    # TRIGGER PATH
    trigger_path = folder_path + "Data{trigger_num}".format(trigger_num=trigger_num) + ".mat"

    # POSE PATH
    # all csv files in that mouse's folder are candidates
    pose_paths = glob.glob(folder_path + "*.csv")

    # Two-digit label used in the messages below; the search token is the
    # video number zero-padded to four digits.
    video_label = str(video_num).zfill(2)
    video_token = video_label.zfill(4)

    # Match against the file name only, so digits in the directory part of the
    # path (e.g. the mouse folder) can never produce a false match.
    # NOTE(review): this is still a substring match inside the file name;
    # anchoring it to the file-name prefix would be stricter — confirm the
    # naming scheme of the pose files before tightening.
    pose_path = [p for p in pose_paths if video_token in os.path.basename(p)]
    assert len(pose_path) > 0, "No pose path found for video number {}".format(video_label)
    assert len(pose_path) < 2, "Multiple pose paths found for video number {}".format(video_label)

    return trigger_path, pose_path[0]

### 3. trials from triggers

In [29]:
def _first_index_above(values, threshold):
    """Return the index of the first element of `values` greater than `threshold`.

    A bare ``np.argmax(values > threshold)`` yields 0 when no element
    qualifies, which is indistinguishable from a match at position 0. This
    helper returns ``len(values)`` in that case, so the result can be used
    directly as a slice bound.
    """
    hits = np.flatnonzero(np.asarray(values) > threshold)
    return int(hits[0]) if hits.size else len(values)


def get_trials(trigger_path, pose_path):
    """Return the first and last camera-frame number of every trial in a recording.

    The trigger file is opened with h5py. Channel ``Ch3`` holds the TTL signal
    sent to the camera and ``Ch31`` the logged key presses. Each key press
    defines one trial spanning ``PRETRIAL_TIME`` seconds before to
    ``POSTTRIAL_TIME`` seconds after the press.

    Parameters
    ----------
    trigger_path : str
        Path of the trigger file to read.
    pose_path : str
        Unused; kept so existing calls keep working.

    Returns
    -------
    tuple of (list of int, list of int)
        ``(trial_start_frame_no, trial_end_frame_no)``, one entry per trial.
        A window that extends past the last detected frame gets the total
        frame count as its end, so slicing with it runs to the end of the
        recording instead of producing an empty trial.
    """
    # Read everything needed while the file is open, then close it again.
    with h5py.File(trigger_path, "r") as data:
        # The first and last key events are excluded from the trial markers.
        # NOTE(review): presumably these mark recording start/stop — confirm.
        key_times = np.array(data["Ch31"]["times"]).flatten()[1:-1]
        ttl = np.array(data["Ch3"]["values"]).flatten()
        ttl_times = np.array(data["Ch3"]["times"]).flatten()

    # Sample indices where the TTL signal jumps by more than 2 between
    # consecutive samples; the position of an edge in this array is used as
    # the frame number.
    # NOTE(review): assumes one such edge per camera frame — confirm.
    frames_idx = np.where(np.diff(ttl) > 2)[0]

    # index of each key press in the ttl sample index (SAMPLE_RATE Hz)
    key_idx = [_first_index_above(ttl_times, k) for k in key_times]

    # Frame numbers bounding each trial: PRETRIAL_TIME seconds before the key
    # press to POSTTRIAL_TIME seconds after it, expressed in samples.
    pre_samples = SAMPLE_RATE * PRETRIAL_TIME
    post_samples = SAMPLE_RATE * POSTTRIAL_TIME
    trial_start_frame_no = [_first_index_above(frames_idx, k - pre_samples) for k in key_idx]
    trial_end_frame_no = [_first_index_above(frames_idx, k + post_samples) for k in key_idx]

    return trial_start_frame_no, trial_end_frame_no

### 4. get pose data

In [None]:
def get_pose_data(pose_path):
    """Load a DeepLabCut pose csv and reshape it into a (bodypart, coord) table.

    The first line of the file is skipped and the next line is used as the
    header. The column labels are then replaced by fixed body-part names
    (nose, left_ear, right_ear — three columns each) and combined with the
    first data row to form a two-level column index
    ``("bodyparts", "coords")``. The file's own first column is dropped.

    Parameters
    ----------
    pose_path : str
        Path of the pose csv file. The hard-coded labels expect exactly ten
        columns: one leading column plus three per body part.

    Returns
    -------
    pandas.DataFrame
        One row per remaining data row, with the row index named ``"frame"``.
        The frame labels are the row positions in the file as read (the row
        holding the coordinate names is position 0), not the values of the
        dropped first column.
    """
    body_parts = ("nose", "left_ear", "right_ear")
    # "bodyparts" labels the leading column; each body part covers three columns.
    labels = ["bodyparts"] + [part for part in body_parts for _ in range(3)]

    raw = pd.read_csv(pose_path, skiprows=1)

    # Work column-wise: one row per csv column, keyed by (body part, coord name).
    by_column = raw.T.reset_index()
    by_column["index"] = labels
    by_column = by_column.set_index(["index", 0])
    by_column.index.names = ["bodyparts", "coords"]

    # Drop the leading column, then return to one row per frame.
    pose = by_column.drop("bodyparts", level=0).T
    pose.index.names = ["frame"]

    return pose

### 5. run for each recording

In [31]:
# Number of recordings listed for this mouse in the recording notes.
num_recordings = recording_notes[recording_notes["mouse_id"] == mouse_id].shape[0]

trial_no = 0  # running trial counter across all recordings of this mouse
recording_df_list = []

# iterate recordings
for recording_num in range(num_recordings):
    trigger_path, pose_path = get_data_paths(mouse_id, recording_num, recording_notes)
    trial_start_frame_no, trial_end_frame_no = get_trials(trigger_path, pose_path)
    df_pose = get_pose_data(pose_path)

    trial_df_list = []

    # iterate trials in recording
    for start_frame, end_frame in zip(trial_start_frame_no, trial_end_frame_no):
        # Take an explicit copy so the new column is written to an independent
        # frame rather than to a slice of df_pose.
        trial_df = df_pose.iloc[start_frame:end_frame].copy()

        # add trial number to index
        trial_df["trial_number"] = trial_no
        trial_df = trial_df.reset_index().set_index(["trial_number", "frame"])

        trial_df_list.append(trial_df)
        trial_no += 1

    # pd.concat raises ValueError on an empty list; check explicitly instead of
    # catching that error, so unrelated ValueErrors are not silenced.
    if not trial_df_list:
        print("No trials found for recording number {}".format(recording_num))
        continue

    df_recording = pd.concat(trial_df_list)
    recording_df_list.append(df_recording)

# One table for the whole mouse, indexed by (trial_number, frame).
df_mouse = pd.concat(recording_df_list)

No trials found for recording number 6


### 6. save file

In [32]:
# Write the per-mouse trial table to trials.csv inside that mouse's data folder.
df_mouse.to_csv("../data/A{}/trials.csv".format(mouse_id))