Import modules and define functions

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import ssm
from utils import *

#temp workaround since it gets removed from the path whenever the kernel is restarted
import sys
sys.path.insert(0, '/Users/gabriel/lib/')
import chiCa

%matplotlib widget

def get_session_dates(file_names):
    '''Extract the session folder name (the session date) from each file path.

    This relies on the hierarchical Churchland lab data folder structure with:
    animal_name -> session_datetime -> data_type -> file

    The session folder is located by counting path components from the file
    end rather than from the filesystem root, so the result does not depend on
    how deep the home/data directory sits.

    Parameters
    ----------
    file_names: list of str, '/'-separated paths to files inside a data_type folder.

    Returns
    -------
    session_dates: list of str, the session folder name for every path, in input order.

    Examples
    --------
    get_session_dates(['/Users/me/data/GRB001/20230808_133123/chipmunk/x.h5'])
    # -> ['20230808_133123']
    '''
    # [-1] is the file, [-2] the data_type folder, [-3] the session folder.
    return [file.split('/')[-3] for file in file_names]

def get_session_averages(file_names, min_stims=5):
    '''Average outcome per session, for sessions with enough distinct stimulus rates.

    Parameters
    ----------
    file_names: list of str, paths to session files readable with pandas.read_hdf.
    min_stims: int, a session is kept only if it has MORE than this many distinct
               per-trial stimulus event counts.

    Returns
    -------
    averages: list of float, the NaN-ignoring mean of outcome_record for every kept
              session, in input order. Outcomes -1 (early withdrawal) and
              2 (no response) are excluded from the mean.
    '''
    averages = []
    for path in file_names:
        session = pd.read_hdf(path)

        # Number of different stimulus-event counts that occur in this session.
        distinct_rates = {len(events) for events in session.stimulus_event_timestamps}
        if len(distinct_rates) <= min_stims:
            continue

        outcomes = np.array(session.outcome_record, dtype=float)
        # Early withdrawals (-1) and no-response trials (2) do not count towards performance.
        outcomes[np.isin(outcomes, (-1, 2))] = np.nan
        averages.append(np.nanmean(outcomes))

    return averages

def get_filtered_session_averages_and_dates(file_names, min_stims=5):
    '''Easy-trial performance, session date and stimulus set for sufficiently varied sessions.

    A session is kept only if it contains more than `min_stims` distinct per-trial
    stimulus event counts. For kept sessions the performance is averaged over the
    trials with 4 or 20 stimulus events only.

    This relies on the hierarchical Churchland lab data folder structure with:
    animal_name -> session_datetime -> data_type -> file

    Parameters
    ----------
    file_names: list of str, '/'-separated paths to session files readable with pandas.read_hdf.
    min_stims: int, minimum number of distinct stimulus event counts (exclusive).

    Returns
    -------
    filtered_dates: list of str, session folder name of every kept session.
    filtered_session_averages: list of float, mean outcome over the 4/20-event trials,
                               ignoring early withdrawals (-1) and no-response trials (2).
                               NaN when a session has no such scored trial.
    stims: list of list, the sorted distinct stimulus event counts of every kept session.
    '''
    filtered_dates = []
    filtered_session_averages = []
    stims = []
    for file in file_names:
        session_data = pd.read_hdf(file)
        stim_rates = np.array([len(timestamps) for timestamps in session_data.stimulus_event_timestamps])
        unique_stims = list(np.unique(stim_rates))
        if len(unique_stims) <= min_stims:
            continue  # not enough stimulus variety, skip before doing any further work

        #allocate performance
        performance = np.array(session_data.outcome_record, dtype=float)
        performance[performance == -1] = np.nan #setting early withdrawal trials as nans
        performance[performance == 2] = np.nan #setting no response trials as nans
        valid_trials = np.logical_or(stim_rates == 4, stim_rates == 20)
        easy_performance = performance[valid_trials]
        if np.isnan(easy_performance).all():
            # No scored easy trial: report NaN directly instead of letting
            # np.nanmean emit a "Mean of empty slice" RuntimeWarning.
            filtered_session_averages.append(np.nan)
        else:
            filtered_session_averages.append(np.nanmean(easy_performance))

        #allocate date
        # Count from the file end (file, data_type, session) so the result does
        # not depend on how deep the data directory sits in the filesystem.
        filtered_dates.append(file.split('/')[-3])

        #allocate unique_stims
        stims.append(unique_stims)

    return filtered_dates, filtered_session_averages, stims

def get_file_names(animal_name, data_type, file_extension, file_keyword=None):
    '''Collect the files of one data type across all sessions of an animal.

    The search follows the hierarchical Churchland lab data folder structure
    located under ~/data:
    animal_name -> session_datetime -> data_type

    Adapted from Lukas Oesch's `chipmunk_analysis_tools.py`.


    Parameters
    ----------
    animal_name: str, name of the animal whose sessions are searched.
    data_type: str, name of the folder holding the data type, for example chipmunk, caiman, etc.
    file_extension: str, glob pattern for the files, for example *.mat
    file_keyword: str, optional substring a matched path has to contain in order to be
                  kept; used to tell the desired files apart from others with the
                  same extension. None keeps every match.

    Returns
    -------
    file_names: list, sorted list of the selected file paths

    Examples
    --------
    file_names = get_file_names('GRB001', 'chipmunk', '*.h5')
    '''
    import glob
    import os

    user_home = os.path.expanduser("~")
    session_pattern = f"{user_home}/data/{animal_name}/*/"

    # Walk every session folder, glob for the requested files inside its
    # data_type folder and keep those that satisfy the keyword.
    file_names = sorted(
        candidate
        for session_dir in glob.glob(session_pattern)
        for candidate in glob.glob(os.path.join(session_dir, data_type, file_extension))
        if file_keyword is None or file_keyword in candidate
    )

    return file_names

def count_stimulus_timestamps(row):
    '''Return the number of stimulus event timestamps of every trial in a session.

    Parameters
    ----------
    row: mapping with a 'path' entry (e.g. a DataFrame row) pointing to a session
         file readable with pandas.read_hdf.

    Returns
    -------
    list of int, one count per trial, in trial order.
    '''
    session = pd.read_hdf(row['path'])
    return list(map(len, session['stimulus_event_timestamps']))

def get_response_side(row):
    '''Load the per-trial response side of a session as a numpy array.

    Parameters
    ----------
    row: mapping with a 'path' entry (e.g. a DataFrame row) pointing to a session
         file readable with pandas.read_hdf.

    Returns
    -------
    numpy.ndarray, the 'response_side' column of the session, in trial order.
    '''
    return np.array(pd.read_hdf(row['path'])['response_side'])

def get_filtered_performance_data(paths_all_animals, performance_threshold=0.8, min_stims=5):
    '''Load the session data of every session that passes a performance threshold.

    Session averages and dates come from get_filtered_session_averages_and_dates;
    a file is kept when the session folder it lives in belongs to a session whose
    average is at least `performance_threshold`. Each kept file is read from disk
    exactly once and all derived columns are computed from that single load.

    This relies on the hierarchical Churchland lab data folder structure with:
    animal_name -> session_datetime -> data_type -> file

    Parameters
    ----------
    paths_all_animals: dict, maps an animal ID to the list of its session file paths.
    performance_threshold: float, minimum session average (inclusive) for a session to be kept.
    min_stims: int, forwarded to get_filtered_session_averages_and_dates.

    Returns
    -------
    all_data: pandas.DataFrame, one row per kept session file with the columns
        animalID: key of `paths_all_animals` the file belongs to
        path: path of the session file
        data: the loaded session DataFrame
        trials_per_session: number of rows (trials) in the session
        num_completed_trials: trials whose outcome_record is neither -1 nor 2
        stimuli_presented: list with the number of stimulus event timestamps per trial
        stimulus_intensity: stimuli_presented shifted by -12
        response_side: numpy array of the response_side column
        choice: list of the non-missing response_side values
    The frame is empty (but has all columns) when no session passes.
    '''
    selected = []
    for animal_id, paths in paths_all_animals.items():
        session_dates, session_averages, _ = get_filtered_session_averages_and_dates(paths, min_stims=min_stims)
        # NaN averages compare False and are therefore dropped, like in a boolean mask.
        passing_dates = {
            date for date, average in zip(session_dates, session_averages)
            if average >= performance_threshold
        }
        for path in paths:
            # The session folder is the third component counted from the file
            # end (file, data_type, session), independent of the data root depth.
            path_parts = path.split('/')
            if len(path_parts) >= 3 and path_parts[-3] in passing_dates:
                selected.append((animal_id, path))

    all_data = pd.DataFrame(selected, columns=['animalID', 'path'])

    # Single read per file; everything below is derived from this column.
    all_data['data'] = all_data['path'].apply(pd.read_hdf)
    all_data['trials_per_session'] = all_data['data'].apply(len)
    all_data['num_completed_trials'] = all_data['data'].apply(
        lambda session: int((~session['outcome_record'].isin([-1, 2])).sum())
    )
    all_data['stimuli_presented'] = all_data['data'].apply(
        lambda session: [len(trial) for trial in session['stimulus_event_timestamps']]
    )
    # NOTE(review): 12 is presumably the category boundary in stimulus events — confirm.
    all_data['stimulus_intensity'] = all_data['stimuli_presented'].apply(
        lambda counts: [count - 12 for count in counts]
    )
    all_data['response_side'] = all_data['data'].apply(
        lambda session: np.array(session['response_side'])
    )
    all_data['choice'] = all_data['response_side'].apply(
        lambda sides: [side for side in sides if not pd.isna(side)]
    )

    return all_data

Convert files to .h5

In [13]:
# Select the chipmunk behavior files (*.mat) of several sessions and convert them.
# NOTE(review): judging by the cell output the conversion writes an .h5 file next to
# each .mat file and returns the list of new paths — confirm against chiCa.
fn = chiCa.pick_files_multi_session('chipmunk', '*.mat')
chiCa.convert_specified_behavior_sessions(fn)

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block2_values] [items->Index(['Port1In', 'Port1Out', 'Port2In', 'Port2Out', 'Port3In', 'Port3Out',
       'Tup', 'DemonCenterFixationPeriod', 'DemonDidNotChoose',
       'DemonEarlyWithdrawal', 'DemonEarlyWithdrawalPunishment', 'DemonGoCue',
       'DemonInitFixation', 'DemonReward', 'DemonWaitForCenterFixation',
       'DemonWaitForResponse', 'DemonWaitForWithdrawalFromCenter',
       'DemonWrongChoice', 'DemonWrongChoicePunishment', 'FinishTrial',
       'PlayStimulus', 'PreStimPeriod', 'Sync', 'TrialStart',
       'stimulus_modality', 'stimulus_event_timestamps', 'demonstrator_ID',
       'outcome_presentation', 'response_port_out'],
      dtype='object')]

  trialdata.to_hdf(os.path.splitext(current_file)[0] + '.h5', '/Data') #Save as hdf5
your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_t

['/Users/gabriel/data/GRB009/20230808_133123/chipmunk/GRB009_20230808_133123_chipmunk_DemonstratorAudiTask.h5',
 '/Users/gabriel/data/GRB009/20230809_145413/chipmunk/GRB009_20230809_145413_chipmunk_DemonstratorAudiTask.h5',
 '/Users/gabriel/data/GRB009/20230810_140205/chipmunk/GRB009_20230810_140205_chipmunk_DemonstratorAudiTask.h5',
 '/Users/gabriel/data/GRB009/20230811_144234/chipmunk/GRB009_20230811_144234_chipmunk_DemonstratorAudiTask.h5',
 '/Users/gabriel/data/GRB009/20230814_141111/chipmunk/GRB009_20230814_141111_chipmunk_DemonstratorAudiTask.h5',
 '/Users/gabriel/data/GRB009/20230815_142151/chipmunk/GRB009_20230815_142151_chipmunk_DemonstratorAudiTask.h5',
 '/Users/gabriel/data/GRB009/20230816_133822/chipmunk/GRB009_20230816_133822_chipmunk_DemonstratorAudiTask.h5',
 '/Users/gabriel/data/GRB009/20230817_140812/chipmunk/GRB009_20230817_140812_chipmunk_DemonstratorAudiTask.h5',
 '/Users/gabriel/data/GRB009/20230818_143846/chipmunk/GRB009_20230818_143846_chipmunk_DemonstratorAudiTa

In [None]:
def get_performance_averages(file_names=None):
    '''Average performance on the easy trials (4 and 20 Hz) for a set of sessions.

    Parameters
    ----------
    file_names: list of str, optional. Paths to session files readable with
                pandas.read_hdf. When omitted, the files are requested through
                chiCa.pick_files_multi_session("chipmunk", "*.h5"), as before.

    Returns
    -------
    session_performance_averages: list of float, one value per session in input order.
        Early withdrawals (-1), no-response trials (2) and all trials that do not have
        4 or 20 stimulus events are ignored. NaN when a session has no scored easy trial.
    '''
    if file_names is None:
        file_names = chiCa.pick_files_multi_session("chipmunk", "*.h5")

    session_performance_averages = []
    for file in file_names:
        session_data = pd.read_hdf(file)
        stim_rates = np.array([len(timestamps) for timestamps in session_data.stimulus_event_timestamps])
        performance = np.array(session_data.outcome_record, dtype=float)
        performance[performance == -1] = np.nan #setting early withdrawal trials as nans
        performance[performance == 2] = np.nan #setting no response trials as nans
        performance[(stim_rates != 4) & (stim_rates != 20)] = np.nan #setting non easy trials (4 and 20 Hz) to nan
        if np.isnan(performance).all():
            # Nothing left to average: report NaN directly instead of letting
            # np.nanmean emit a "Mean of empty slice" RuntimeWarning.
            session_performance_averages.append(np.nan)
        else:
            session_performance_averages.append(np.nanmean(performance))

    return session_performance_averages


# One call per animal; each call asks for that animal's session files.
GRB001_perf = get_performance_averages()
GRB002_perf = get_performance_averages()
GRB003_perf = get_performance_averages()
GRB004_perf = get_performance_averages()

data = {'GRB001':GRB001_perf, 'GRB002':GRB002_perf, 'GRB003':GRB003_perf, 'GRB004':GRB004_perf}

# Define the data for the subplots
males = {'GRB001':GRB001_perf, 'GRB002':GRB002_perf}
females = {'GRB003':GRB003_perf, 'GRB004':GRB004_perf}

male_colors = ['b', 'deepskyblue']
female_colors = ['r', 'lightcoral']

fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(10, 5))

# Left panel: males, right panel: females. Both panels are drawn the same way.
for ax, group, colors in ((ax1, males, male_colors), (ax2, females, female_colors)):
    for i, (label, values) in enumerate(group.items()):
        ax.plot(values, label=label, color=colors[i])
    ax.set_xlabel('Sessions')
    ax.set_ylabel('Session Performance')
    # The 0.8 reference line spans the longest training history in the panel.
    # (The per-session list inside get_performance_averages is local to that
    # function and cannot be used here.)
    n_sessions = max((len(animal_values) for animal_values in group.values()), default=0)
    ax.hlines(0.8, 0, n_sessions, color='grey', linestyle='dotted')
    ax.legend()

fig.suptitle('Performance by Sex')
plt.show()


Align behavioral data to video

In [None]:
camlogs = chiCa.pick_files_multi_session("chipmunk", "*.camlog", "BackStereo") #should rename this to camlog file names

# Align data
for file in camlogs:
    t = str.split(file, '/')[5]
    print('------------------------------')
    print(f'Using default alignment function for {t}.')
    try:
        chiCa.align_behavioral_video(file)
    except Exception as err:
        # Best effort: report the failing session together with the actual error and
        # move on. Catching Exception (not a bare except) keeps Ctrl-C / kernel
        # interrupts working while a long batch is running.
        print("There was an issue with the camlog file for the current session:", file)
        print(f"{type(err).__name__}: {err}")
        print("Continuing to the next one...")

Grab filtered data

In [16]:
# Collect the converted chipmunk session files (*.h5) of every animal in the cohort.
GRB005_paths = get_file_names('GRB005', 'chipmunk', '*.h5')
GRB006_paths = get_file_names('GRB006', 'chipmunk', '*.h5')
GRB007_paths = get_file_names('GRB007', 'chipmunk', '*.h5')

# Map animal ID -> list of session file paths, the input format of get_filtered_performance_data.
paths_all_animals = {'GRB005':GRB005_paths,
                     'GRB006':GRB006_paths,
                     'GRB007':GRB007_paths}

# Keep only sessions with an average of at least 0.7 and more than 5 distinct stimulus rates.
all_data = get_filtered_performance_data(paths_all_animals=paths_all_animals, performance_threshold=0.7, min_stims=5)

Grab aligned data

In [39]:
# Load the video alignment result of a single session.
# NOTE(review): the absolute, user-specific path makes this cell non-portable.
# NOTE(review): allow_pickle=True unpickles arbitrary objects; only load files you trust.
t = np.load('/Users/gabriel/data/GRB007/20231101_144710/analysis/GRB007_20231101_144710_chipmunk_DemonstratorAudiTask_BackStereoView_00000000_video_alignment.npy', allow_pickle=True)
# The file holds one pickled object; .item() unwraps it so it can be indexed by key.
t = t.item()
# Show the first entry of 'trial_starts'.
t['trial_starts'][0]

12

Create a dictionary that maps each animal ID to the IDs of its sessions that passed the performance threshold, so they are handy later on.

In [63]:
# Map animal ID -> list of session IDs (session folder names) that passed the filter.
good_sessions = {}
for path in all_data['path']:
    # Folder layout is animal/session/data_type/file, so counted from the file end the
    # animal is component -4 and the session is component -3. Counting from the end
    # keeps this independent of where the data directory sits in the filesystem.
    split_path = path.split('/')
    animalID, date = split_path[-4], split_path[-3]
    # setdefault creates the list for an animal the first time it is seen.
    good_sessions.setdefault(animalID, []).append(date)


In [70]:
# Show the session IDs of GRB007 that passed the performance threshold.
good_sessions['GRB007']

['20231004_135927',
 '20231005_144336',
 '20231006_153149',
 '20231009_155246',
 '20231010_145209',
 '20231011_145613',
 '20231012_151257',
 '20231016_150708',
 '20231017_160801',
 '20231018_170851',
 '20231019_135758',
 '20231020_150357',
 '20231023_155038',
 '20231024_150818',
 '20231026_125027',
 '20231027_140706']

: 