In [1]:
import os

import analysis_session_helper_functions_c3 as helper
import analysis_utils as utils

import pandas as pd

In [2]:
# Analysis configuration: which cohort and which cleaned data set to process.
cohort = 'cohort_3'
to_analyze = 'full_clean'
# Root of the behavior data. The BEHAVIOR_DATA_DIR environment variable, when set,
# overrides the original hard-coded location so the notebook can run on another
# machine without editing this cell; when unset, the path is unchanged.
data_dir = os.environ.get(
    'BEHAVIOR_DATA_DIR',
    '/Users/rebekahzhang/Documents/shuler_lab/behavior_data',
)
data_folder = os.path.join(data_dir, cohort, to_analyze)
print(data_folder)

/Users/rebekahzhang/Documents/shuler_lab/behavior_data/cohort_3/full_clean


# Generates all session logs
does not need to be run if session logs have been generated

In [3]:
# Build the session log for data_folder; the preview below shows its columns
# (date, mouse, dir, filename), one row per session.
session_log = helper.generate_all_session_log(data_folder)
# Preview only the first rows rather than dumping the whole log.
session_log.head()

Unnamed: 0,date,mouse,dir,filename
0,2023-07-25,RZ021,2023-07-25_13-42-06_RZ021,data_RZ021_2023-07-25_13-42-06.txt
1,2023-07-26,RZ020,2023-07-26_13-28-27_RZ020,data_RZ020_2023-07-26_13-28-27.txt
2,2023-07-22,RZ020,2023-07-22_12-16-16_RZ020,data_RZ020_2023-07-22_12-16-16.txt
3,2023-07-25,RZ020,2023-07-25_13-40-34_RZ020,data_RZ020_2023-07-25_13-40-34.txt
4,2023-07-23,RZ019,2023-07-23_13-38-21_RZ019,data_RZ019_2023-07-23_13-38-21.txt


get a list of mouse names

In [4]:
# List the unique mouse names present in the session log, sorted; check the
# printed list for unexpected names and delete those sessions from the data base.
mouse_list = sorted(session_log.mouse.unique().tolist())
print(mouse_list)

['RZ018', 'RZ019', 'RZ020', 'RZ021', 'RZ022', 'RZ023', 'RZ024', 'RZ025']


get the training type from each session's metadata and add it to the log

In [5]:
# For every logged session, load its metadata and keep the first entry of the
# `training` column; the values are stored in log order as a new log column.
training_list = [
    utils.load_session_meta(data_folder, dir_name, file_name).training.tolist()[0]
    for dir_name, file_name in zip(session_log.dir, session_log.filename)
]

session_log['training'] = training_list

add columns of basic info to each session to log

In [6]:
# Summary columns appended to the session log, filled one session at a time.
session_basics_columns = ['num_blocks', 'num_trials', 'rewards', 'time', 'proper_end']
for dir_name, file_name in zip(session_log.dir, session_log.filename):
    session = utils.load_session(data_folder, dir_name, file_name)
    # presumably returns one value per entry of session_basics_columns, in that order — TODO confirm
    session_basic = helper.get_session_basics(session)
    # Rows are matched by `dir`; assumes each dir appears only once in the log — verify.
    session_log.loc[session_log.dir == dir_name, session_basics_columns] = session_basic

### Focus only on regular training sessions

In [7]:
# Keep only sessions whose training type is 'regular', ordered by session
# directory name. reset_index() keeps the old row labels as an `index` column.
is_regular = session_log.training == 'regular'
training_session_log = (
    session_log.loc[is_regular]
    .sort_values('dir')
    .reset_index()
)
training_session_log.head()

Unnamed: 0,index,date,mouse,dir,filename,training,num_blocks,num_trials,rewards,time,proper_end
0,31,2023-07-25,RZ018,2023-07-25_10-24-14_RZ018,data_RZ018_2023-07-25_10-24-14.txt,regular,0.0,351.0,181.5,1767.68,True
1,36,2023-07-25,RZ020,2023-07-25_10-28-09_RZ020,data_RZ020_2023-07-25_10-28-09.txt,regular,0.0,361.0,137.9,2528.2,True
2,22,2023-07-25,RZ019,2023-07-25_10-56-16_RZ019,data_RZ019_2023-07-25_10-56-16.txt,regular,0.0,338.0,97.2,1639.49,True
3,38,2023-07-25,RZ021,2023-07-25_11-12-50_RZ021,data_RZ021_2023-07-25_11-12-50.txt,regular,0.0,353.0,629.6,2835.79,True
4,30,2023-07-25,RZ018,2023-07-25_13-05-38_RZ018,data_RZ018_2023-07-25_13-05-38.txt,regular,0.0,336.0,303.5,1915.5,True


### Examine quality of sessions
doesn't need to run when data folder is cleaned

check for short sessions, prints out dir

In [8]:
# Flag regular sessions with fewer than 100 trials and print their directories
# together with the trial counts.
is_regular_session = training_session_log['training'] == 'regular'
has_few_trials = training_session_log['num_trials'] < 100
short_session = training_session_log.loc[is_regular_session & has_few_trials]
print(short_session.dir, short_session.num_trials)

Series([], Name: dir, dtype: object) Series([], Name: num_trials, dtype: float64)


check for missing sessions by the number of sessions in each training day

In [None]:
# Print every training date that has fewer sessions than there are mice,
# i.e. a date on which at least one session appears to be missing.
num_mice = 8
date_list = training_session_log.date.unique().tolist()
for date in date_list:
    sessions_on_date = (training_session_log['date'] == date).sum()
    if sessions_on_date < num_mice:
        print(date)

check for same mouse with multiple sessions per day, prints out date and mouse if too many

In [None]:
# Print (date, mouse) for every mouse that has more than one session on a date.
for d in training_session_log.date.unique().tolist():
    on_this_date = training_session_log['date'] == d
    for mouse in mouse_list:
        is_this_mouse = training_session_log['mouse'] == mouse
        if (on_this_date & is_this_mouse).sum() > 1:
            print(d, mouse)

make a copy of the cleaned data before proceeding!

### Add training session number to training log

In [9]:
# Number each mouse's regular sessions 0, 1, 2, ... in order of session directory.
# `dir` starts with the session timestamp (see the log preview above), so sorting
# by it within a mouse orders that mouse's sessions in time.
training_session_log = training_session_log.sort_values(by=['mouse', 'dir'])
# cumcount() numbers rows within each mouse group in their current (sorted) order
# and returns integers directly. This replaces the per-mouse loop, which wrote
# floats and then failed on astype(int) if any mouse in the log was missing from
# mouse_list (its rows were left as NaN).
training_session_log['session_num'] = training_session_log.groupby('mouse').cumcount()

In [10]:
# Sanity check: highest session number across mice (numbering starts at 0).
training_session_log.session_num.max()

3

### Saves all sessions log and training session log

In [11]:
# Write the complete session log (all training types) into the data folder.
filename = 'all_sessions.csv'
path = os.path.join(data_folder, filename)
session_log.to_csv(path)

In [12]:
# Write the regular-training session log into the data folder.
filename = 'training_sessions.csv'
path = os.path.join(data_folder, filename)
training_session_log.to_csv(path)

# Generate all trials per session

load session log

In [13]:
# Reload the training log from training_sessions.csv (the file written by the
# save cell above); presumably lets this section start without rebuilding the log.
training_session_log = utils.load_session_log(data_folder, 'training_sessions.csv')

### Raw data processing and generate initial all trials df
adds trial numbers and states to hardware entries
create all trials df with trial basics added
both files saved, 
doesn't need to be rerun

In [14]:
# Column names that seed the per-session all-trials table built in the
# processing loop below.
all_trials_column_names = [
    'session_trial_num',
    'block_trial_num',
    'block_num',
    'start_time',
    'end_time',
]

In [15]:
# For every regular training session: annotate the raw session table trial by
# trial (the helpers modify `session` in place), collect one row of trial basics
# per trial, then save the processed session and the all-trials table as CSVs.
for dir_name, file_name in zip(training_session_log.dir, training_session_log.filename):

    processed_path = os.path.join(data_folder, dir_name, f'processed_{file_name[:-4]}.csv')
    all_trials_path = os.path.join(data_folder, dir_name, f'{dir_name}_all_trials.csv')
    # Skip guard is disabled, so every session is reprocessed and its files overwritten.
    # if os.path.isfile(processed_path) and os.path.isfile(all_trials_path):
    #     continue

    session = utils.load_session(data_folder, dir_name, file_name)
    session['trial_time'] = ''
    total_trial_list = helper.generate_total_trial_list(training_session_log, dir_name)

    # One single-row frame per trial. They are concatenated once after the loop
    # instead of growing `all_trials` with pd.concat on every iteration, which
    # copied the whole table each time and started from an empty frame.
    trial_frames = []

    for t in total_trial_list:
        trial = session.loc[session['session_trial_num'] == t]

        trial_basics = helper.get_trial_basics(trial)
        helper.align_trial_number(session, trial_basics)

        trial_state_times = helper.get_trial_state_times(trial)
        helper.align_trial_states(session, trial_state_times, trial_basics)

        # Re-select the trial: the helpers above have modified `session`.
        trial = session.loc[session['session_trial_num'] == t]
        helper.add_trial_time(session, t, trial, trial_basics)

        # Snapshot this trial's basics as a row right away, as the original did.
        trial_frames.append(pd.DataFrame([trial_basics]))

    if trial_frames:
        all_trials = pd.concat(trial_frames, ignore_index=True)
        # Keep the declared columns first, followed by any additional columns in
        # order of first appearance (the layout the incremental concat produced).
        extra_columns = [c for c in all_trials.columns if c not in all_trials_column_names]
        all_trials = all_trials.reindex(columns=all_trials_column_names + extra_columns)
    else:
        # No trials: keep an empty table that still carries the declared columns.
        all_trials = pd.DataFrame(columns=all_trials_column_names)

    session = utils.trim_session(training_session_log, dir_name, session)
    session.to_csv(processed_path)
    all_trials.to_csv(all_trials_path)

### Adding analyzed trial data to all trials df

In [None]:
# Columns added to the all-trials table by the analysis loop below; they are
# filled per trial from helper.get_trial_performance, so the order matters.
all_trials_data_column = [
    'bg_drawn',
    'blk_type',
    'bg_length',
    'miss_trial',
    'good_trial',
    'time_waited',
    'reward',
    'num_consumption_lick',
]

In [None]:
# For every regular training session: load the processed session and its
# all-trials table, fill the all_trials_data_column columns trial by trial, and
# save the result as <dir_name>_all_trials_analyzed.csv.
for dir_name, file_name in zip(training_session_log.dir, training_session_log.filename):
    all_trials_analyzed_path =os.path.join(data_folder, dir_name, f'{dir_name}_all_trials_analyzed.csv')
    # Skip guard is disabled, so every session is recomputed and its file overwritten.
    # if os.path.isfile(all_trials_analyzed_path):
    #     continue

    processed_session = utils.load_processed_session(data_folder, dir_name, file_name)
    all_trials = utils.load_all_trials(data_folder, dir_name)
    total_trial_list = helper.generate_total_trial_list(training_session_log, dir_name)

    for t in total_trial_list:
        trial = processed_session.loc[processed_session['session_trial_num'] == t]
        # presumably returns one value per entry of all_trials_data_column, in that order — TODO confirm
        trial_performance = helper.get_trial_performance(trial)
        # Write the values into the row(s) of all_trials whose session_trial_num equals t.
        all_trials.loc[all_trials['session_trial_num'] == t, all_trials_data_column] = trial_performance
    
    all_trials.to_csv(all_trials_analyzed_path)

# Generate all blocks per session

load session log

In [None]:
# Reload the training log from training_sessions.csv (written by the save cell
# earlier in this notebook); presumably lets this section start on its own.
training_session_log = utils.load_session_log(data_folder, 'training_sessions.csv')

### generate all blocks df and saves to raw data folder

In [None]:
# Columns of the per-session all-blocks table. 'block_num' must stay first: the
# block loop below fills all_blocks_column_names[1:] from helper.get_block_data,
# so the order of the remaining names matters as well.
all_blocks_column_names = [
    'block_num',
    'blk_type',
    'num_trials',
    'start_time',
    'end_time',
    'bg_drawn_mean',
    'bg_drawn_std',
    'bg_length_mean',
    'bg_length_std',
    'enl_repeats_mean',
    'enl_repeats_std',
    'num_miss_trials',
    'time_waited_mean',
    'time_waited_std',
    'reward_mean',
    'reward_std',
    'num_consumption_lick_mean',
    'num_consumption_lick_std',
]

In [None]:
def generate_all_blocks_df(column_names, total_block_list):
    """
    Build the skeleton of an all-blocks table.

    column_names: names of the columns the table should have.
    total_block_list: block numbers, one per row of the result.

    Returns a DataFrame with the given columns whose 'block_num' column holds
    total_block_list; every other column is left empty for later filling.
    """
    return pd.DataFrame(columns=column_names).assign(block_num=total_block_list)

In [None]:
# For every regular training session: summarise the analyzed trials block by
# block and save the result as <dir_name>_all_blocks.csv in the session folder.
for dir_name, file_name in zip(training_session_log.dir, training_session_log.filename):
    filename = f'{dir_name}_all_blocks.csv'
    path = os.path.join(data_folder, dir_name, filename)
    # Skip guard is disabled, so every session is recomputed and its file overwritten.
    # if os.path.isfile(path):
    #     continue

    all_trials_analyzed = utils.load_all_trials_analyzed(data_folder, dir_name)
    total_block_list = helper.generate_total_block_list(training_session_log, dir_name)
    # NOTE(review): this calls the helper module's generate_all_blocks_df, not the
    # function of the same name defined in this notebook — confirm which is intended.
    all_blocks = helper.generate_all_blocks_df(all_blocks_column_names, total_block_list)
    
    for blk in total_block_list:
        block = all_trials_analyzed.loc[all_trials_analyzed['block_num'] == blk]
        # presumably returns one value per column after 'block_num', in list order — TODO confirm
        block_data = helper.get_block_data(block)
        # Fill every column except 'block_num' for the row(s) of this block.
        all_blocks.loc[all_blocks.block_num == blk, all_blocks_column_names[1:]] = block_data
        
    all_blocks.to_csv(path)

In [None]:
# Inspect the all-blocks table left over from the last session processed by the loop above.
all_blocks

In [None]:
# Spot check: reload one saved all-blocks file from disk.
# NOTE(review): `.dir[2]` selects by index label 2, so this assumes the reloaded
# log has a default integer index — confirm.
test = utils.load_all_blocks(data_folder, training_session_log.dir[2])

In [None]:
# Display the reloaded all-blocks table from the spot check.
test