In [1]:
import os
import math

import analysis_session_helper_functions_v2 as helper
import analysis_utils as utils

import pandas as pd

In [2]:
cohort = 'cohort_2'
to_analyze = '509_clean'
data_dir = '/Users/rebekahzhang/Documents/shuler_lab/behavior_data'
data_folder = os.path.join(data_dir, cohort, to_analyze)
print(data_folder)

/Users/rebekahzhang/Documents/shuler_lab/behavior_data/cohort_2/509_clean


# Generates all session logs

In [3]:
session_log = helper.generate_all_session_log(data_folder)
session_log.head()

Unnamed: 0,date,mouse,dir,filename
0,2023-04-12,RZ013,2023-04-12_05-44-07_RZ013,data_RZ013_2023-04-12_05-44-07.txt
1,2023-05-09,RZ017,2023-05-09_13-26-19_RZ017,data_RZ017_2023-05-09_13-26-19.txt
2,2023-04-25,RZ015,2023-04-25_12-10-41_RZ015,data_RZ015_2023-04-25_12-10-41.txt
3,2023-05-02,RZ013,2023-05-02_11-18-24_RZ013,data_RZ013_2023-05-02_11-18-24.txt
4,2023-05-04,RZ015,2023-05-04_13-23-22_RZ015,data_RZ015_2023-05-04_13-23-22.txt


get a list of mouse names

In [4]:
# prints mouse names in data folder, check for weird ones and delete from data base
mouse_list = session_log.mouse.unique().tolist()
mouse_list.sort()
print(mouse_list)

['RZ012', 'RZ013', 'RZ014', 'RZ015', 'RZ016', 'RZ017']


get the type of training for session meta data and add to log

In [5]:
training_list = []
for dir_name, file_name in zip(session_log.dir, session_log.filename):
    session_meta = utils.load_session_meta(data_folder, dir_name, file_name)
    training = session_meta.training.tolist()[0]
    training_list.append(training)

session_log['training'] = training_list

add columns of basic info to each session to log

In [6]:
session = utils.load_session(data_folder, dir_name, file_name)

In [7]:
session_basics_columns = ['num_blocks', 'num_trials', 'rewards', 'time', 'proper_end']
for dir_name, file_name in zip(session_log.dir, session_log.filename):
    session = utils.load_session(data_folder, dir_name, file_name)
    session_basic = helper.get_session_basics(session)
    session_log.loc[session_log.dir == dir_name, session_basics_columns] = session_basic

### Focus only on regular training sessions

In [8]:
training_session_log = session_log.loc[session_log.training == 'regular'].sort_values('dir').reset_index()
training_session_log.head()

Unnamed: 0,index,date,mouse,dir,filename,training,num_blocks,num_trials,rewards,time,proper_end
0,12,2023-04-25,RZ012,2023-04-25_11-00-06_RZ012,data_RZ012_2023-04-25_11-00-06.txt,regular,3.0,176.0,541.35,1314.41,True
1,34,2023-04-25,RZ013,2023-04-25_11-25-20_RZ013,data_RZ013_2023-04-25_11-25-20.txt,regular,2.0,106.0,367.27,832.84,False
2,75,2023-04-25,RZ014,2023-04-25_11-43-17_RZ014,data_RZ014_2023-04-25_11-43-17.txt,regular,3.0,216.0,458.45,1452.66,True
3,2,2023-04-25,RZ015,2023-04-25_12-10-41_RZ015,data_RZ015_2023-04-25_12-10-41.txt,regular,3.0,184.0,645.04,1517.78,True
4,32,2023-04-25,RZ016,2023-04-25_12-38-31_RZ016,data_RZ016_2023-04-25_12-38-31.txt,regular,3.0,198.0,528.83,1380.37,True


### Examine quality of sessions
doesn't need to run when data folder is cleaned

check for missing sessions by the number of sessions in each training day

In [9]:
num_mice = 6
date_list = training_session_log.date.unique().tolist()
for date in date_list:
    data = training_session_log.loc[training_session_log['date'] == date]
    if len(data) < num_mice:
        print(date)

2023-04-27


check for same mouse with multiple sessions per day, prints out date and mouse if too many

In [10]:
# prints dates of the same mouse with multiple sessions
for d in training_session_log.date.unique().tolist():
    session_of_the_day = training_session_log.loc[training_session_log['date'] == d]
    for mouse in mouse_list:
        count = len(session_of_the_day.loc[session_of_the_day['mouse'] == mouse])
        if count > 1:
            print(d, mouse)   

check for short sessions, prints out dir

In [11]:
short_session = training_session_log.loc[(training_session_log['training'] == 'regular') & 
                                         (training_session_log['num_trials'] < 100)] 
print(short_session.dir)

Series([], Name: dir, dtype: object)


### Add number of days in training to training log

In [12]:
for mouse in mouse_list:
    total_days = sum(training_session_log.mouse == mouse)
    training_session_log.loc[training_session_log.mouse == mouse, 'days'] = list(range(total_days))

In [13]:
training_session_log.days.max()

10.0

### Saves all sessions log and training session log

In [14]:
filename = f'all_sessions.csv'
path = os.path.join(data_folder, filename)
session_log.to_csv(path, index=False)

In [15]:
filename = f'training_sessions.csv'
path = os.path.join(data_folder, filename)
training_session_log.to_csv(path)

# Generate all trials per session

load session log

In [16]:
training_session_log = utils.load_session_log(data_folder, '509_training_sessions.csv')

### Raw data processing and generate initial all trials df
adds trial numbers and states to hardware entries
create all trials df with trial basics added
both files saved, 
does't need to be rerun

In [17]:
all_trials_column_names = ['session_trial_num', 'block_trial_num', 'block_num', 'start_time', 'end_time']

In [18]:
for dir_name, file_name in zip(training_session_log.dir, training_session_log.filename):
    
    processed_path = os.path.join(data_folder, dir_name, f'processed_{file_name[:-4]}.csv')
    all_trials_path =os.path.join(data_folder, dir_name, f'{dir_name}_all_trials.csv')
    # if os.path.isfile(processed_path) and os.path.isfile(all_trials_path):
    #     continue

    session = utils.load_session(data_folder, dir_name, file_name)
    session['trial_time'] = ''
    total_trial_list = helper.generate_total_trial_list(training_session_log, dir_name)
    all_trials = pd.DataFrame(columns=all_trials_column_names)

    for t in total_trial_list:
        trial = session.loc[session['session_trial_num'] == t]
        trial_basics = helper.get_trial_basics(trial)
        helper.add_trial_time(session, t, trial, trial_basics)

        trial_basics = pd.DataFrame([trial_basics])
        all_trials = pd.concat([all_trials, trial_basics], ignore_index=True)
    
    session = utils.trim_session(training_session_log, dir_name, session)
    session.to_csv(processed_path)
    all_trials.to_csv(all_trials_path)

### Adding analyzed trial data to all trials df

In [19]:
all_trials_data_column = ['bg_drawn', 'blk_type', 'bg_length', 'enl_repeats', 
                          'miss_trial', 'time_waited', 'reward', 'num_consumption_lick']

In [20]:
def get_trial_bg_data(trial):
    bg_start_idx = trial.index[(trial['key'] == 'trial') & (trial['value'] == 1)].tolist()
    bg_end_idx = trial.index[(trial['key'] == 'wait') & (trial['value'] == 1)].tolist()
    trial_bg = trial.loc[bg_start_idx[0] : bg_end_idx[0]]
    bg_drawn = float(trial_bg.iloc[0]['time_bg'])
    if bg_drawn < 2:
        blk_type = 's'
    elif bg_drawn > 2:
        blk_type = 'l'
    bg_length = trial_bg.session_time.max() - trial_bg.session_time.min()
    enl_repeats = trial['key'].value_counts()['enl']
    return [bg_drawn, blk_type, bg_length, enl_repeats]  

In [21]:
def get_trial_wait_data(trial):
    """gets 3 values about trial performance, takes trial raw data as input"""
    wait_start_time = trial.loc[(trial['key'] == 'wait') & (trial['value'] == 1), 'session_time'].iloc[0]
    if 'consumption' in trial.key.unique():
        miss_trial = False
        reward = trial.loc[trial['key'] == 'consumption', 'reward_size'].iloc[0]
        consumption_start_time = trial.loc[trial['key'] == 'consumption', 'session_time'].iloc[0]
        time_waited = consumption_start_time - wait_start_time
        consumption = trial.loc[trial['state'] == 'in_consumption']
        num_consumption_lick = len(consumption.loc[(consumption['key'] == 'lick') & (trial['value'] == 1)])
    else:
        miss_trial = True
        reward = math.nan
        time_waited = math.nan
        num_consumption_lick = math.nan
    return [miss_trial, time_waited, reward, num_consumption_lick]

In [22]:
def get_trial_performance(trial):
    bg_data = get_trial_bg_data(trial)
    wait_data = get_trial_wait_data(trial)
    return [bg_data + wait_data]

In [23]:
for dir_name, file_name in zip(training_session_log.dir, training_session_log.filename):
    all_trials_analyzed_path =os.path.join(data_folder, dir_name, f'{dir_name}_all_trials_analyzed.csv')
    # if os.path.isfile(all_trials_analyzed_path):
    #     continue

    processed_session = utils.load_processed_session(data_folder, dir_name, file_name)
    all_trials = utils.load_all_trials(data_folder, dir_name)
    total_trial_list = helper.generate_total_trial_list(training_session_log, dir_name)

    for t in total_trial_list:
        trial = processed_session.loc[processed_session['session_trial_num'] == t]
        trial.loc[trial['key'] == 'reward', 'key'] = 'consumption'
        trial_performance = get_trial_performance(trial)
        all_trials.loc[all_trials['session_trial_num'] == t, all_trials_data_column] = trial_performance
    
    all_trials.to_csv(all_trials_analyzed_path)

# Generate all blocks per session

load session log

In [24]:
training_session_log = helper.load_session_log(data_folder, '509_training_sessions.csv')

AttributeError: module 'analysis_session_helper_functions_v2' has no attribute 'load_session_log'

### generate all blocks df and saves to raw data folder

In [None]:
all_blocks_column_names = ['block_num', 'blk_type', 'num_trials', 'start_time', 'end_time',
                           'bg_drawn_mean', 'bg_drawn_std', 'bg_length_mean', 'bg_length_std', 
                           'enl_repeats_mean', 'enl_repeats_std', 'num_miss_trials', 'time_waited_mean', 
                           'time_waited_std', 'reward_mean', 'reward_std', 'num_consumption_lick_mean', 
                           'num_consumption_lick_std']

In [None]:
for dir_name, file_name in zip(training_session_log.dir, training_session_log.filename):
    filename = f'{dir_name}_all_blocks.csv'
    path = os.path.join(data_folder, dir_name, filename)
    # if os.path.isfile(path):
    #     continue

    all_trials_analyzed = helper.load_all_trials_analyzed(data_folder, dir_name)
    total_block_list = helper.generate_total_block_list(training_session_log, dir_name)
    all_blocks = helper.generate_all_blocks_df(all_blocks_column_names, total_block_list)
    
    for blk in total_block_list:
        block = all_trials_analyzed.loc[all_trials_analyzed['block_num'] == blk]
        block_data = helper.get_block_data(block)
        all_blocks.loc[all_blocks.block_num == blk, all_blocks_column_names[1:]] = block_data
        
    all_blocks.to_csv(path, index=False)