#### Loading notebook
###### Code Ocean use only

In [3]:

%reload_ext autoreload
%autoreload 2
import numpy as np
import pandas as pd
import os

import glob
import re
from datetime import datetime
import sys
from pathlib import Path
from typing import Union, Dict, List, Optional, Tuple
import tqdm

from aind_dynamic_foraging_data_utils import nwb_utils as nu
import aind_dynamic_foraging_basic_analysis.licks.annotation as a

import sys
sys.path.insert(0, '/root/capsule/src/utils')
import behavior_utils as bu
import array_utils as au
import load_utils as lu

ModuleNotFoundError: No module named 'behavior_utils'

In [None]:

sys.path.insert(0, '/Users/nickkeesey/Desktop/learning-dynamics-behavior')

import process_nwbs as pn

##### Full Processing Notebook: 
###### 1. Use nwb_utils to load nwbs from base directory and create df_trials 
###### 2. Use compute_session_metadata / compute_session_performance to compute session metadata and performance metrics from df_trials
###### 3. Concatenate and compare to existing bonsai dataframe, clean and save


____
##### Full Session Processing:

##### First check if custom pipeline replicates the Bonsai pipeline

In [83]:

def process_nwb_to_df(nwb_path):
    """Process a single NWB file into a dataframe with meta and performance data 
    
    Args:
        nwb_path (str): Path to NWB file
        
    Returns:
        pd.DataFrame: Combined dataframe with meta and performance data
    """
    try:
        # Load NWB file
        nwb = nu.load_nwb_from_filename(nwb_path)
        
        # Compute trial, session meta, and session performance dataframes
        df_trials = pn.compute_df_trial(nwb)
        
        meta_df = pn.compute_df_session_meta(nwb, df_trials)
        meta_df.columns = meta_df.columns.droplevel(0)  # Remove 'metadata' level
        
        performance_df = pn.compute_df_session_performance(nwb, df_trials) 
        performance_df.columns = performance_df.columns.droplevel(0)  # Remove 'session_stats' level
        
        # Convert MultiIndex to flat index before concatenation
        if isinstance(meta_df.index, pd.MultiIndex):
            meta_df = meta_df.reset_index()
        if isinstance(performance_df.index, pd.MultiIndex):
            performance_df = performance_df.reset_index()
            
        # Concatenate the dataframes
        session_df = pd.concat([meta_df, performance_df], axis=1)
        
        return session_df
    
    except Exception as e:
        print(f"Error processing {nwb_path}: {str(e)}")
        return None
    

session_df = process_nwb_to_df('/Users/nickkeesey/Desktop/learning-dynamics-behavior/data/769887_2024-11-06_09-39-22.nwb')


In [84]:
# Check with real data
df_compare = pd.read_csv('/Users/nickkeesey/Desktop/learning-dynamics-behavior/data/filtered_data_20241107.csv')

subject = 769887
session_date = '2024-11-06'

# Flatten the MultiIndex columns in session_df
if isinstance(session_df.columns, pd.MultiIndex):
    session_df.columns = session_df.columns.get_level_values(-1)

# Match with single session nwb from above 
df_compare = df_compare[df_compare['subject_id'] == subject]
df_compare = df_compare[df_compare['session_date'] == session_date]

# Get columns from each dataframe
df_cols = set(session_df.columns)
df_compare_cols = set(df_compare.columns)

# Print columns that are in one dataframe but not the other
print("Columns in df but not in df_compare:")
print(df_cols - df_compare_cols)
print("\nColumns in df_compare but not in df:")
print(df_compare_cols - df_cols)

# Define a function to check if a value is a number
def is_number(value):
    try:
        float(value)
        return True
    except (TypeError, ValueError):
        return False

# Define a function to check if two values are close enough (b/t 4 sig figs)
def is_close_enough(val1, val2, sig_figs=4):
    if not (is_number(val1) and is_number(val2)):
        return val1 == val2
    
    # Convert to floats
    val1, val2 = float(val1), float(val2)
    
    # Handle zero values
    if val1 == 0 or val2 == 0:
        return abs(val1 - val2) < 1e-10
    
    # Calculate relative difference
    rel_diff = abs((val1 - val2) / max(abs(val1), abs(val2)))
    return rel_diff < 10**(-sig_figs)

# Compare values for common columns
common_cols = df_cols.intersection(df_compare_cols)
for col in common_cols:
    session_val = session_df[col].iloc[0]
    df_compare_val = df_compare[col].iloc[0]
    
    if pd.isna(session_val) and pd.isna(df_compare_val):
        print(f"{col}: Both NaN")
    elif is_close_enough(session_val, df_compare_val):
        print(f"{col}: Same value (within 4 sig figs)")
        print(f"  session_df: {session_val}")
        print(f"  df_compare: {df_compare_val}")
    else:
        print(f"{col}: Different values")
        print(f"  session_df: {session_val}")
        print(f"  df_compare: {df_compare_val}")

Columns in df but not in df_compare:
set()

Columns in df_compare but not in df:
{'logistic_Hattori2019_score_std', 'logistic_Hattori2019_bias', 'logistic_Bari2019_score_std', 'session_time', 'logistic_Miller2021_Choice_amp', 'logistic_Bari2019_bias', 'virus', 'logistic_Su2022_RewC_tau', 'logistic_Miller2021_Choice_x_Reward_amp', 'logistic_Miller2021_score_std', 'subject_genotype', 'logistic_Miller2021_Reward_amp', 'results', 'logistic_Bari2019_RewC_amp', 'logistic_Miller2021_Choice_tau', 'abs(logistic_Miller2021_bias)', 'data_source', 'has_ephys', 'logistic_Hattori2019_score_mean', 'logistic_Hattori2019_UnrC_tau', 'hardware', 'abs(logistic_Hattori2019_bias)', 'logistic_Miller2021_Choice_x_Reward_tau', 'avg_trial_length_in_seconds', 'weekday', 'logistic_Miller2021_bias', 'session_name', 'task_type', 'logistic_Hattori2019_RewC_amp', 'logistic_Hattori2019_RewC_tau', 'logistic_Su2022_score_std', 'injections', 'logistic_Bari2019_Choice_amp', 'water_after_session_last_session', 'old_bpod_se

  df_compare = pd.read_csv('/Users/nickkeesey/Desktop/learning-dynamics-behavior/data/filtered_data_20241107.csv')


##### Full Bonsai Processing:

In [12]:
def process_multiple_nwbs(nwb_dir, pattern="**/*.nwb"):
    """Process multiple NWB files from a directory and combine into one dataframe
    
    Args:
        nwb_dir (str): Directory containing NWB files
        pattern (str): Glob pattern to match NWB files. Default "**/*.nwb"
        
    Returns:
        pd.DataFrame: Combined dataframe with data from all NWB files
    """
    # Get list of all NWB files
    nwb_files = glob.glob(os.path.join(nwb_dir, pattern), recursive=True)
    
    if not nwb_files:
        raise ValueError(f"No NWB files found in {nwb_dir} matching pattern {pattern}")
        
    # Process each NWB file and collect results
    all_dfs = []
    for nwb_path in tqdm.tqdm(nwb_files, desc="Processing NWB files"):
        df = process_nwb_to_df(nwb_path)
        if df is not None:
            all_dfs.append(df)
            
    # Combine all dataframes
    if not all_dfs:
        raise ValueError("No valid dataframes were generated from the NWB files")
        
    combined_df = pd.concat(all_dfs, axis=0, ignore_index=True)
    
    return combined_df


nwb_directory = "/root/capsule/data/foraging_nwb_bonsai"
combined_df = process_multiple_nwbs(nwb_directory)

# Save results
combined_df.to_csv("combined_session_data.csv", index=False)
print(f"Successfully processed {len(combined_df)} sessions")

  reward_optimal = np.nanmean(p_stars) * len(p_Ls)
  a = op(a[slice1], a[slice2])
  (df_trial.loc[(df_trial.animal_response != IGNORE), 'n_lick_switches_gocue_stop'] > 0).sum()
  (df_trial.loc[df_trial.reward, 'n_lick_switches_gocue_stop'] > 0).sum()
  (df_trial.loc[(df_trial.animal_response != IGNORE) & (~df_trial.reward), 'n_lick_switches_gocue_stop'] > 0).sum()
  reward_optimal = np.nanmean(p_stars) * len(p_Ls)
  a = op(a[slice1], a[slice2])
  (df_trial.loc[(df_trial.animal_response != IGNORE), 'n_lick_switches_gocue_stop'] > 0).sum()
  (df_trial.loc[df_trial.reward, 'n_lick_switches_gocue_stop'] > 0).sum()
  (df_trial.loc[(df_trial.animal_response != IGNORE) & (~df_trial.reward), 'n_lick_switches_gocue_stop'] > 0).sum()
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."

Error processing /root/capsule/data/foraging_nwb_bonsai/668463_2023-07-07.nwb: Session date from the metadata (2023-07-06) does not match that from json name (2023-07-07)!!


  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cach

Error processing /root/capsule/data/foraging_nwb_bonsai/668546_2023-06-14.nwb: Session date from the metadata (2023-06-13) does not match that from json name (2023-06-14)!!


  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cach

Error processing /root/capsule/data/foraging_nwb_bonsai/668551_2023-06-15.nwb: Session date from the metadata (2023-06-14) does not match that from json name (2023-06-15)!!


  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
Processing NWB files:   2%|▏         | 115/6764 [05:07<4:31:11,  2.45s/it]

Error processing /root/capsule/data/foraging_nwb_bonsai/668551_2023-06-15_1.nwb: Session date from the metadata (2023-06-14) does not match that from json name (2023-06-15)!!


  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
Processing NWB files:   2%|▏         | 116/6764 [05:09<3:59:59,  2.17s/it]

Error processing /root/capsule/data/foraging_nwb_bonsai/668551_2023-06-15_2.nwb: Session date from the metadata (2023-06-14) does not match that from json name (2023-06-15)!!


  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  reward_actual / reward_optimal_random_seed,
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
Processing NWB files:   2%|▏         | 118/6764 [05:14<4:13:53,  2.29s/it]

Error processing /root/capsule/data/foraging_nwb_bonsai/668551_2023-06-16.nwb: Session date from the metadata (2023-06-15) does not match that from json name (2023-06-16)!!


  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  reward_actual / reward_optimal_random_seed,
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because versio

Error processing /root/capsule/data/foraging_nwb_bonsai/684039_2023-11-14_09-44-22.nwb: Subject name from the metadata (695950) does not match that from json name (684039)!!


  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cach

Error processing /root/capsule/data/foraging_nwb_bonsai/694874_2023-11-27_11-09-41.nwb: Subject name from the metadata (684874) does not match that from json name (694874)!!


  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cach

Error processing /root/capsule/data/foraging_nwb_bonsai/706893_2024-06-27_16-21-41 - Copy.nwb: invalid literal for int() with base 10: '162141  Copy'


  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  reward_optimal = np.nanmean(p_stars) * len(p_Ls)
  a = op(a[slice1], a[slice2])
  (df_trial.loc[(df_trial.animal_response != IGNORE) & (~df_trial.reward), 'n_lick_switches_gocue_stop'] > 0).sum()
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
 

Error processing /root/capsule/data/foraging_nwb_bonsai/708031_2024-07-12_08-37-49.nwb: Session date from the metadata (2024-07-29) does not match that from json name (2024-07-12)!!


  (df_trial.loc[df_trial.reward, 'n_lick_switches_gocue_stop'] > 0).sum()
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  reward_optimal = np.nanmean(p_stars) * len(p_Ls)
  a = op(a[slice1], a[slice2])
  (df_trial.loc[(df_trial.animal_response != IGNORE) & (~df_trial.reward), 'n_lick_switches_gocue_stop'] > 0).sum()
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring

Error processing /root/capsule/data/foraging_nwb_bonsai/718495_2024-08-09_19-07-29.nwb: Expecting value: line 1 column 1 (char 0)


  reward_optimal = np.nanmean(np.max([p_Ls, p_Rs], axis=0)) * len(p_Ls)
  reward_actual / reward_optimal_random_seed,
  (df_trial.loc[(df_trial.animal_response != IGNORE), 'n_lick_switches_gocue_stop'] > 0).sum()
  (df_trial.loc[(df_trial.animal_response != IGNORE) & (~df_trial.reward), 'n_lick_switches_gocue_stop'] > 0).sum()
  reward_optimal = np.nanmean(np.max([p_Ls, p_Rs], axis=0)) * len(p_Ls)
  reward_actual / reward_optimal_random_seed,
  (df_trial.loc[(df_trial.animal_response != IGNORE), 'n_lick_switches_gocue_stop'] > 0).sum()
  (df_trial.loc[(df_trial.animal_response != IGNORE) & (~df_trial.reward), 'n_lick_switches_gocue_stop'] > 0).sum()
  reward_optimal = np.nanmean(np.max([p_Ls, p_Rs], axis=0)) * len(p_Ls)
  reward_actual / reward_optimal_random_seed,
  (df_trial.loc[(df_trial.animal_response != IGNORE), 'n_lick_switches_gocue_stop'] > 0).sum()
  (df_trial.loc[(df_trial.animal_response != IGNORE) & (~df_trial.reward), 'n_lick_switches_gocue_stop'] > 0).sum()
  (df_trial.l

Error processing /root/capsule/data/foraging_nwb_bonsai/739199_2024-12-04_16-46-10.nwb: Subject name from the metadata (745505) does not match that from json name (739199)!!


  reward_optimal = np.nanmean(np.max([p_Ls, p_Rs], axis=0)) * len(p_Ls)
  reward_actual / reward_optimal_random_seed,
  (df_trial.loc[(df_trial.animal_response != IGNORE), 'n_lick_switches_gocue_stop'] > 0).sum()
  (df_trial.loc[df_trial.reward, 'n_lick_switches_gocue_stop'] > 0).sum()
  (df_trial.loc[(df_trial.animal_response != IGNORE) & (~df_trial.reward), 'n_lick_switches_gocue_stop'] > 0).sum()
Processing NWB files:  68%|██████▊   | 4602/6764 [4:35:16<1:55:26,  3.20s/it]

Error processing /root/capsule/data/foraging_nwb_bonsai/741210_2024-11-26_17-28-04.nwb: Subject name from the metadata (728568) does not match that from json name (741210)!!


  reward_optimal = np.nanmean(p_stars) * len(p_Ls)
  a = op(a[slice1], a[slice2])
  (df_trial.loc[(df_trial.animal_response != IGNORE) & (~df_trial.reward), 'n_lick_switches_gocue_stop'] > 0).sum()
  reward_optimal = np.nanmean(p_stars) * len(p_Ls)
  a = op(a[slice1], a[slice2])
  (df_trial.loc[(df_trial.animal_response != IGNORE) & (~df_trial.reward), 'n_lick_switches_gocue_stop'] > 0).sum()
  reward_optimal = np.nanmean(p_stars) * len(p_Ls)
  a = op(a[slice1], a[slice2])
  (df_trial.loc[(df_trial.animal_response != IGNORE), 'n_lick_switches_gocue_stop'] > 0).sum()
  (df_trial.loc[df_trial.reward, 'n_lick_switches_gocue_stop'] > 0).sum()
  (df_trial.loc[(df_trial.animal_response != IGNORE) & (~df_trial.reward), 'n_lick_switches_gocue_stop'] > 0).sum()
  reward_optimal = np.nanmean(p_stars) * len(p_Ls)
  a = op(a[slice1], a[slice2])
  (df_trial.loc[(df_trial.animal_response != IGNORE), 'n_lick_switches_gocue_stop'] > 0).sum()
  (df_trial.loc[df_trial.reward, 'n_lick_switches_gocue_stop

Error processing /root/capsule/data/foraging_nwb_bonsai/745505_2024-12-05_16-29-02.nwb: Subject name from the metadata (739199) does not match that from json name (745505)!!


  reward_optimal = np.nanmean(np.max([p_Ls, p_Rs], axis=0)) * len(p_Ls)
  reward_actual / reward_optimal_random_seed,
  (df_trial.loc[(df_trial.animal_response != IGNORE), 'n_lick_switches_gocue_stop'] > 0).sum()
  (df_trial.loc[df_trial.reward, 'n_lick_switches_gocue_stop'] > 0).sum()
  (df_trial.loc[(df_trial.animal_response != IGNORE) & (~df_trial.reward), 'n_lick_switches_gocue_stop'] > 0).sum()
  reward_optimal = np.nanmean(p_stars) * len(p_Ls)
  a = op(a[slice1], a[slice2])
  (df_trial.loc[(df_trial.animal_response != IGNORE), 'n_lick_switches_gocue_stop'] > 0).sum()
  (df_trial.loc[df_trial.reward, 'n_lick_switches_gocue_stop'] > 0).sum()
  (df_trial.loc[(df_trial.animal_response != IGNORE) & (~df_trial.reward), 'n_lick_switches_gocue_stop'] > 0).sum()
  reward_optimal = np.nanmean(np.max([p_Ls, p_Rs], axis=0)) * len(p_Ls)
  reward_actual / reward_optimal_random_seed,
  (df_trial.loc[(df_trial.animal_response != IGNORE), 'n_lick_switches_gocue_stop'] > 0).sum()
  (df_trial.loc[d

Error processing /root/capsule/data/foraging_nwb_bonsai/754580_2024-10-11_15-14-31.nwb: float division by zero


  reward_optimal = np.nanmean(p_stars) * len(p_Ls)
  a = op(a[slice1], a[slice2])
  (df_trial.loc[(df_trial.animal_response != IGNORE), 'n_lick_switches_gocue_stop'] > 0).sum()
  (df_trial.loc[df_trial.reward, 'n_lick_switches_gocue_stop'] > 0).sum()
  (df_trial.loc[(df_trial.animal_response != IGNORE) & (~df_trial.reward), 'n_lick_switches_gocue_stop'] > 0).sum()
Processing NWB files:  89%|████████▊ | 5992/6764 [6:02:27<29:32,  2.30s/it]

Error processing /root/capsule/data/foraging_nwb_bonsai/754769_2024-10-30_14-40-26.nwb: Subject name from the metadata (751769) does not match that from json name (754769)!!


  reward_optimal = np.nanmean(p_stars) * len(p_Ls)
  a = op(a[slice1], a[slice2])
  (df_trial.loc[(df_trial.animal_response != IGNORE) & (~df_trial.reward), 'n_lick_switches_gocue_stop'] > 0).sum()
  reward_optimal = np.nanmean(p_stars) * len(p_Ls)
  a = op(a[slice1], a[slice2])
  (df_trial.loc[(df_trial.animal_response != IGNORE) & (~df_trial.reward), 'n_lick_switches_gocue_stop'] > 0).sum()
  reward_optimal = np.nanmean(p_stars) * len(p_Ls)
  a = op(a[slice1], a[slice2])
  (df_trial.loc[(df_trial.animal_response != IGNORE), 'n_lick_switches_gocue_stop'] > 0).sum()
  (df_trial.loc[(df_trial.animal_response != IGNORE) & (~df_trial.reward), 'n_lick_switches_gocue_stop'] > 0).sum()
  reward_optimal = np.nanmean(p_stars) * len(p_Ls)
  a = op(a[slice1], a[slice2])
  (df_trial.loc[(df_trial.animal_response != IGNORE), 'n_lick_switches_gocue_stop'] > 0).sum()
  (df_trial.loc[df_trial.reward, 'n_lick_switches_gocue_stop'] > 0).sum()
  (df_trial.loc[(df_trial.animal_response != IGNORE) & (~df_

Error processing /root/capsule/data/foraging_nwb_bonsai/757210_2024-11-22_17-17-48.nwb: Subject name from the metadata (756345) does not match that from json name (757210)!!


  (df_trial.loc[(df_trial.animal_response != IGNORE) & (~df_trial.reward), 'n_lick_switches_gocue_stop'] > 0).sum()
  reward_optimal = np.nanmean(p_stars) * len(p_Ls)
  a = op(a[slice1], a[slice2])
  (df_trial.loc[(df_trial.animal_response != IGNORE), 'n_lick_switches_gocue_stop'] > 0).sum()
  (df_trial.loc[(df_trial.animal_response != IGNORE) & (~df_trial.reward), 'n_lick_switches_gocue_stop'] > 0).sum()
  reward_optimal = np.nanmean(p_stars) * len(p_Ls)
  a = op(a[slice1], a[slice2])
  (df_trial.loc[(df_trial.animal_response != IGNORE), 'n_lick_switches_gocue_stop'] > 0).sum()
  (df_trial.loc[df_trial.reward, 'n_lick_switches_gocue_stop'] > 0).sum()
  (df_trial.loc[(df_trial.animal_response != IGNORE) & (~df_trial.reward), 'n_lick_switches_gocue_stop'] > 0).sum()
  (df_trial.loc[(df_trial.animal_response != IGNORE) & (~df_trial.reward), 'n_lick_switches_gocue_stop'] > 0).sum()
  reward_optimal = np.nanmean(p_stars) * len(p_Ls)
  a = op(a[slice1], a[slice2])
  (df_trial.loc[(df_trial.

Error processing /root/capsule/data/foraging_nwb_bonsai/LA44_2023-06-09.nwb: 'NoneType' object has no attribute 'groups'


  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
Processing NWB files:  97%|█████████▋| 6568/6764 [6:37:02<07:47,  2.38s/it]

Error processing /root/capsule/data/foraging_nwb_bonsai/XY_05_2023-11-08_09-23-42.nwb: 'NoneType' object has no attribute 'groups'


  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
Processing NWB files:  97%|█████████▋| 6569/6764 [6:37:03<06:17,  1.94s/it]

Error processing /root/capsule/data/foraging_nwb_bonsai/XY_05_2023-11-08_09-29-11.nwb: 'NoneType' object has no attribute 'groups'


  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
Processing NWB files:  97%|█████████▋| 6570/6764 [6:37:04<05:09,  1.60s/it]

Error processing /root/capsule/data/foraging_nwb_bonsai/XY_05_2023-11-08_09-29-52.nwb: 'NoneType' object has no attribute 'groups'


  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
Processing NWB files:  97%|█████████▋| 6571/6764 [6:37:05<04:26,  1.38s/it]

Error processing /root/capsule/data/foraging_nwb_bonsai/XY_05_2023-11-08_09-31-29.nwb: 'NoneType' object has no attribute 'groups'


  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
Processing NWB files:  97%|█████████▋| 6572/6764 [6:37:05<03:51,  1.21s/it]

Error processing /root/capsule/data/foraging_nwb_bonsai/XY_05_2023-11-08_09-34-54.nwb: 'NoneType' object has no attribute 'groups'


  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
  warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
Processing NWB files:  97%|█████████▋| 6573/6764 [6:37:06<03:26,  1.08s/it]

Error processing /root/capsule/data/foraging_nwb_bonsai/XY_05_2023-11-08_09-35-42.nwb: 'NoneType' object has no attribute 'groups'


  reward_optimal = np.nanmean(p_stars) * len(p_Ls)
  a = op(a[slice1], a[slice2])
  (df_trial.loc[(df_trial.animal_response != IGNORE), 'n_lick_switches_gocue_stop'] > 0).sum()
  (df_trial.loc[df_trial.reward, 'n_lick_switches_gocue_stop'] > 0).sum()
  (df_trial.loc[(df_trial.animal_response != IGNORE) & (~df_trial.reward), 'n_lick_switches_gocue_stop'] > 0).sum()
  reward_optimal = np.nanmean(p_stars) * len(p_Ls)
  a = op(a[slice1], a[slice2])
  (df_trial.loc[(df_trial.animal_response != IGNORE), 'n_lick_switches_gocue_stop'] > 0).sum()
  (df_trial.loc[df_trial.reward, 'n_lick_switches_gocue_stop'] > 0).sum()
  (df_trial.loc[(df_trial.animal_response != IGNORE) & (~df_trial.reward), 'n_lick_switches_gocue_stop'] > 0).sum()
  reward_optimal = np.nanmean(p_stars) * len(p_Ls)
  a = op(a[slice1], a[slice2])
  (df_trial.loc[(df_trial.animal_response != IGNORE), 'n_lick_switches_gocue_stop'] > 0).sum()
  (df_trial.loc[df_trial.reward, 'n_lick_switches_gocue_stop'] > 0).sum()
  (df_trial.loc

Error processing /root/capsule/data/foraging_nwb_bonsai/behavior_745505_2024-12-06_15-16-08.nwb: Subject name from the metadata (739199) does not match that from json name (745505)!!


Processing NWB files:  98%|█████████▊| 6650/6764 [6:39:56<06:16,  3.30s/it]

Error processing /root/capsule/data/foraging_nwb_bonsai/behavior_745505_2024-12-09_15-05-45.nwb: Subject name from the metadata (745502) does not match that from json name (745505)!!


  reward_optimal = np.nanmean(np.max([p_Ls, p_Rs], axis=0)) * len(p_Ls)
  reward_actual / reward_optimal_random_seed,
  (df_trial.loc[(df_trial.animal_response != IGNORE), 'n_lick_switches_gocue_stop'] > 0).sum()
  (df_trial.loc[df_trial.reward, 'n_lick_switches_gocue_stop'] > 0).sum()
  (df_trial.loc[(df_trial.animal_response != IGNORE) & (~df_trial.reward), 'n_lick_switches_gocue_stop'] > 0).sum()
  reward_optimal = np.nanmean(p_stars) * len(p_Ls)
  a = op(a[slice1], a[slice2])
  (df_trial.loc[(df_trial.animal_response != IGNORE), 'n_lick_switches_gocue_stop'] > 0).sum()
  (df_trial.loc[df_trial.reward, 'n_lick_switches_gocue_stop'] > 0).sum()
  (df_trial.loc[(df_trial.animal_response != IGNORE) & (~df_trial.reward), 'n_lick_switches_gocue_stop'] > 0).sum()
Processing NWB files: 100%|██████████| 6764/6764 [6:47:30<00:00,  3.61s/it]


Successfully processed 6738 sessions


In [4]:
# Clean up modified bonsai dataframe

bonsai_df = pd.read_csv('/Users/nickkeesey/Desktop/learning-dynamics-behavior/data/sub_300_foraging_session_bonsai.csv')

# Drop all extraneous subject_ids
# Create mask for subject_ids that have exactly 6 digits
six_digit_mask = bonsai_df['subject_id'].astype(str).str.len() == 6

# Filter dataframe to keep only rows where subject_id has 6 digits
bonsai_df = bonsai_df[six_digit_mask].copy()

# Remove subject ID 111111
bonsai_df = bonsai_df[bonsai_df['subject_id'] != 111111].copy()

# Download cleaned bonsai dataframe
bonsai_df.to_csv('/Users/nickkeesey/Desktop/learning-dynamics-behavior/data/sub_300_foraging_session_bonsai_cleaned.csv', index=False)

____
##### 1. Trial Only Processing with subject_id and session_date columns

In [30]:
# Load nwbs from base directory with filenames matching valid regex patterns 
# Returns csv files into scratch folder (default)
lu.load_nwb_files(base_dir = base_dir)

OSError: [Errno 30] Read-only file system: '/root'

In [23]:
# Set source path for combined Pandas DataFrame creation
source_path = '/root/capsule/scratch'

# Create concatenated DataFrame using CSV files in source path
# Add subject_id and session_date categorical columns based on filename
processed_nwb_df = lu.process_csv_files(source_path)
print(processed_nwb_df.shape) 

Could not extract metadata from 0_2024-11-21.csv
Could not extract metadata from 1_2024-09-30.csv
Could not extract metadata from 45678_2024-09-11.csv
Could not extract metadata from 1_2024-04-13.csv
Could not extract metadata from 1_2024-09-16.csv
Could not extract metadata from 3_2024-09-23.csv
Could not extract metadata from 23456789_2024-10-31.csv
Could not extract metadata from 324567890_2024-10-31.csv
Could not extract metadata from 3_2024-10-21.csv
Could not extract metadata from 5_2024-09-09.csv
Could not extract metadata from 0_2024-12-04.csv
Could not extract metadata from 2_2024-08-14.csv
Could not extract metadata from 2_2024-08-05.csv
Could not extract metadata from 324567890_2024-11-01.csv
Could not extract metadata from 1_2024-04-19.csv
Could not extract metadata from 3_2024-10-07.csv
Could not extract metadata from 1_2024-04-06.csv
Could not extract metadata from 1_2024-08-19.csv
Could not extract metadata from 2345678_2024-11-01.csv
Could not extract metadata from 0_20

Unnamed: 0,trial,animal_response,rewarded_historyL,rewarded_historyR,bait_left,bait_right,base_reward_probability_sum,reward_probabilityL,reward_probabilityR,reward_random_number_left,...,reward_time_in_session,reward_time_in_trial,choice_time_in_session,choice_time_in_trial,earned_reward,extra_reward,subject_id,session_date,reward_delay,minimum_opto_interval
0,0,2.0,False,False,False,False,0.45,0.4,0.05,0.789076,...,,,,,False,False,687553,2023-12-01,,
1,1,2.0,False,False,False,False,0.45,0.4,0.05,0.518192,...,,,,,False,False,687553,2023-12-01,,
2,2,0.0,True,False,True,False,0.45,0.4,0.05,0.367511,...,16.160096,0.487072,16.153088,0.480064,True,False,687553,2023-12-01,,
3,3,0.0,True,False,True,False,0.45,0.4,0.05,0.053372,...,25.333088,0.376064,25.331872,0.374848,True,False,687553,2023-12-01,,
4,4,0.0,False,False,False,False,0.45,0.4,0.05,0.955191,...,,,34.364160,0.157440,False,False,687553,2023-12-01,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2577768,582,1.0,False,False,False,False,0.45,0.4,0.05,0.700221,...,,,5378.042624,0.199680,False,False,724555,2024-07-10,0.0,0.0
2577769,583,2.0,False,False,False,False,0.45,0.4,0.05,0.678467,...,,,,,False,False,724555,2024-07-10,0.0,0.0
2577770,584,1.0,False,False,True,False,0.45,0.4,0.05,0.050451,...,,,5392.707232,0.347424,False,False,724555,2024-07-10,0.0,0.0
2577771,585,1.0,False,False,True,False,0.45,0.4,0.05,0.118763,...,,,5404.919264,0.359232,False,False,724555,2024-07-10,0.0,0.0


In [24]:
# Set download location for combined DataFrame
filepath = '/root/capsule/data/file_process_output'

# Download DataFrame to selected filepath 
downloaded_path = lu.download_dataframe_to_csv(
    processed_nwb_df, 
    filepath=filepath, 
    filename='foraging_trials_bonsai.csv', 
    overwrite=True
)

In [2]:
# Check downloaded size 

file_size = os.path.getsize(filepath)
print("File size:", file_size, "bytes")

File size: 40 bytes
