## Add new behavior sessions to metadata sheet from data folders

* Notebook to automatically add basepath, basename and video name to metadata.csv for SNLab behavior sessions


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import glob

### Set paths to dataset and Metadata 


In [2]:
dataset_path = r"Y:\laura_berkowitz\app_ps1_ephys\data"
metadata_path = r"Y:\laura_berkowitz\behavior_metadata.csv"

## Find all folders in Cohort 6 and interate through. If a video exists, then add to metadata dataframe. 

In [6]:
metadata_df = pd.read_csv(metadata_path)

# normalize the basepath in metadata_df
metadata_df['basepath'] =[os.path.normpath(x) for x in metadata_df['basepath'].values] 
datafolders = glob.glob(dataset_path+'**\*\*', recursive=True)

In [7]:
metadata_df

Unnamed: 0,subid,basepath,genotype,age,session_date,dob,vidname,basename,exposure,maze_length_cm,...,paradigm,trial_start_1,trial_stop_1,trial_start_2,trial_stop_2,trial_start_3,trial_stop_3,trial_start_4,trial_stop_4,notes
0,beta,Y:\laura_berkowitz\alz_stim\data\beta\beta_day...,APPPS1,7.49,6/23/2023,11/7/2022,na,beta_day00_230623_110054,na,na,...,sleep,,,,,,,,,
1,beta,Y:\laura_berkowitz\alz_stim\data\beta\beta_day01,APPPS1,7.59,6/26/2023,11/7/2022,na,beta_day01,na,na,...,sleep,,,,,,,,,
2,beta,Y:\laura_berkowitz\alz_stim\data\beta\beta_day02,APPPS1,7.62,6/27/2023,11/7/2022,beta_day02_OLbase_20230627_143130482,beta_day02,1,30,...,ripple_prolongation,,,,,,,,,
3,beta,Y:\laura_berkowitz\alz_stim\data\beta\beta_day03,APPPS1,7.66,6/28/2023,11/7/2022,beta_OL_learning_day03_20230628_134047740,beta_day03,2,30,...,ripple_prolongation,1476.0,10637.0,,,,,,,
4,beta,Y:\laura_berkowitz\alz_stim\data\beta\beta_day03,APPPS1,7.66,6/28/2023,11/7/2022,beta_OL_test_day03_20230628_175938541,beta_day03,2,30,...,na,2793.0,40101.0,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
585,hpc17,Y:\laura_berkowitz\app_ps1_ephys\data\hpc17\hp...,APPPS1,9.72,9/5/2024,11/14/2023,hpc17_context_day13-09052024144926,hpc17_day13_240905_103951,,30,...,BBAA,,,,,,,,,
586,hpc17,Y:\laura_berkowitz\app_ps1_ephys\data\hpc17\hp...,APPPS1,9.89,9/10/2024,11/14/2023,hpc17_OLbaseline_day15-09102024141500,hpc17_day10_240828_094107,1,30,...,encoding,,,,,,,,,
587,hpc17,Y:\laura_berkowitz\app_ps1_ephys\data\hpc17\hp...,APPPS1,9.89,9/10/2024,11/14/2023,hpc17_OLtest_day15-09102024165805,hpc17_day15_240910_103432,1,30,...,encoding,,,,,,,,,
588,hpc17,Y:\laura_berkowitz\app_ps1_ephys\data\hpc17\hp...,APPPS1,9.92,9/11/2024,11/14/2023,hpc17_context_day16-09112024133524,hpc17_day16_240911_095103,,30,...,BABA,,,,,,,,,


In [8]:
# remove paths that are not folders
datafolders = [x for x in datafolders if os.path.isdir(x)]

# remove folders within to_split folder 
datafolders = [os.path.normpath(x) for x in datafolders if 'to_split' not in x]
datafolders

['Y:\\laura_berkowitz\\app_ps1_ephys\\data\\hpc01\\day00_211026_134000',
 'Y:\\laura_berkowitz\\app_ps1_ephys\\data\\hpc01\\day01_211027_123548',
 'Y:\\laura_berkowitz\\app_ps1_ephys\\data\\hpc01\\day02_211028_123848',
 'Y:\\laura_berkowitz\\app_ps1_ephys\\data\\hpc01\\day03_211029_153139',
 'Y:\\laura_berkowitz\\app_ps1_ephys\\data\\hpc01\\day04_211030_130255',
 'Y:\\laura_berkowitz\\app_ps1_ephys\\data\\hpc01\\day05_211101_140116',
 'Y:\\laura_berkowitz\\app_ps1_ephys\\data\\hpc01\\day06_211102_144909',
 'Y:\\laura_berkowitz\\app_ps1_ephys\\data\\hpc01\\day07_211103_083457',
 'Y:\\laura_berkowitz\\app_ps1_ephys\\data\\hpc01\\day08_211104_093612',
 'Y:\\laura_berkowitz\\app_ps1_ephys\\data\\hpc01\\day09_211105_093814',
 'Y:\\laura_berkowitz\\app_ps1_ephys\\data\\hpc01\\day10_211108_111237',
 'Y:\\laura_berkowitz\\app_ps1_ephys\\data\\hpc01\\day11_211109_121437',
 'Y:\\laura_berkowitz\\app_ps1_ephys\\data\\hpc01\\day12_211110_110332',
 'Y:\\laura_berkowitz\\app_ps1_ephys\\data\\hpc01\\

## Iterater through each folder, if a video exists, add each video to the metadata dataframe, including basepath, subid, basename and video name. 

In [9]:
for basepath in datafolders: 
    # check if the folder contains a video file 
    vid_files = glob.glob(basepath + '\*.avi')
    basename = os.path.basename(basepath)
    subid = basename.split('_')[0]
    

    # if the folder contains a video file & the subid is not already in the metadata dataframe
    if (len(vid_files) > 0) & (basepath not in metadata_df['basepath'].values):
        # add each video file to the metadata dataframe
        for vid in vid_files:
            # concatenate the metadata dataframe with the new video file
            metadata_df = metadata_df.append({'subid': subid, 'vidname': os.path.basename(vid).split('.')[0], 'basepath': basepath,'basename':basename}, ignore_index=True)
    elif basepath not in metadata_df['basepath'].values:
        
        metadata_df = metadata_df.append({'subid': subid, 'vidname': 'na', 'basepath': basepath,'basename':basename}, ignore_index=True)
        print(f'No video files found in {basepath}')
        continue
        
        

AttributeError: 'DataFrame' object has no attribute 'append'

In [33]:
metadata_df

Unnamed: 0,subid,basepath,genotype,age,session_date,dob,vidname,basename,exposure,maze_length_cm,...,rewarded_arena,trial_start_1,trial_stop_1,trial_start_2,trial_stop_2,trial_start_3,trial_stop_3,trial_start_4,trial_stop_4,notes
0,beta,Y:\laura_berkowitz\alz_stim\data\beta\beta_day03,APPPS1,7.0,6/28/2023,11/7/2022,beta_OL_learning_day03_20230628_134047740,beta_day03,1.0,30,...,na,1476.0,10637.0,,,,,,,
1,beta,Y:\laura_berkowitz\alz_stim\data\beta\beta_day03,APPPS1,7.0,6/28/2023,11/7/2022,beta_OL_test_day03_20230628_175938541,beta_day03,1.0,30,...,na,2793.0,40101.0,,,,,,,
2,beta,Y:\laura_berkowitz\alz_stim\data\beta\beta_day...,APPPS1,8.0,7/13/2023,11/7/2022,beta_context_day04-07132023135730,beta_day04_230713_113346,1.0,30,...,na,7606.0,44032.0,56644.0,93354.0,105788.0,143828.0,156588.0,197314.0,
3,beta,Y:\laura_berkowitz\alz_stim\data\beta\beta_day...,APPPS1,8.0,7/24/2023,11/7/2022,beta_context_day06-07242023143151,beta_day06_230724_121943,2.0,30,...,na,10845.0,48313.0,71310.0,108400.0,120990.0,158680.0,173003.0,210012.0,
4,beta,Y:\laura_berkowitz\alz_stim\data\beta\beta_day...,APPPS1,8.0,7/26/2023,11/7/2022,beta_context_day07-07262023124858,beta_day07_230726_102440,3.0,30,...,na,12675.0,50245.0,63361.0,100284.0,116559.0,154141.0,168526.0,205415.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
571,beta,Y:\laura_berkowitz\alz_stim\data\beta\beta_day...,,,,,na,beta_day19_230921_115722,,,...,,,,,,,,,,
572,beta,Y:\laura_berkowitz\alz_stim\data\beta\beta_tes...,,,,,na,beta_testing,,,...,,,,,,,,,,
573,dyno,Y:\laura_berkowitz\alz_stim\data\dyno\dyno_day...,,,,,na,dyno_day02_231103_182439,,,...,,,,,,,,,,
574,dyno,Y:\laura_berkowitz\alz_stim\data\dyno\dyno_tes...,,,,,na,dyno_test_231106_112841,,,...,,,,,,,,,,


## Save the updated metadata dataframe back to folder

In [29]:
## Save the metadata to a csv file
metadata_df.to_csv(metadata_path, index=False)