## Add new behavior sessions to metadata sheet from data folders

* Notebook to automatically add basepath, basename and video name to metadata.csv for SNLab behavior sessions


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import glob

### Set paths to dataset and Metadata 


In [24]:
dataset_path = r"Y:\laura_berkowitz\alz_stim\data"
metadata_path = r"Y:\laura_berkowitz\behavior_metadata.csv"

## Find all folders in Cohort 6 and interate through. If a video exists, then add to metadata dataframe. 

In [29]:
metadata_df = pd.read_csv(metadata_path)

# normalize the basepath in metadata_df
metadata_df['basepath'] =[os.path.normpath(x) for x in metadata_df['basepath'].values] 
datafolders = glob.glob(dataset_path+'**\*\*', recursive=True)

In [30]:
metadata_df

Unnamed: 0,subid,basepath,genotype,age,session_date,dob,vidname,basename,exposure,maze_length_cm,...,rewarded_arena,trial_start_1,trial_stop_1,trial_start_2,trial_stop_2,trial_start_3,trial_stop_3,trial_start_4,trial_stop_4,notes
0,beta,Y:\laura_berkowitz\alz_stim\data\beta\beta_day03,APPPS1,7.0,6/28/2023,11/7/2022,beta_OL_learning_day03_20230628_134047740,beta_day03,1.0,30,...,na,1476.0,10637.0,,,,,,,
1,beta,Y:\laura_berkowitz\alz_stim\data\beta\beta_day03,APPPS1,7.0,6/28/2023,11/7/2022,beta_OL_test_day03_20230628_175938541,beta_day03,1.0,30,...,na,2793.0,40101.0,,,,,,,
2,beta,Y:\laura_berkowitz\alz_stim\data\beta\beta_day...,APPPS1,8.0,7/13/2023,11/7/2022,beta_context_day04-07132023135730,beta_day04_230713_113346,1.0,30,...,na,7606.0,44032.0,56644.0,93354.0,105788.0,143828.0,156588.0,197314.0,
3,beta,Y:\laura_berkowitz\alz_stim\data\beta\beta_day...,APPPS1,8.0,7/24/2023,11/7/2022,beta_context_day06-07242023143151,beta_day06_230724_121943,2.0,30,...,na,10845.0,48313.0,71310.0,108400.0,120990.0,158680.0,173003.0,210012.0,
4,beta,Y:\laura_berkowitz\alz_stim\data\beta\beta_day...,APPPS1,8.0,7/26/2023,11/7/2022,beta_context_day07-07262023124858,beta_day07_230726_102440,3.0,30,...,na,12675.0,50245.0,63361.0,100284.0,116559.0,154141.0,168526.0,205415.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
558,hpc17,Y:\laura_berkowitz\app_ps1_ephys\data\hpc17\hp...,,,,,na,hpc17_day05_240822,,,...,,,,,,,,,,
559,hpc17,Y:\laura_berkowitz\app_ps1_ephys\data\hpc17\hp...,,,,,hpc17_LT_day06-08232024132816,hpc17_day06_240823_095850,,,...,,,,,,,,,,
560,hpc17,Y:\laura_berkowitz\app_ps1_ephys\data\hpc17\hp...,,,,,hpc17_LT_day07-08242024113743,hpc17_day07_240824_080253,,,...,,,,,,,,,,
561,hpc17,Y:\laura_berkowitz\app_ps1_ephys\data\hpc17\hp...,,,,,hpc17_LT_day08-08262024154709,hpc17_day08_240826_093245,,,...,,,,,,,,,,


In [31]:
# remove paths that are not folders
datafolders = [x for x in datafolders if os.path.isdir(x)]

# remove folders within to_split folder 
datafolders = [os.path.normpath(x) for x in datafolders if 'to_split' not in x]
datafolders

['Y:\\laura_berkowitz\\alz_stim\\data\\beta\\beta_day00_230623_110054',
 'Y:\\laura_berkowitz\\alz_stim\\data\\beta\\beta_day01',
 'Y:\\laura_berkowitz\\alz_stim\\data\\beta\\beta_day02',
 'Y:\\laura_berkowitz\\alz_stim\\data\\beta\\beta_day03',
 'Y:\\laura_berkowitz\\alz_stim\\data\\beta\\beta_day04_230713_113346',
 'Y:\\laura_berkowitz\\alz_stim\\data\\beta\\beta_day05_230717_123958',
 'Y:\\laura_berkowitz\\alz_stim\\data\\beta\\beta_day06_230724_121943',
 'Y:\\laura_berkowitz\\alz_stim\\data\\beta\\beta_day07_230726_102440',
 'Y:\\laura_berkowitz\\alz_stim\\data\\beta\\beta_day08_230807_160422',
 'Y:\\laura_berkowitz\\alz_stim\\data\\beta\\beta_day09_230808_112208',
 'Y:\\laura_berkowitz\\alz_stim\\data\\beta\\beta_day10_230810_124537',
 'Y:\\laura_berkowitz\\alz_stim\\data\\beta\\beta_day11_230811_123319',
 'Y:\\laura_berkowitz\\alz_stim\\data\\beta\\beta_day12_230814_122807',
 'Y:\\laura_berkowitz\\alz_stim\\data\\beta\\beta_day13',
 'Y:\\laura_berkowitz\\alz_stim\\data\\beta\\bet

## Iterater through each folder, if a video exists, add each video to the metadata dataframe, including basepath, subid, basename and video name. 

In [32]:
for basepath in datafolders: 
    # check if the folder contains a video file 
    vid_files = glob.glob(basepath + '\*.avi')
    basename = os.path.basename(basepath)
    subid = basename.split('_')[0]
    

    # if the folder contains a video file & the subid is not already in the metadata dataframe
    if (len(vid_files) > 0) & (basepath not in metadata_df['basepath'].values):
        # add each video file to the metadata dataframe
        for vid in vid_files:
            metadata_df = metadata_df.append({'subid': subid, 'vidname': os.path.basename(vid).split('.')[0], 'basepath': basepath,'basename':basename}, ignore_index=True)
    elif basepath not in metadata_df['basepath'].values:
        
        metadata_df = metadata_df.append({'subid': subid, 'vidname': 'na', 'basepath': basepath,'basename':basename}, ignore_index=True)
        print(f'No video files found in {basepath}')
        continue
        
        

  metadata_df = metadata_df.append({'subid': subid, 'vidname': 'na', 'basepath': basepath,'basename':basename}, ignore_index=True)
  metadata_df = metadata_df.append({'subid': subid, 'vidname': 'na', 'basepath': basepath,'basename':basename}, ignore_index=True)
  metadata_df = metadata_df.append({'subid': subid, 'vidname': 'na', 'basepath': basepath,'basename':basename}, ignore_index=True)
  metadata_df = metadata_df.append({'subid': subid, 'vidname': 'na', 'basepath': basepath,'basename':basename}, ignore_index=True)


No video files found in Y:\laura_berkowitz\alz_stim\data\beta\beta_day00_230623_110054
No video files found in Y:\laura_berkowitz\alz_stim\data\beta\beta_day01
No video files found in Y:\laura_berkowitz\alz_stim\data\beta\beta_day02
No video files found in Y:\laura_berkowitz\alz_stim\data\beta\beta_day05_230717_123958


  metadata_df = metadata_df.append({'subid': subid, 'vidname': 'na', 'basepath': basepath,'basename':basename}, ignore_index=True)
  metadata_df = metadata_df.append({'subid': subid, 'vidname': 'na', 'basepath': basepath,'basename':basename}, ignore_index=True)
  metadata_df = metadata_df.append({'subid': subid, 'vidname': 'na', 'basepath': basepath,'basename':basename}, ignore_index=True)
  metadata_df = metadata_df.append({'subid': subid, 'vidname': 'na', 'basepath': basepath,'basename':basename}, ignore_index=True)


No video files found in Y:\laura_berkowitz\alz_stim\data\beta\beta_day08_230807_160422
No video files found in Y:\laura_berkowitz\alz_stim\data\beta\beta_day09_230808_112208
No video files found in Y:\laura_berkowitz\alz_stim\data\beta\beta_day11_230811_123319
No video files found in Y:\laura_berkowitz\alz_stim\data\beta\beta_day12_230814_122807


  metadata_df = metadata_df.append({'subid': subid, 'vidname': 'na', 'basepath': basepath,'basename':basename}, ignore_index=True)
  metadata_df = metadata_df.append({'subid': subid, 'vidname': 'na', 'basepath': basepath,'basename':basename}, ignore_index=True)
  metadata_df = metadata_df.append({'subid': subid, 'vidname': 'na', 'basepath': basepath,'basename':basename}, ignore_index=True)
  metadata_df = metadata_df.append({'subid': subid, 'vidname': 'na', 'basepath': basepath,'basename':basename}, ignore_index=True)


No video files found in Y:\laura_berkowitz\alz_stim\data\beta\beta_day19_230921_115722
No video files found in Y:\laura_berkowitz\alz_stim\data\beta\beta_testing
No video files found in Y:\laura_berkowitz\alz_stim\data\dyno\dyno_day02_231103_182439
No video files found in Y:\laura_berkowitz\alz_stim\data\dyno\dyno_test_231106_112841


  metadata_df = metadata_df.append({'subid': subid, 'vidname': os.path.basename(vid).split('.')[0], 'basepath': basepath,'basename':basename}, ignore_index=True)


In [33]:
metadata_df

Unnamed: 0,subid,basepath,genotype,age,session_date,dob,vidname,basename,exposure,maze_length_cm,...,rewarded_arena,trial_start_1,trial_stop_1,trial_start_2,trial_stop_2,trial_start_3,trial_stop_3,trial_start_4,trial_stop_4,notes
0,beta,Y:\laura_berkowitz\alz_stim\data\beta\beta_day03,APPPS1,7.0,6/28/2023,11/7/2022,beta_OL_learning_day03_20230628_134047740,beta_day03,1.0,30,...,na,1476.0,10637.0,,,,,,,
1,beta,Y:\laura_berkowitz\alz_stim\data\beta\beta_day03,APPPS1,7.0,6/28/2023,11/7/2022,beta_OL_test_day03_20230628_175938541,beta_day03,1.0,30,...,na,2793.0,40101.0,,,,,,,
2,beta,Y:\laura_berkowitz\alz_stim\data\beta\beta_day...,APPPS1,8.0,7/13/2023,11/7/2022,beta_context_day04-07132023135730,beta_day04_230713_113346,1.0,30,...,na,7606.0,44032.0,56644.0,93354.0,105788.0,143828.0,156588.0,197314.0,
3,beta,Y:\laura_berkowitz\alz_stim\data\beta\beta_day...,APPPS1,8.0,7/24/2023,11/7/2022,beta_context_day06-07242023143151,beta_day06_230724_121943,2.0,30,...,na,10845.0,48313.0,71310.0,108400.0,120990.0,158680.0,173003.0,210012.0,
4,beta,Y:\laura_berkowitz\alz_stim\data\beta\beta_day...,APPPS1,8.0,7/26/2023,11/7/2022,beta_context_day07-07262023124858,beta_day07_230726_102440,3.0,30,...,na,12675.0,50245.0,63361.0,100284.0,116559.0,154141.0,168526.0,205415.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
571,beta,Y:\laura_berkowitz\alz_stim\data\beta\beta_day...,,,,,na,beta_day19_230921_115722,,,...,,,,,,,,,,
572,beta,Y:\laura_berkowitz\alz_stim\data\beta\beta_tes...,,,,,na,beta_testing,,,...,,,,,,,,,,
573,dyno,Y:\laura_berkowitz\alz_stim\data\dyno\dyno_day...,,,,,na,dyno_day02_231103_182439,,,...,,,,,,,,,,
574,dyno,Y:\laura_berkowitz\alz_stim\data\dyno\dyno_tes...,,,,,na,dyno_test_231106_112841,,,...,,,,,,,,,,


## Save the updated metadata dataframe back to folder

In [29]:
## Save the metadata to a csv file
metadata_df.to_csv(metadata_path, index=False)