## Add new behavior sessions to metadata sheet from data folders

* Notebook to automatically add basepath, basename and video name to metadata.csv for SNLab behavior sessions


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import glob

### Set paths to the dataset and the metadata sheet


In [2]:
# Cohort folder that is scanned for session folders and their videos.
dataset_path = r"Y:\laura_berkowitz\behavior_validation\appps1_cpp\data\cohort6"
# Metadata sheet: read at the start of the notebook and overwritten by the final save cell.
metadata_path = r"Y:\laura_berkowitz\behavior_validation\appps1_cpp\metadata.csv"

## Load the existing metadata and find all session folders in Cohort 6. These folders are iterated over below; if a folder contains a video, it is added to the metadata dataframe.

In [3]:
# Load the existing metadata sheet; rows for new sessions are added to this frame.
metadata_df = pd.read_csv(metadata_path)

# Session folders sit two levels below the cohort folder:
#   <dataset_path>\<subid>\<session>
# os.path.join inserts the path separator explicitly. Gluing '**' directly onto
# the folder name does not make glob recurse ('**' is only recursive when it is
# a whole path component) and would also match sibling folders whose names
# merely start with the cohort folder name.
datafolders = glob.glob(os.path.join(dataset_path, '*', '*'))

In [4]:
# Show the current metadata table for a quick visual check.
metadata_df

Unnamed: 0,subid,basepath,sex,genotype,age,session_date,dob,vidname,basename,exposure,...,moved_object,trial_start_1,trial_stop_1,trial_start_2,trial_stop_2,trial_start_3,trial_stop_3,trial_start_4,trial_stop_4,notes
0,3764,Y://laura_berkowitz/behavior_validation/appps1...,female,APPPS1,9.0,7/19/2023,10/12/2022,3764B_pairing2_A-07192023091159,3764_pairing_day02,2.0,...,none,189.0,27306.0,,,,,,,
1,3764,Y://laura_berkowitz/behavior_validation/appps1...,female,APPPS1,9.0,7/19/2023,10/12/2022,3764B_pairing2_B-07192023093033,3764_pairing_day02,2.0,...,none,98.0,27131.0,,,,,,,
2,3764,Y://laura_berkowitz/behavior_validation/appps1...,female,APPPS1,9.0,7/20/2023,10/12/2022,3764B_pairing3_A-07202023085140,3764_pairing_posttest_day03,3.0,...,none,122.0,27194.0,,,,,,,
3,3764,Y://laura_berkowitz/behavior_validation/appps1...,female,APPPS1,9.0,7/20/2023,10/12/2022,3764B_pairing3_B-07202023091025,3764_pairing_posttest_day03,3.0,...,none,474.0,27538.0,,,,,,,
4,3764,Y://laura_berkowitz/behavior_validation/appps1...,female,APPPS1,9.0,7/20/2023,10/12/2022,3764B_posttest-07202023112613,3764_pairing_posttest_day03,2.0,...,none,605.0,28280.0,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
305,4222,Y:\laura_berkowitz\behavior_validation\appps1_...,,,,,,4222_habituation_day01-03092024080514,4222_cpptask_day01,,...,,,,,,,,,,
306,4222,Y:\laura_berkowitz\behavior_validation\appps1_...,,,,,,4222_habituation_day02-03102024080619,4222_cpptask_day02,,...,,,,,,,,,,
307,4223,Y:\laura_berkowitz\behavior_validation\appps1_...,,,,,,4223_habituation_day01-03082024080759,4223_cpptask_day01,,...,,,,,,,,,,
308,4223,Y:\laura_berkowitz\behavior_validation\appps1_...,,,,,,4223_habituation_day02-03092024075121,4223_cpptask_day02,,...,,,,,,,,,,


In [27]:
# Keep only real directories, and drop anything whose path mentions 'to_split'.
datafolders = [
    folder
    for folder in datafolders
    if os.path.isdir(folder) and 'to_split' not in folder
]
datafolders

['Y:\\laura_berkowitz\\behavior_validation\\appps1_cpp\\data\\cohort6\\4016\\4016_cpptask_day01',
 'Y:\\laura_berkowitz\\behavior_validation\\appps1_cpp\\data\\cohort6\\4016\\4016_cpptask_day02',
 'Y:\\laura_berkowitz\\behavior_validation\\appps1_cpp\\data\\cohort6\\4016\\4016_cpptask_day03',
 'Y:\\laura_berkowitz\\behavior_validation\\appps1_cpp\\data\\cohort6\\4016\\4016_cpptask_day04',
 'Y:\\laura_berkowitz\\behavior_validation\\appps1_cpp\\data\\cohort6\\4017\\4017_cpptask_day01',
 'Y:\\laura_berkowitz\\behavior_validation\\appps1_cpp\\data\\cohort6\\4017\\4017_cpptask_day02',
 'Y:\\laura_berkowitz\\behavior_validation\\appps1_cpp\\data\\cohort6\\4017\\4017_cpptask_day03',
 'Y:\\laura_berkowitz\\behavior_validation\\appps1_cpp\\data\\cohort6\\4017\\4017_cpptask_day04',
 'Y:\\laura_berkowitz\\behavior_validation\\appps1_cpp\\data\\cohort6\\4024\\4024_cpptask_day01',
 'Y:\\laura_berkowitz\\behavior_validation\\appps1_cpp\\data\\cohort6\\4024\\4024_cpptask_day02',
 'Y:\\laura_berkowit

## Iterate through each folder; if a video exists, add each video to the metadata dataframe, including basepath, subid, basename and video name.

In [28]:
# Session folders already present in the metadata sheet. Paths are normalised
# so that a folder stored with a different separator style ('/' vs '\') or
# letter case is still recognised as already recorded.
known_basepaths = {
    os.path.normcase(os.path.normpath(str(path)))
    for path in metadata_df['basepath'].dropna()
}

# Collect the new rows first and add them in one step: DataFrame.append was
# removed in pandas 2.0, and growing a frame row by row is quadratic.
new_rows = []
for basepath in datafolders:
    # Folder name is the session basename; its first '_'-separated token is the subject id.
    basename = os.path.basename(basepath)
    subid = basename.split('_')[0]

    # check if the folder contains a video file
    vid_files = glob.glob(os.path.join(basepath, '*.avi'))
    if not vid_files:
        print(f'No video files found in {basepath}')
        continue

    # skip folders whose basepath is already listed in the metadata dataframe
    if os.path.normcase(os.path.normpath(basepath)) in known_basepaths:
        print(f'Already in metadata, skipping {basepath}')
        continue

    # one metadata row per video file in the session folder
    for vid in vid_files:
        new_rows.append({
            'subid': subid,
            # splitext strips only the final extension, so dots inside the
            # video name are preserved
            'vidname': os.path.splitext(os.path.basename(vid))[0],
            'basepath': basepath,
            'basename': basename,
        })

if new_rows:
    metadata_df = pd.concat([metadata_df, pd.DataFrame(new_rows)], ignore_index=True)
        
        

No video files found in Y:\laura_berkowitz\behavior_validation\appps1_cpp\data\cohort6\4016\4016_cpptask_day01
No video files found in Y:\laura_berkowitz\behavior_validation\appps1_cpp\data\cohort6\4016\4016_cpptask_day02
No video files found in Y:\laura_berkowitz\behavior_validation\appps1_cpp\data\cohort6\4016\4016_cpptask_day03
No video files found in Y:\laura_berkowitz\behavior_validation\appps1_cpp\data\cohort6\4016\4016_cpptask_day04
No video files found in Y:\laura_berkowitz\behavior_validation\appps1_cpp\data\cohort6\4017\4017_cpptask_day01
No video files found in Y:\laura_berkowitz\behavior_validation\appps1_cpp\data\cohort6\4017\4017_cpptask_day02
No video files found in Y:\laura_berkowitz\behavior_validation\appps1_cpp\data\cohort6\4017\4017_cpptask_day03
No video files found in Y:\laura_berkowitz\behavior_validation\appps1_cpp\data\cohort6\4017\4017_cpptask_day04
No video files found in Y:\laura_berkowitz\behavior_validation\appps1_cpp\data\cohort6\4024\4024_cpptask_day01
N

## Save the updated metadata dataframe back to folder

In [29]:
## Save the metadata to a csv file
# Writes to metadata_path, i.e. the same file the sheet was read from, so the
# previous version on disk is replaced. index=False keeps the row index out of the file.
metadata_df.to_csv(metadata_path, index=False)