## Add new behavior sessions to metadata sheet from data folders

* Notebook to automatically add basepath, basename and video name to metadata.csv for SNLab behavior sessions


In [70]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import glob

### Set paths to dataset and Metadata 


In [74]:
# Root folder that is searched for session folders further down in this notebook
dataset_path = r"Y:\laura_berkowitz\behavior_validation\appps1_cpp\data\cohort7"
# Metadata sheet: read at the start, extended with new sessions, and written back to this same path at the end
metadata_path = r"Y:\laura_berkowitz\behavior_validation\appps1_cpp\metadata.csv"

## Find all folders under `dataset_path` (currently cohort 7) and iterate through them. If a video exists, add it to the metadata dataframe.

In [75]:
metadata_df = pd.read_csv(metadata_path)

# Normalize the basepath column so its entries compare equal to the
# os.path.normpath-ed folder paths that are checked against it later on.
metadata_df['basepath'] = metadata_df['basepath'].map(os.path.normpath)

# Build the search pattern with os.path.join instead of string concatenation:
#   * the old pattern glued '**' directly onto the last folder name (no separator),
#     so it behaved like '<dataset_path>*' and could also pick up sibling folders
#     that merely share the same name prefix;
#   * '\*' inside a normal string literal is an invalid escape sequence.
# '*' matches one folder level below dataset_path, '**' (with recursive=True)
# matches zero or more nested folder levels, and the final '*' matches the entry itself.
datafolders = glob.glob(os.path.join(dataset_path, '*', '**', '*'), recursive=True)

In [76]:
# Keep only real directories, drop anything whose path mentions 'to_split',
# and normalize the surviving paths in a single pass.
datafolders = [
    os.path.normpath(folder)
    for folder in datafolders
    if os.path.isdir(folder) and 'to_split' not in folder
]
datafolders

['Y:\\laura_berkowitz\\behavior_validation\\appps1_cpp\\data\\cohort7\\3984\\3984_cpptask_day04',
 'Y:\\laura_berkowitz\\behavior_validation\\appps1_cpp\\data\\cohort7\\3984\\3984_habituation_day01',
 'Y:\\laura_berkowitz\\behavior_validation\\appps1_cpp\\data\\cohort7\\3984\\3984_habituation_day02',
 'Y:\\laura_berkowitz\\behavior_validation\\appps1_cpp\\data\\cohort7\\3984\\3984_habituation_day03',
 'Y:\\laura_berkowitz\\behavior_validation\\appps1_cpp\\data\\cohort7\\3985\\3985_cpptask_day04',
 'Y:\\laura_berkowitz\\behavior_validation\\appps1_cpp\\data\\cohort7\\3985\\3985_habituation_day01',
 'Y:\\laura_berkowitz\\behavior_validation\\appps1_cpp\\data\\cohort7\\3985\\3985_habituation_day02',
 'Y:\\laura_berkowitz\\behavior_validation\\appps1_cpp\\data\\cohort7\\3985\\3985_habituation_day03',
 'Y:\\laura_berkowitz\\behavior_validation\\appps1_cpp\\data\\cohort7\\3986\\3986_cpptask_day04',
 'Y:\\laura_berkowitz\\behavior_validation\\appps1_cpp\\data\\cohort7\\3986\\3986_habituation_

## Iterate through each folder; if a video exists, add each video to the metadata dataframe, including basepath, subid, basename and video name. Folders without a video are added with the video name set to `MISSING`.

In [77]:
# Basepaths that already have a row in the metadata sheet. Built once so that each
# folder is a set lookup instead of a scan over the whole 'basepath' column.
known_basepaths = set(metadata_df['basepath'].values)

# New rows are collected here and appended in a single concat after the loop;
# concatenating inside the loop re-copies the whole dataframe for every row.
new_rows = []

for basepath in datafolders:
    # skip folders whose basepath is already present in the metadata dataframe
    if basepath in known_basepaths:
        continue
    known_basepaths.add(basepath)

    # folder name is '<subid>_<session...>', so the subid is the part before the first '_'
    basename = os.path.basename(basepath)
    subid = basename.split('_')[0]

    # look for video files directly inside the folder; glob.escape keeps any
    # glob metacharacters in the folder path itself from being treated as wildcards
    vid_files = glob.glob(os.path.join(glob.escape(basepath), '*.avi'))

    if vid_files:
        # one metadata row per video file
        for vid in vid_files:
            new_rows.append({
                'subid': subid,
                'vidname': os.path.basename(vid).split('.')[0],
                'basepath': basepath,
                'basename': basename,
            })
    else:
        # keep a placeholder row so the session is still tracked in the sheet
        new_rows.append({
            'subid': subid,
            'vidname': 'MISSING',
            'basepath': basepath,
            'basename': basename,
        })
        print(f'No video files found in {basepath}')

# append everything at once; skipped entirely when there is nothing new so the
# existing dataframe is left untouched
if new_rows:
    metadata_df = pd.concat([metadata_df, pd.DataFrame(new_rows)], ignore_index=True)
        
        

No video files found in Y:\laura_berkowitz\behavior_validation\appps1_cpp\data\cohort7\3997\3997_cpptask_day04
No video files found in Y:\laura_berkowitz\behavior_validation\appps1_cpp\data\cohort7\3997\3997_habituation_day01
No video files found in Y:\laura_berkowitz\behavior_validation\appps1_cpp\data\cohort7\3997\3997_habituation_day02
No video files found in Y:\laura_berkowitz\behavior_validation\appps1_cpp\data\cohort7\3997\3997_habituation_day03
No video files found in Y:\laura_berkowitz\behavior_validation\appps1_cpp\data\cohort7\4013\4013_habituation_day02
No video files found in Y:\laura_berkowitz\behavior_validation\appps1_cpp\data\cohort7\4032\4032_cpptask_day04
No video files found in Y:\laura_berkowitz\behavior_validation\appps1_cpp\data\cohort7\4032\4032_habituation_day01
No video files found in Y:\laura_berkowitz\behavior_validation\appps1_cpp\data\cohort7\4032\4032_habituation_day02
No video files found in Y:\laura_berkowitz\behavior_validation\appps1_cpp\data\cohort7\4

In [78]:
# Show the updated metadata table so the appended rows can be checked before it is saved
metadata_df

Unnamed: 0,subid,basepath,sex,genotype,age,session_date,dob,vidname,basename,exposure,...,trial_stop_1,trial_start_2,trial_stop_2,trial_start_3,trial_stop_3,trial_start_4,trial_stop_4,trial_start_5,trial_stop_5,notes
0,3764,Y:\laura_berkowitz\behavior_validation\appps1_...,female,APPPS1,9.0,7/19/2023,10/12/2022,3764B_pairing2_A-07192023091159,3764_pairing_day02,2.0,...,27306.0,,,,,,,,,
1,3764,Y:\laura_berkowitz\behavior_validation\appps1_...,female,APPPS1,9.0,7/19/2023,10/12/2022,3764B_pairing2_B-07192023093033,3764_pairing_day02,2.0,...,27131.0,,,,,,,,,
2,3764,Y:\laura_berkowitz\behavior_validation\appps1_...,female,APPPS1,9.0,7/20/2023,10/12/2022,3764B_pairing3_A-07202023085140,3764_pairing_posttest_day03,3.0,...,27194.0,,,,,,,,,
3,3764,Y:\laura_berkowitz\behavior_validation\appps1_...,female,APPPS1,9.0,7/20/2023,10/12/2022,3764B_pairing3_B-07202023091025,3764_pairing_posttest_day03,3.0,...,27538.0,,,,,,,,,
4,3764,Y:\laura_berkowitz\behavior_validation\appps1_...,female,APPPS1,9.0,7/20/2023,10/12/2022,3764B_posttest-07202023112613,3764_pairing_posttest_day03,2.0,...,28280.0,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
589,4157,Y:\laura_berkowitz\behavior_validation\appps1_...,,,,,,4157_habituation_day02-07132024113904,4157_habituation_day02,,...,,,,,,,,,,
590,4157,Y:\laura_berkowitz\behavior_validation\appps1_...,,,,,,4157_habituation_day3-07142024103336,4157_habituation_day03,,...,,,,,,,,,,
591,4158,Y:\laura_berkowitz\behavior_validation\appps1_...,,,,,,4158_OF_day1-07122024145005,4158_habituation_day01,,...,,,,,,,,,,
592,4158,Y:\laura_berkowitz\behavior_validation\appps1_...,,,,,,4158_habituation_day02-07132024114614,4158_habituation_day02,,...,,,,,,,,,,


## Save the updated metadata dataframe back to folder

In [79]:
# Write the updated metadata back to metadata_path. This is the same file that was
# read at the start of the notebook, so the original CSV is overwritten in place.
# index=False keeps the dataframe's row index out of the file.
metadata_df.to_csv(metadata_path, index=False)