## Add new behavior sessions to metadata sheet from data folders

* Notebook to automatically add basepath, basename and video name to metadata.csv for SNLab behavior sessions


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import glob

### Set paths to dataset and Metadata 


In [17]:
# Root folder of the experiment; the session data folder and the metadata
# sheet both live directly under it.
project_root = r"Y:\Becka Irwin\IVD\2025 Practice Open Field APP KI Mice"

# Folder that is searched for session subfolders.
dataset_path = project_root + "\\data"
# CSV metadata sheet that is read, updated, and written back by this notebook.
metadata_path = project_root + "\\metadata.csv"

## Find all folders in Cohort 6 and iterate through them. If a video exists, add it to the metadata dataframe.

In [22]:
# Load the existing metadata sheet.
metadata_df = pd.read_csv(metadata_path)

# Normalize the stored basepaths so they compare equal to the normalized
# folder paths derived from the glob results. Blank (NaN) entries are left
# untouched instead of crashing os.path.normpath.
metadata_df['basepath'] = metadata_df['basepath'].map(os.path.normpath, na_action='ignore')

# List every entry at least two levels below dataset_path
# (<dataset_path>/<subject>/<session>/...). The pattern is built with
# os.path.join so that:
#   * no backslash escapes are needed inside the string literal, and
#   * '**' is its own path component (glob only treats it as recursive then);
#     gluing it onto the folder name would also match sibling folders whose
#     names merely start with the dataset folder name.
datafolders = glob.glob(os.path.join(dataset_path, '*', '**', '*'), recursive=True)

In [23]:
# Keep only directories whose path does not contain 'to_split', and store the
# survivors in normalized form so they can be compared with the metadata
# basepaths.
datafolders = [
    os.path.normpath(candidate)
    for candidate in datafolders
    if os.path.isdir(candidate) and 'to_split' not in candidate
]
datafolders

['Y:\\Becka Irwin\\IVD\\2025 Practice Open Field APP KI Mice\\data\\164N\\164N_open_field_day01',
 'Y:\\Becka Irwin\\IVD\\2025 Practice Open Field APP KI Mice\\data\\164N\\164N_open_field_day02',
 'Y:\\Becka Irwin\\IVD\\2025 Practice Open Field APP KI Mice\\data\\164N\\164N_open_field_day03',
 'Y:\\Becka Irwin\\IVD\\2025 Practice Open Field APP KI Mice\\data\\165L\\165L_open_field_day01',
 'Y:\\Becka Irwin\\IVD\\2025 Practice Open Field APP KI Mice\\data\\165L\\165L_open_field_day02',
 'Y:\\Becka Irwin\\IVD\\2025 Practice Open Field APP KI Mice\\data\\165L\\165L_open_field_day03',
 'Y:\\Becka Irwin\\IVD\\2025 Practice Open Field APP KI Mice\\data\\166R\\166R_open_field_day01',
 'Y:\\Becka Irwin\\IVD\\2025 Practice Open Field APP KI Mice\\data\\166R\\166R_open_field_day02',
 'Y:\\Becka Irwin\\IVD\\2025 Practice Open Field APP KI Mice\\data\\166R\\166R_open_field_day03',
 'Y:\\Becka Irwin\\IVD\\2025 Practice Open Field APP KI Mice\\data\\172N\\172N_open_field_day01',
 'Y:\\Becka Irwin\\I

## Iterate through each folder. If a video exists, add each video to the metadata dataframe, including basepath, subid, basename and video name.

In [24]:
# Placeholder stored in 'vidname' for sessions whose folder has no video yet.
MISSING_VIDNAME = 'MISSING'

# Basepaths already present in the metadata sheet. Kept as a set so membership
# is tested once per folder without rescanning the dataframe column.
known_basepaths = set(metadata_df['basepath'].values)

# Rows for sessions that are not in the sheet yet. They are collected here and
# appended with a single concat after the loop instead of growing the
# dataframe row by row.
new_rows = []

for basepath in datafolders:
    # The folder name is the session basename (e.g. '164N_open_field_day01');
    # the text before the first underscore is used as the subject id.
    basename = os.path.basename(basepath)
    subid = basename.split('_')[0]

    # Video names are stored without directory or extension. splitext only
    # strips the final '.avi', so names containing extra dots are kept whole.
    # Sorting makes the order independent of the filesystem listing order.
    vid_files = sorted(glob.glob(os.path.join(basepath, '*.avi')))
    vidnames = [os.path.splitext(os.path.basename(vid))[0] for vid in vid_files]

    if basepath not in known_basepaths:
        # New session: add one row per video, or a single placeholder row if
        # the folder has no video yet.
        if not vidnames:
            print(f'No video files found in {basepath}')
            vidnames = [MISSING_VIDNAME]
        new_rows.extend(
            {'subid': subid, 'vidname': vidname, 'basepath': basepath, 'basename': basename}
            for vidname in vidnames
        )
        known_basepaths.add(basepath)

    elif vidnames:
        # Session already listed and a video now exists: fill in rows that
        # still carry the placeholder (or a blank). Rows that already have a
        # video name are left alone so curated entries are never overwritten.
        placeholder_rows = (metadata_df['basepath'] == basepath) & (
            metadata_df['vidname'].isna() | (metadata_df['vidname'] == MISSING_VIDNAME)
        )
        if placeholder_rows.any():
            if len(vidnames) > 1:
                print(f'Multiple video files found in {basepath}; using {vidnames[0]}')
            # .loc is required here: the row selector is a boolean mask and the
            # column is addressed by label.
            metadata_df.loc[placeholder_rows, 'vidname'] = vidnames[0]

# Append all new sessions at once; existing columns keep their order and any
# column missing from the new rows is filled with NaN.
if new_rows:
    metadata_df = pd.concat([metadata_df, pd.DataFrame(new_rows)], ignore_index=True)
        
        

In [25]:
# Display the metadata dataframe so it can be inspected before it is saved.
metadata_df

Unnamed: 0,subid,basepath,genotype,age,session_date,dob,vidname,basename,exposure,pixel_distance,...,trials_ID,trial_start_1,trial_stop_1,trial_start_2,trial_stop_2,trial_start_3,trial_stop_3,trial_start_4,trial_stop_4,notes
0,164N,Y:\Becka Irwin\IVD\2025 Practice Open Field AP...,APP-KI,,3/25/2025,6/20/2024,164N_OF_day01-03252025081457,164N_open_field_day01,,,...,open_field,474.0,5107.0,,,,,,,
1,164N,Y:\Becka Irwin\IVD\2025 Practice Open Field AP...,APP-KI,,,6/20/2024,MISSING,164N_open_field_day02,,,...,open_field,,,,,,,,,
2,164N,Y:\Becka Irwin\IVD\2025 Practice Open Field AP...,APP-KI,,,6/20/2024,MISSING,164N_open_field_day03,,,...,open_field,,,,,,,,,
3,165L,Y:\Becka Irwin\IVD\2025 Practice Open Field AP...,APP-KI,,3/25/2025,6/20/2024,165L_OF_day01-03252025082334,165L_open_field_day01,,,...,open_field,824.0,5582.0,,,,,,,
4,165L,Y:\Becka Irwin\IVD\2025 Practice Open Field AP...,APP-KI,,,6/20/2024,MISSING,165L_open_field_day02,,,...,open_field,,,,,,,,,
5,165L,Y:\Becka Irwin\IVD\2025 Practice Open Field AP...,APP-KI,,,6/20/2024,MISSING,165L_open_field_day03,,,...,open_field,,,,,,,,,
6,166R,Y:\Becka Irwin\IVD\2025 Practice Open Field AP...,APP-KI,,3/25/2025,6/20/2024,166R_OF_day01-03252025083302,166R_open_field_day01,,,...,open_field,649.0,5268.0,,,,,,,
7,166R,Y:\Becka Irwin\IVD\2025 Practice Open Field AP...,APP-KI,,,6/20/2024,MISSING,166R_open_field_day02,,,...,open_field,,,,,,,,,
8,166R,Y:\Becka Irwin\IVD\2025 Practice Open Field AP...,APP-KI,,,6/20/2024,MISSING,166R_open_field_day03,,,...,open_field,,,,,,,,,
9,172N,Y:\Becka Irwin\IVD\2025 Practice Open Field AP...,APP-KI,,3/25/2025,7/1/2024,172N_OF_day01-03252025075714,172N_open_field_day01,,,...,open_field,679.0,5469.0,,,,,,,


## Save the updated metadata dataframe back to folder

In [7]:
# Write the dataframe to the CSV at metadata_path (the same file it was loaded
# from, so the original is overwritten); the row index is left out of the file.
metadata_df.to_csv(path_or_buf=metadata_path, index=False)