# STEP 1: 1_preprocess_data.ipynb

Press SHIFT + ENTER to run code

### USER INPUT!
Specify where to store the csvs. change the data_root_dir

In [3]:
## Define project
project_name = 'project_lindsay_PsiloSNI_cohort'

## Say where your data holding your DLC-analyzed CSVs is stored
    # i.e. Apple: '/Users/justinjames/LUPE_Corder-Lab/+project_XXX+/dlc_csvs'
data_root_dir = '/Users/justinjames/LUPE_Corder-Lab/project_lindsay_PsiloSNI/dlc_csvs'

## Breakdown how your data is organized in the folders-- name of folders that are groups? within groups, name condition folders
    # i.e. groups = ['Group1', 'Group2','Group3']
    # i.e. conditions = ['control','experiment']
groups = ['1_BL', '2_30Min', '3_24Hr', '4_7Days']
conditions = ['SNI_Saline', 'Uninjured_Psilo', 'SNI_Psilo', 'Uninjured_Saline']

### Updating meta.py for project

In [4]:
import os

def update_meta_file(project_name):
    meta_file_path = '../utils/meta.py'
    
    groups_var = f"groups_{project_name} = {groups}"
    conditions_var = f"conditions_{project_name} = {conditions}"
    
    # Read the current contents of the meta file
    if os.path.exists(meta_file_path):
        with open(meta_file_path, 'r') as file:
            lines = file.readlines()
    else:
        lines = []

    # Check if the variables are already defined and update them if necessary
    groups_defined = False
    conditions_defined = False
    for i, line in enumerate(lines):
        if line.startswith(f"groups_{project_name} ="):
            lines[i] = groups_var + '\n'
            groups_defined = True
        elif line.startswith(f"conditions_{project_name} ="):
            lines[i] = conditions_var + '\n'
            conditions_defined = True

    # If the variables are not defined, add them to the end of the file
    if not groups_defined:
        lines.append(groups_var + '\n')
    if not conditions_defined:
        lines.append(conditions_var + '\n')

    # Write the updated contents back to the meta file
    with open(meta_file_path, 'w') as file:
        file.writelines(lines)
    
    print(f'Updated {meta_file_path} with project-specific groups and conditions.')

# Example usage
update_meta_file(project_name)

Updated ../utils/meta.py with project-specific groups and conditions.


### Main Code: store all data in dictionary
WAIT UNTIL PROCESSING DATA FINISHES

In [5]:
###### RUN DEPENDENCIES ######
import glob
import pickle
import os
import sys
if not os.path.join(os.path.abspath(''), '../') in sys.path:
    sys.path.append(os.path.join(os.path.abspath(''), '../'))
import numpy as np
import pandas as pd
from tqdm import notebook
from utils.feature_utils import filter_pose_noise

###### MAIN CODE ######
filenames = {key: [] for key in groups}
data = {key: [] for key in groups}
for group in notebook.tqdm(groups):
    filenames[group] = {key: [] for key in conditions}
    data[group] = {key: [] for key in conditions}
    for condition in notebook.tqdm(conditions):
        
        filenames[group][condition] = glob.glob(str.join('/', 
                                                              (data_root_dir,
                                                               f'{group}', 
                                                               f'{condition}', 
                                                               '*.csv')))
        data[group][condition] = {key: [] for key in [f'file{i}' 
                                                           for i in range(len(filenames[group][condition]))]}
        for i, csv in enumerate(notebook.tqdm(filenames[group][condition])):
            temp_df = pd.read_csv(csv, header=[0, 1, 2, 3], sep=",", index_col=0)
            selected_pose_idx = np.arange(temp_df.shape[1])
            idx_llh = selected_pose_idx[2::3]
            # the loaded sleap file has them too, so exclude for both
            idx_selected = [i for i in selected_pose_idx if i not in idx_llh]
            currdf_filt, _ = filter_pose_noise(temp_df, idx_selected=idx_selected, idx_llh=idx_llh, 
                                               llh_value=0.1)
            data[group][condition][f'file{i}'] = currdf_filt

###### WAIT UNTIL PROCESSING DATA FINISHES ######

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

In [6]:
###### STORE ALL DATA IN DICTIONARY ######  
directory = f"../processed_dataset/{project_name}/"
os.makedirs(directory, exist_ok=True)

raw_data_pkl_filename = f"../processed_dataset/{project_name}/raw_data_{project_name}.pkl"
with open(raw_data_pkl_filename, 'wb') as f:
    pickle.dump(data, f)

print(f'{raw_data_pkl_filename} is created and saved!')

../processed_dataset/project_lindsay_PsiloSNI_cohort/raw_data_project_lindsay_PsiloSNI_cohort.pkl is created and saved!


In [7]:
###### CHECK DATA STORED CORRECTLY IN DICTIONARY ######
from utils.classification import load_behaviors, load_data
data = load_data(f"../processed_dataset/{project_name}/raw_data_{project_name}.pkl")
data

{'1_BL': {'SNI_Saline': {'file0': array([[206.01417732, 634.04237676, 200.32245266, ..., 437.34568858,
           164.31680489, 408.90315171],
          [211.52418542, 640.07388639, 205.19246387, ..., 442.77082258,
           164.48430991, 413.89126015],
          [211.52418542, 640.07388639, 207.88437796, ..., 450.63022423,
           167.1436584 , 415.06194082],
          ...,
          [471.06660652, 634.98170519, 463.92079782, ..., 624.85881042,
           345.56349576, 605.26411963],
          [484.68834496, 614.66194531, 463.92079782, ..., 625.40074563,
           346.09944928, 605.43118584],
          [484.68834496, 614.66194531, 463.92079782, ..., 625.79646564,
           348.97815627, 609.18332386]]),
   'file1': array([[576.28118324, 140.75227571, 568.44366503, ..., 125.89640689,
           364.78420937, 132.92958474],
          [578.45678782, 143.66354483, 572.61970317, ..., 126.68117803,
           367.97262692, 132.33826923],
          [582.10913324, 146.10187972, 576.3955

# MOVE TO STEP 2!
2_preprocess_get_features.ipynb 