In [1]:
# Imports of all used packages and libraries
import sys
import os
import git
import glob
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import defaultdict

In [2]:
# Locate the enclosing git repository starting from the current working
# directory, so the notebook can be run from any sub-folder of the checkout.
git_repo = git.Repo(".", search_parent_directories=True)
git_root = git_repo.git.rev_parse("--show-toplevel")
# Put the repository's `src` folder at the front of the import path.
# The membership check keeps this cell idempotent: re-running it no longer
# stacks a duplicate entry on sys.path each time.
if os.path.join(git_root, 'src') not in sys.path:
    sys.path.insert(0, os.path.join(git_root, 'src'))

import utilities.helper
import trodes.read_exported

In [3]:
# Display the resolved repository root to confirm the path setup worked.
git_root

'C:/Users/short/Documents/GitHub/nose_poke_identifier'

In [4]:
def extract_floats(s):
    """
    Extracts all numbers from a string and returns them as a list of strings.

    Decimal numbers ("1.5", ".5") and plain integers ("3") are both matched,
    and an optional leading sign is honored for either form. Every match is
    normalized through ``float``, so "3" is returned as "3.0".

    Parameters:
    - s (str): The string to extract floats from.

    Returns:
    - list: A list of strings, each representing a float found in the input
      string, in order of appearance. Empty if the string contains no digits.
    """
    # The two alternatives are grouped so the optional sign applies to integers
    # as well as decimals. Ungrouped, the sign only bound to the decimal branch
    # and "-5" was returned as "5.0".
    float_pattern = r"[-+]?(?:\d*\.\d+|\d+)"
    return [str(float(num)) for num in re.findall(float_pattern, s)]

In [5]:
# Directory holding the Spike Gadgets recording and its exported timestamp files.
# The exports are produced with the Trodes export tool:
#   https://docs.spikegadgets.com/en/latest/basic/ExportFunctions.html
# Exports to generate:
#   -raw      : continuous raw band export
#   -dio      : digital IO channel state change export
#   -analogio : continuous analog IO export
INPUT_DIR = "./data"

# Where processed output goes; declared with the inputs so the save location is visible up front.
OUTPUT_DIR = "./proc"
OUTPUT_PREFIX = "rce_pilot_2"

# NOTE(review): presumably the digital input channel carrying the tone signal
# and the state value that marks it -- confirm against the code that reads these.
TONE_DIN = "dio_ECU_Din1"
TONE_STATE = 1

# Create the output directory if it is missing; a no-op when it already exists.
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [6]:
# Named column subsets. They are only declared here; the code that consumes
# them is outside this section (presumably used to trim dataframes -- confirm).
COLS_TO_KEEP = [
    'session_dir',
    'recording',
    'metadata_dir',
    'metadata_file',
    'original_file',
    'filename',
    'session_path',
    'all_subjects',
    'current_subject',
    'event_timestamps',
    'video_name',
    'video_timestamps',
    'event_frames',
    'first_item_data',
]

RAW_COLS_TO_KEEP = [
    'session_dir',
    'recording',
    'original_file',
    'session_path',
    'current_subject',
    'first_item_data',
    'first_timestamp',
    'all_subjects',
]

STATE_COLS_TO_KEEP = [
    'session_dir',
    'metadata_file',
    'event_timestamps',
    'video_name',
    'video_timestamps',
    'event_frames',
]

# NOTE(review): by their names, columns expected to match vs. differ between
# rows being combined -- verify against the cell that uses them.
same_columns = ['session_dir', 'video_name']
different_columns = ['metadata_file', 'event_frames', 'event_timestamps']

# Collect every `.rec` session directory found directly under INPUT_DIR.
# The pattern is built from INPUT_DIR so it stays in sync with the
# configuration cell, and the result is sorted because glob returns matches
# in arbitrary order -- sorting makes the processing order reproducible.
# Cluster location used previously:
#   /scratch/back_up/reward_competition_extention/data/standard/2023_06_*/*.rec
ALL_SESSION_DIR = sorted(glob.glob(os.path.join(INPUT_DIR, "*.rec")))

In [7]:
# Display the session directories that were found, as a check on the glob pattern.
ALL_SESSION_DIR

['./data\\20230612_112630_standard_comp_to_training_D1_subj_1-2_and_1-1.rec']

In [8]:
# Trodes data for each session.
# Each key is a session name.
# Each value is whatever `organize_all_trodes_export` returns for that session
# directory (a nested mapping of the exported recording files).
session_to_trodes_data = utilities.helper.create_recursive_dict()

# Maps each session name to the path of its session recording.
session_to_path = {}

# Going through each session recording,
# which includes all the recordings from all the miniloggers and cameras.
for session_path in ALL_SESSION_DIR:
    try:
        # The session name is the directory name without its extension.
        session_basename = os.path.splitext(os.path.basename(session_path))[0]
        print("Current Session: {}".format(session_basename))
        # Reading the trodes data for every recording file in the session directory.
        session_to_trodes_data[session_basename] = trodes.read_exported.organize_all_trodes_export(session_path)

        # Recorded only after the export was read, so a failed session has no path entry.
        session_to_path[session_basename] = session_path
    except Exception as e:
        # Best-effort: one unreadable session must not stop the others, but the
        # message names the session and the exception type so the failure can
        # be traced (a bare print(e) shows e.g. only the key of a KeyError).
        print("Failed to read session {}: {}: {}".format(session_path, type(e).__name__, e))

Current Session: 20230612_112630_standard_comp_to_training_D1_subj_1-2_and_1-1
Skipping file 20230612_112630_standard_comp_to_training_D1_subj_1-1_t1b3L_box2_merged.dio_ECU_Din1.dat due to error: [Errno 2] No such file or directory: './data\\20230612_112630_standard_comp_to_training_D1_subj_1-2_and_1-1.rec\\20230612_112630_standard_comp_to_training_D1_subj_1-1_t1b3L_box2_merged.DIO\\20230612_112630_standard_comp_to_training_D1_subj_1-1_t1b3L_box2_merged.dio_ECU_Din1.dat'
Skipping file 20230612_112630_standard_comp_to_training_D1_subj_1-1_t1b3L_box2_merged.dio_ECU_Din2.dat due to error: [Errno 2] No such file or directory: './data\\20230612_112630_standard_comp_to_training_D1_subj_1-2_and_1-1.rec\\20230612_112630_standard_comp_to_training_D1_subj_1-1_t1b3L_box2_merged.DIO\\20230612_112630_standard_comp_to_training_D1_subj_1-1_t1b3L_box2_merged.dio_ECU_Din2.dat'
Skipping file 20230612_112630_standard_comp_to_training_D1_subj_1-1_t1b3L_box2_merged.dio_ECU_Din3.dat due to error: [Errno 2] 

In [9]:
# Attach the camera hardware-sync timestamps of each session to the nested
# trodes dictionary, under [session][session]["video_timestamps"][<camera id>].
for session_path in ALL_SESSION_DIR:
    try:
        session_basename = os.path.splitext(os.path.basename(session_path))[0]
        print("Current Session: {}".format(session_basename))
        # Not filled in by this cell; kept because cells outside this section
        # may still reference the name.
        file_to_video_timestamps = {}
        for video_timestamps in glob.glob(os.path.join(session_path, "*cameraHWSync")):
            video_basename = os.path.basename(video_timestamps)
            print("Current Video Name: {}".format(video_basename))
            timestamp_array = trodes.read_exported.read_trodes_extracted_data_file(video_timestamps)
            # Create the container on first use only, so timestamps stored
            # for earlier videos are not discarded.
            if "video_timestamps" not in session_to_trodes_data[session_basename][session_basename]:
                session_to_trodes_data[session_basename][session_basename]["video_timestamps"] = defaultdict(dict)
            # The key is the third-from-last dot-separated piece of the file name,
            # e.g. "1" in "<session>.1.videoTimeStamps.cameraHWSync".
            session_to_trodes_data[session_basename][session_basename]["video_timestamps"][video_basename.split(".")[-3]] = timestamp_array
    except Exception as e:
        # Best-effort: keep processing the remaining sessions, but name the
        # session and the exception type so the failure can be traced.
        print("Failed to read video timestamps for session {}: {}: {}".format(session_path, type(e).__name__, e))

Current Session: 20230612_112630_standard_comp_to_training_D1_subj_1-2_and_1-1
Current Video Name: 20230612_112630_standard_comp_to_training_D1_subj_1-2_and_1-1.1.videoTimeStamps.cameraHWSync
Current Video Name: 20230612_112630_standard_comp_to_training_D1_subj_1-2_and_1-1.2.videoTimeStamps.cameraHWSync


  return np.dtype(dtype_spec)


In [10]:
# Flatten the four-level nested dictionary into one row per innermost entry.
# Each row is keyed by the tuple of the four nesting keys; resetting the index
# turns those keys into columns, which are then given descriptive names.
trodes_metadata_df = (
    pd.DataFrame.from_dict(
        {
            (session_key, recording_key, dir_key, file_key): file_entry
            for session_key, recordings in session_to_trodes_data.items()
            for recording_key, metadata_dirs in recordings.items()
            for dir_key, metadata_files in metadata_dirs.items()
            for file_key, file_entry in metadata_files.items()
        },
        orient='index',
    )
    .reset_index()
    .rename(
        columns={
            'level_0': 'session_dir',
            'level_1': 'recording',
            'level_2': 'metadata_dir',
            'level_3': 'metadata_file',
        },
        errors="ignore",
    )
)

# Look up the on-disk path of each row's session.
trodes_metadata_df["session_path"] = trodes_metadata_df["session_dir"].map(session_to_path)

In [11]:
# Each entry of the "data" column is a structured numpy array. For every row,
# record the name of the array's first and last field, together with the full
# column of values stored under that field.
trodes_metadata_df = trodes_metadata_df.assign(
    first_dtype_name=lambda frame: frame["data"].apply(lambda arr: arr.dtype.names[0]),
    first_item_data=lambda frame: frame["data"].apply(lambda arr: arr[arr.dtype.names[0]]),
    last_dtype_name=lambda frame: frame["data"].apply(lambda arr: arr.dtype.names[-1]),
    last_item_data=lambda frame: frame["data"].apply(lambda arr: arr[arr.dtype.names[-1]]),
)

In [12]:
def split_by_multiple_delimiters(s, delimiters):
    """
    Splits a string by multiple delimiters.

    Delimiters are treated as literal text (regex metacharacters are escaped).
    When one delimiter is a prefix of another (e.g. "-" and "--"), the longer
    one takes precedence. Empty delimiters are ignored; if no usable delimiter
    remains, the string is returned unsplit.

    Parameters:
    - s (str): The string to split.
    - delimiters (list): A list of delimiters to split the string by.

    Returns:
    - list: A list of substrings.
    """
    # An empty alternative matches between every character and would shred the
    # string into single characters, so empty delimiters are dropped.
    usable_delimiters = [delimiter for delimiter in delimiters if delimiter]
    if not usable_delimiters:
        return [s]
    # Regex alternation takes the first alternative that matches, so longer
    # delimiters go first; otherwise "-" would shadow "--".
    usable_delimiters.sort(key=len, reverse=True)
    return re.split('|'.join(map(re.escape, usable_delimiters)), s)


In [13]:
# Subjects of the whole session: take the text after the last "subj" in the
# session name, turn "-" into "." so each ID reads like a decimal number, then
# collect the numbers found. The resulting strings are sorted as text.
trodes_metadata_df["all_subjects"] = trodes_metadata_df["session_dir"].apply(
    lambda name: sorted(
        extract_floats(name.split("subj")[-1].strip("_").replace("-", "."))
    )
)

# Subject of the individual recording: same parsing applied to the recording
# name, with "_" also mapped to ".", keeping only the first number found.
trodes_metadata_df["current_subject"] = trodes_metadata_df["recording"].apply(
    lambda name: str(
        extract_floats(
            name.split("subj")[-1].strip("_").replace("-", ".").replace("_", ".")
        )[0]
    ).strip()
)

In [14]:
# Metadata directories that are kept in the table.
METADATA_TO_KEEP = ['raw', 'DIO', 'video_timestamps']

# Apply the row filters one after another: keep only the wanted metadata
# directories, then drop files whose name contains "out" or "coordinates",
# and finally renumber the rows.
trodes_metadata_df = (
    trodes_metadata_df
    .loc[lambda frame: frame["metadata_dir"].isin(METADATA_TO_KEEP)]
    .loc[lambda frame: ~frame["metadata_file"].str.contains("out")]
    .loc[lambda frame: ~frame["metadata_file"].str.contains("coordinates")]
    .reset_index(drop=True)
)

# Independent copy holding only the raw "timestamps" rows.
trodes_raw_df = trodes_metadata_df.loc[
    lambda frame: (frame["metadata_dir"] == "raw") & (frame["metadata_file"] == "timestamps")
].copy()
# First element of each row's first-field data.
trodes_raw_df["first_timestamp"] = trodes_raw_df["first_item_data"].apply(lambda values: values[0])
# Lookup of first timestamp; note the keys are `session_dir` values.
recording_to_first_timestamp = trodes_raw_df.set_index('session_dir')['first_timestamp'].to_dict()

In [15]:
# Display the raw-timestamp rows for inspection.
trodes_raw_df

Unnamed: 0,session_dir,recording,metadata_dir,metadata_file,clock rate,fields,data,filename,session_path,first_dtype_name,first_item_data,last_dtype_name,last_item_data,all_subjects,current_subject,first_timestamp
