### Import libraries

In [2]:
import os
import sys
from datetime import datetime
import re
import yaml
import numpy as np
import pandas as pd
pd.set_option('mode.chained_assignment', None)
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import matplotlib.path as mpath
import seaborn as sns
from scipy.stats import ks_2samp, gaussian_kde
from scipy.stats import ttest_ind
from statsmodels.distributions.empirical_distribution import ECDF

# Add the parent directory of current directory to the Python path
sys.path.insert(0, os.path.abspath('..'))

from src.utils.process_session import *
from src.utils.optostim import *

### Set folder locations for analysed behaviour and keypoint data 

In [3]:
# Experiment name and the animals analysed in this notebook.
Experiment = "DLS_GtACR_opto-inhibition"
Animals = [
    "SP108", "SP110", "SP112",
    "SP148", "SP149", "SP150",
    "SP151", "SP152", "SP153",
]  # NOTE(review): the original line carried a trailing '## "SP150"' marker — purpose unclear
# One group label per animal; every animal listed above shares the same label.
Group = ["DLS-GtACR"] * len(Animals)

# Folder locations used by the notebook.
Camera_Folder = "/mnt/ceph/projects/sequences/SP_FlyCap"
data_Folder = "/mnt/ceph/projects/sequences/analysed_data"
deeplabcut_folder = "/Users/sthitapati/Library/CloudStorage/GoogleDrive-sthita.pati@gmail.com/My Drive/sequence_data/deeplabcut_keypoints"

# Per-experiment output folder: <data_Folder>/behaviour_output/<Experiment>
Output_Folder = os.path.join(data_Folder, "behaviour_output", Experiment)


In [4]:
# Figures are written to <Output_Folder>/plots; create the folder on first use
# and report which of the two cases applied.
save_folder = os.path.join(Output_Folder, "plots")
if os.path.exists(save_folder):
    print("Folder already exists: ", save_folder)
else:
    os.makedirs(save_folder)
    print("Created folder: ", save_folder)


Folder already exists:  /mnt/ceph/projects/sequences/analysed_data/behaviour_output/DLS_GtACR_opto-inhibition/plots


In [5]:
# Read every animal's pooled transition table and stack them into allsessions_DF.
# The per-animal frames are collected in a list and concatenated once, rather than
# re-copying the growing frame with a pd.concat call on every iteration.
per_animal_frames = []
for current_animal in Animals:
    print(current_animal)
    all_sessions_file_name = os.path.join(Output_Folder, current_animal, f'{current_animal}_transition_data_all_sessions.csv')
    print(all_sessions_file_name)
    per_animal_frames.append(pd.read_csv(all_sessions_file_name))

# axis=0 stacks rows. The index is not reset, so each animal keeps the row labels
# it was read with and the same label can occur under several animals.
allsessions_DF = pd.concat(per_animal_frames, axis=0)

# convert allsessions_DF['date'] to datetime
allsessions_DF['date'] = pd.to_datetime(allsessions_DF['date'])
allsessions_DF.head()

SP108
/mnt/ceph/projects/sequences/analysed_data/behaviour_output/DLS_GtACR_opto-inhibition/SP108/SP108_transition_data_all_sessions.csv
SP110
/mnt/ceph/projects/sequences/analysed_data/behaviour_output/DLS_GtACR_opto-inhibition/SP110/SP110_transition_data_all_sessions.csv
SP112
/mnt/ceph/projects/sequences/analysed_data/behaviour_output/DLS_GtACR_opto-inhibition/SP112/SP112_transition_data_all_sessions.csv
SP148
/mnt/ceph/projects/sequences/analysed_data/behaviour_output/DLS_GtACR_opto-inhibition/SP148/SP148_transition_data_all_sessions.csv
SP149
/mnt/ceph/projects/sequences/analysed_data/behaviour_output/DLS_GtACR_opto-inhibition/SP149/SP149_transition_data_all_sessions.csv
SP150
/mnt/ceph/projects/sequences/analysed_data/behaviour_output/DLS_GtACR_opto-inhibition/SP150/SP150_transition_data_all_sessions.csv
SP151
/mnt/ceph/projects/sequences/analysed_data/behaviour_output/DLS_GtACR_opto-inhibition/SP151/SP151_transition_data_all_sessions.csv
SP152
/mnt/ceph/projects/sequences/analys

Unnamed: 0.1,Unnamed: 0,trial_id,transition_type,start_poke_port,end_poke_port,start_poke_in_timestamp,start_poke_out_timestamp,end_poke_in_timestamp,end_poke_out_timestamp,out_in_latency,...,camera_port_in_times,camera_port_out_times,camera_trial_start_times,camera_trial_end_times,cumulative_trial_id,session_id,date,day,animal_id,group
0,0,1,33,3,3,252.4712,252.4845,252.4927,254.1755,0.0082,...,,,,,1,0,2023-04-21,Fri,SP108,Control
1,1,1,37,3,7,252.4927,254.1755,319.323,320.5142,65.1475,...,,,,,1,0,2023-04-21,Fri,SP108,Control
2,2,1,73,7,3,319.323,320.5142,321.0197,321.0275,0.5055,...,,,,,1,0,2023-04-21,Fri,SP108,Control
3,3,1,33,3,3,321.0197,321.0275,321.0635,321.184,0.036,...,,,,,1,0,2023-04-21,Fri,SP108,Control
4,4,1,37,3,7,321.0635,321.184,328.9522,330.1291,7.7682,...,,,,,1,0,2023-04-21,Fri,SP108,Control


In [6]:
# Collect the session details of every animal into session_details_DF.
# Each per-animal table is tagged with the animal's name in an 'Animal' column,
# gathered in a list, and concatenated once after the loop (instead of calling
# pd.concat on the growing frame in every iteration).
per_animal_details = []
for current_animal in Animals:
    print(current_animal)
    current_DF = get_session_details(Output_Folder, current_animal)
    current_DF['Animal'] = current_animal
    per_animal_details.append(current_DF)

# Row labels are kept as returned for each animal (no index reset).
session_details_DF = pd.concat(per_animal_details, axis=0)

session_details_DF.head()

SP108
SP110
SP112
SP148
SP149
SP150
SP151
SP152
SP153


Unnamed: 0,session_id,session,date,file_number,day,opto_session,stim_port,opto_chance,pulse_duration,pulse_interval,train_duration,train_delay,variable_train_delay,mu_variable_delay,sigma_variable_delay,lower_bound_variableDelay,upper_bound_variableDelay,experiment_type,Animal
0,0,00_20230421_104635_Fri,2023-04-21,104635,Fri,False,,,,,,,False,,,,,1_Training,SP108
1,1,01_20230422_142310_Sat,2023-04-22,142310,Sat,False,,,,,,,False,,,,,1_Training,SP108
2,2,02_20230422_142611_Sat,2023-04-22,142611,Sat,False,,,,,,,False,,,,,1_Training,SP108
3,3,03_20230423_181053_Sun,2023-04-23,181053,Sun,False,,,,,,,False,,,,,1_Training,SP108
4,4,04_20230424_110840_Mon,2023-04-24,110840,Mon,False,,,,,,,False,,,,,1_Training,SP108


In [7]:
# Load the opto-session file that holds the manually assigned dates for the
# analysis of opto sessions. The dates are curated by hand because some sessions
# were not included due to bugs in code.

# Constants & file loading
opto_session_file_path = '/mnt/ceph/projects/sequences/opto_session_dates.yaml'

# Open with an explicit encoding so parsing does not depend on the locale default.
# NOTE(review): yaml.load with FullLoader builds more than plain scalars/lists/dicts;
# if this file only holds animal -> date lists, yaml.safe_load would be the stricter
# choice — confirm against the file before switching.
with open(opto_session_file_path, encoding='utf-8') as f:
    opto_session_dates = yaml.load(f, Loader=yaml.FullLoader)

# To inspect one animal's entry: opto_session_dates['SP108']

In [8]:
# Keep only the rows whose (animal_id, date) is listed in opto_session_dates:
# one filtered frame per animal in the YAML mapping, stacked back into
# allsessions_DF afterwards.
sessions_to_include = [
    allsessions_DF[
        (allsessions_DF['animal_id'] == listed_animal)
        & (allsessions_DF['date'].isin(listed_dates))
    ]
    for listed_animal, listed_dates in opto_session_dates.items()
]

allsessions_DF = pd.concat(sessions_to_include)
allsessions_DF.head()

Unnamed: 0.1,Unnamed: 0,trial_id,transition_type,start_poke_port,end_poke_port,start_poke_in_timestamp,start_poke_out_timestamp,end_poke_in_timestamp,end_poke_out_timestamp,out_in_latency,...,camera_port_in_times,camera_port_out_times,camera_trial_start_times,camera_trial_end_times,cumulative_trial_id,session_id,date,day,animal_id,group
152846,0,1,73,7,3,231.908,232.1617,232.5651,232.7351,0.4034,...,,,,,19644,56,2023-08-04,Fri,SP108,Control
152847,1,1,31,3,1,232.5651,232.7351,242.7525,242.8526,10.0174,...,,,,,19644,56,2023-08-04,Fri,SP108,Control
152848,2,1,12,1,2,242.7525,242.8526,243.2966,243.2967,0.444,...,,,,,19644,56,2023-08-04,Fri,SP108,Control
152849,3,1,22,2,2,243.2966,243.2967,243.2968,243.5042,0.0001,...,,,,,19644,56,2023-08-04,Fri,SP108,Control
152850,4,1,26,2,6,243.2968,243.5042,244.0704,244.1507,0.5662,...,,,,,19644,56,2023-08-04,Fri,SP108,Control


In [9]:

# Value that session_details_DF['variable_train_delay'] must have for a session to be kept.
variable_delay = False

# Session details of opto sessions (opto_session == True) whose
# variable_train_delay equals variable_delay.
is_opto_session = session_details_DF['opto_session'] == True
has_requested_delay = session_details_DF['variable_train_delay'] == variable_delay
matching_session_details = session_details_DF.loc[is_opto_session & has_requested_delay]

# Mapping animal -> list of dates of the matching sessions.
filtered_sessions_dict = matching_session_details.groupby('Animal')['date'].apply(list).to_dict()

# One frame per animal holding the transition rows recorded on those dates.
filtered_sessions = [
    allsessions_DF[
        (allsessions_DF['animal_id'] == session_animal)
        & (allsessions_DF['date'].isin(session_dates))
    ]
    for session_animal, session_dates in filtered_sessions_dict.items()
]

filtered_DF = pd.concat(filtered_sessions)

# Animals that contribute rows after this filter.
filtered_DF['animal_id'].unique()

array(['SP108', 'SP110', 'SP112', 'SP148', 'SP149', 'SP151', 'SP152',
       'SP153'], dtype=object)

In [10]:
# Restrict filtered_DF to one training level: 50 is non-assisted and 13 is assisted.
training_level = 50
at_training_level = filtered_DF['training_level'] == training_level
filtered_DF = filtered_DF.loc[at_training_level]

# Animals that still have rows at this training level.
filtered_DF['animal_id'].unique()

array(['SP108', 'SP110', 'SP112', 'SP148', 'SP151', 'SP153'], dtype=object)

In [11]:
# Drop the last num_trials_to_drop rows of every session, where a session is one
# (animal_id, session_id) combination in filtered_DF.
#
# Rows are selected by their position inside each session rather than by index
# label. filtered_DF's labels come from per-animal tables that were stacked without
# resetting the index, so one label can occur under several animals; a label-based
# drop() would then also delete rows of other animals that share a label.
# NOTE(review): the count applies to rows of filtered_DF; several rows can share a
# trial_id, so this may remove fewer than num_trials_to_drop whole trials — confirm intent.
num_trials_to_drop = 20

# 0 for the last row of a session, 1 for the one before it, and so on.
rows_from_session_end = filtered_DF.groupby(['animal_id', 'session_id']).cumcount(ascending=False)
# Plain boolean array -> purely positional selection, no label alignment involved.
filtered_DF = filtered_DF[(rows_from_session_end >= num_trials_to_drop).to_numpy()]

filtered_DF.head()

Unnamed: 0.1,Unnamed: 0,trial_id,transition_type,start_poke_port,end_poke_port,start_poke_in_timestamp,start_poke_out_timestamp,end_poke_in_timestamp,end_poke_out_timestamp,out_in_latency,...,camera_port_in_times,camera_port_out_times,camera_trial_start_times,camera_trial_end_times,cumulative_trial_id,session_id,date,day,animal_id,group
152846,0,1,73,7,3,231.908,232.1617,232.5651,232.7351,0.4034,...,,,,,19644,56,2023-08-04,Fri,SP108,Control
152847,1,1,31,3,1,232.5651,232.7351,242.7525,242.8526,10.0174,...,,,,,19644,56,2023-08-04,Fri,SP108,Control
152848,2,1,12,1,2,242.7525,242.8526,243.2966,243.2967,0.444,...,,,,,19644,56,2023-08-04,Fri,SP108,Control
152849,3,1,22,2,2,243.2966,243.2967,243.2968,243.5042,0.0001,...,,,,,19644,56,2023-08-04,Fri,SP108,Control
152850,4,1,26,2,6,243.2968,243.5042,244.0704,244.1507,0.5662,...,,,,,19644,56,2023-08-04,Fri,SP108,Control


In [12]:
# Animals named in animal_ids_to_drop are removed from filtered_DF;
# with the empty list below no rows are removed.
animal_ids_to_drop = []
is_excluded_animal = filtered_DF['animal_id'].isin(animal_ids_to_drop)
filtered_DF = filtered_DF.loc[~is_excluded_animal]
filtered_DF['animal_id'].unique()

array(['SP108', 'SP110', 'SP112', 'SP148', 'SP151', 'SP153'], dtype=object)

In [13]:
# Split filtered_DF into opto rows and control rows.

# Only opto rows with exactly this opto_duration are kept.
opto_duration = 1.5

# Opto: opto_condition is 1 AND opto_duration equals the value above
# (rows with any other duration, or a missing one, are left out).
is_opto_trial = filtered_DF['opto_condition'] == 1
has_target_duration = filtered_DF['opto_duration'] == opto_duration
allsessions_DF_opto = filtered_DF[is_opto_trial & has_target_duration]

# Control: opto_condition is NaN.
is_control_trial = filtered_DF['opto_condition'].isna()
control_df = filtered_DF[is_control_trial]

In [14]:
# Preview the first rows of the opto table built from filtered_DF.
allsessions_DF_opto.head()

Unnamed: 0.1,Unnamed: 0,trial_id,transition_type,start_poke_port,end_poke_port,start_poke_in_timestamp,start_poke_out_timestamp,end_poke_in_timestamp,end_poke_out_timestamp,out_in_latency,...,camera_port_in_times,camera_port_out_times,camera_trial_start_times,camera_trial_end_times,cumulative_trial_id,session_id,date,day,animal_id,group
160707,0,1,22,2,2,29.7788,29.7789,29.779,29.8424,0.0001,...,114.992125,114.992225,114.992125,122.121625,20661,58,2023-08-09,Wed,SP108,Control
160708,1,1,22,2,2,29.779,29.8424,30.3803,30.7357,0.5379,...,114.992325,115.055725,114.992125,122.121625,20661,58,2023-08-09,Wed,SP108,Control
160709,2,1,21,2,1,30.3803,30.7357,31.0026,31.3881,0.2669,...,115.593625,115.949025,114.992125,122.121625,20661,58,2023-08-09,Wed,SP108,Control
160710,3,1,16,1,6,31.0026,31.3881,31.787,32.1072,0.3989,...,116.215925,116.601425,114.992125,122.121625,20661,58,2023-08-09,Wed,SP108,Control
160711,4,1,63,6,3,31.787,32.1072,32.3964,32.4308,0.2892,...,117.000325,117.320525,114.992125,122.121625,20661,58,2023-08-09,Wed,SP108,Control


In [15]:
# Animals that contribute rows to the control table.
control_df['animal_id'].unique()

array(['SP108', 'SP110', 'SP112', 'SP148', 'SP151', 'SP153'], dtype=object)

In [16]:
# Mapping from old port numbers to new port names.
port_mapping = {
    # Ports that are part of the correct sequence.
    2: 'port 1',
    1: 'port 2',
    6: 'port 3',
    3: 'port 4',
    7: 'port 5',
    # Ports not included in the correct sequence are mapped to X, Y, Z.
    # Note: replace 4, 5, 8 with the actual port numbers if different.
    4: 'port X',
    5: 'port Y',
    8: 'port Z',
}

In [17]:
# Take an independent copy of allsessions_DF_opto under the name opto_df_1_5,
# then list the distinct opto_duration values it contains as a sanity check.
opto_df_1_5 = allsessions_DF_opto.copy(deep=True)
pd.unique(opto_df_1_5['opto_duration'])

array([1.5])

In [22]:
# Split opto_df_1_5 into one table per value (1-4) of opto_stimulated_port.
stimulated_port = opto_df_1_5['opto_stimulated_port']
optostim_port_1, optostim_port_2, optostim_port_3, optostim_port_4 = [
    opto_df_1_5[stimulated_port == port_number] for port_number in (1, 2, 3, 4)
]


In [20]:
# Load the x, y table stored as allxy_df.csv in Output_Folder and preview it.
keypoints_path = os.path.join(Output_Folder, 'allxy_df.csv')
print(keypoints_path)
allxy_df = pd.read_csv(keypoints_path)
allxy_df.head()

/mnt/ceph/projects/sequences/analysed_data/behaviour_output/DLS_GtACR_opto-inhibition/allxy_df.csv


Unnamed: 0,x,y,trial_id,animal_id,date
0,683.318665,467.440328,21066,SP108,2023-08-10
1,680.025859,467.94696,21066,SP108,2023-08-10
2,677.233053,468.147319,21066,SP108,2023-08-10
3,674.268331,467.651845,21066,SP108,2023-08-10
4,672.653198,467.897573,21066,SP108,2023-08-10


In [26]:
# Unique trial ids and animal ids present in optostim_port_1.
# Kept as notebook-level names because other cells may read them.
cumulative_trial_id = optostim_port_1['cumulative_trial_id'].unique()
animal_id = optostim_port_1['animal_id'].unique()

# Select the x, y rows of allxy_df that belong to the trials in optostim_port_1.
# A trial is identified by the (animal_id, cumulative_trial_id) pair, compared
# against allxy_df's (animal_id, trial_id). The pair is matched as a unit: testing
# the two columns with independent isin() calls would also accept a trial id that
# was selected for one animal wherever the same id occurs under another selected animal.
selected_trial_keys = pd.MultiIndex.from_frame(
    optostim_port_1[['animal_id', 'cumulative_trial_id']].drop_duplicates()
)
xy_trial_keys = pd.MultiIndex.from_frame(allxy_df[['animal_id', 'trial_id']])
optostim_port_1_xy = allxy_df[xy_trial_keys.isin(selected_trial_keys)]

# save optostim_port_1_xy to a csv file (row labels are not written)
optostim_port_1_xy_file = os.path.join(Output_Folder, 'optostim_port_1_xy.csv')
optostim_port_1_xy.to_csv(optostim_port_1_xy_file, index=False)

In [27]:
# Unique trial ids and animal ids present in optostim_port_2.
# Kept as notebook-level names because other cells may read them.
cumulative_trial_id = optostim_port_2['cumulative_trial_id'].unique()
animal_id = optostim_port_2['animal_id'].unique()

# Select the x, y rows of allxy_df that belong to the trials in optostim_port_2.
# A trial is identified by the (animal_id, cumulative_trial_id) pair, compared
# against allxy_df's (animal_id, trial_id). The pair is matched as a unit: testing
# the two columns with independent isin() calls would also accept a trial id that
# was selected for one animal wherever the same id occurs under another selected animal.
selected_trial_keys = pd.MultiIndex.from_frame(
    optostim_port_2[['animal_id', 'cumulative_trial_id']].drop_duplicates()
)
xy_trial_keys = pd.MultiIndex.from_frame(allxy_df[['animal_id', 'trial_id']])
optostim_port_2_xy = allxy_df[xy_trial_keys.isin(selected_trial_keys)]

# save optostim_port_2_xy to a csv file (row labels are not written)
optostim_port_2_xy_file = os.path.join(Output_Folder, 'optostim_port_2_xy.csv')
optostim_port_2_xy.to_csv(optostim_port_2_xy_file, index=False)

In [28]:
# Unique trial ids and animal ids present in optostim_port_3.
# Kept as notebook-level names because other cells may read them.
cumulative_trial_id = optostim_port_3['cumulative_trial_id'].unique()
animal_id = optostim_port_3['animal_id'].unique()

# Select the x, y rows of allxy_df that belong to the trials in optostim_port_3.
# A trial is identified by the (animal_id, cumulative_trial_id) pair, compared
# against allxy_df's (animal_id, trial_id). The pair is matched as a unit: testing
# the two columns with independent isin() calls would also accept a trial id that
# was selected for one animal wherever the same id occurs under another selected animal.
selected_trial_keys = pd.MultiIndex.from_frame(
    optostim_port_3[['animal_id', 'cumulative_trial_id']].drop_duplicates()
)
xy_trial_keys = pd.MultiIndex.from_frame(allxy_df[['animal_id', 'trial_id']])
optostim_port_3_xy = allxy_df[xy_trial_keys.isin(selected_trial_keys)]

# save optostim_port_3_xy to a csv file (row labels are not written)
optostim_port_3_xy_file = os.path.join(Output_Folder, 'optostim_port_3_xy.csv')
optostim_port_3_xy.to_csv(optostim_port_3_xy_file, index=False)

In [29]:
# Unique trial ids and animal ids present in optostim_port_4.
# Kept as notebook-level names because other cells may read them.
cumulative_trial_id = optostim_port_4['cumulative_trial_id'].unique()
animal_id = optostim_port_4['animal_id'].unique()

# Select the x, y rows of allxy_df that belong to the trials in optostim_port_4.
# A trial is identified by the (animal_id, cumulative_trial_id) pair, compared
# against allxy_df's (animal_id, trial_id). The pair is matched as a unit: testing
# the two columns with independent isin() calls would also accept a trial id that
# was selected for one animal wherever the same id occurs under another selected animal.
selected_trial_keys = pd.MultiIndex.from_frame(
    optostim_port_4[['animal_id', 'cumulative_trial_id']].drop_duplicates()
)
xy_trial_keys = pd.MultiIndex.from_frame(allxy_df[['animal_id', 'trial_id']])
optostim_port_4_xy = allxy_df[xy_trial_keys.isin(selected_trial_keys)]

# save optostim_port_4_xy to a csv file (row labels are not written)
optostim_port_4_xy_file = os.path.join(Output_Folder, 'optostim_port_4_xy.csv')
optostim_port_4_xy.to_csv(optostim_port_4_xy_file, index=False)

In [30]:
# Unique trial ids and animal ids present in control_df.
# Kept as notebook-level names because other cells may read them.
cumulative_trial_id = control_df['cumulative_trial_id'].unique()
animal_id = control_df['animal_id'].unique()

# Select the x, y rows of allxy_df that belong to the trials in control_df.
# A trial is identified by the (animal_id, cumulative_trial_id) pair, compared
# against allxy_df's (animal_id, trial_id). The pair is matched as a unit: testing
# the two columns with independent isin() calls would also accept a trial id that
# was selected for one animal wherever the same id occurs under another selected animal.
selected_trial_keys = pd.MultiIndex.from_frame(
    control_df[['animal_id', 'cumulative_trial_id']].drop_duplicates()
)
xy_trial_keys = pd.MultiIndex.from_frame(allxy_df[['animal_id', 'trial_id']])
control_df_xy = allxy_df[xy_trial_keys.isin(selected_trial_keys)]

# save control_df_xy to a csv file (row labels are not written)
control_df_xy_file = os.path.join(Output_Folder, 'control_df_xy.csv')
control_df_xy.to_csv(control_df_xy_file, index=False)

In [59]:
# Preview the first rows of optostim_port_1_xy before its x, y coordinates are plotted.

optostim_port_1_xy.head() 

Unnamed: 0,x,y,trial_id,animal_id,date
52026,675.509847,472.040456,21079,SP108,2023-08-10
52027,672.940043,471.95284,21079,SP108,2023-08-10
52028,671.99764,471.388234,21079,SP108,2023-08-10
52029,671.420695,471.367045,21079,SP108,2023-08-10
52030,671.258219,471.923116,21079,SP108,2023-08-10
