# MED-PC Calculating Latencies

## Importing the Python Libraries

In [1]:
import sys
import glob
from collections import defaultdict
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
from medpc2excel.medpc_read import medpc_read

In [3]:
# Add the repository's source directory to the module search path so that the
# project-local packages imported in the next cell can be resolved.
# NOTE(review): the path is relative, so this assumes the notebook is run from
# a directory that has `../src` next to it — confirm the working directory.
sys.path.append('../src')

In [4]:
# All the libraries that were created for this repository
import extract.dataframe
import processing.tone
import extract.metadata

In [5]:
# Increase the default size of every matplotlib figure drawn in this notebook
# (figsize is given as width, height)

plt.rcParams["figure.figsize"] = (10,6)

# Loading in Recording and Metadata from Previous Notebook

# NOTE: If you changed the directory where the MED-PC recording dataframes are saved, then you must change the directories in the following paths

- This will get all the files in each specified path

In [6]:
# Collect every exported CSV of each kind (one sub-directory per cage/date range).
# glob.glob() returns paths in an arbitrary, filesystem-dependent order, so each
# list is sorted: index 0 then refers to the same file on every run, and the
# three lists line up by directory name (as long as every directory holds all
# three CSVs).
concatted_medpc_files = sorted(glob.glob("./data/extracted_recording_data_and_metadata/*/MEDPC_recording_cage_*.csv"))
metadata_files = sorted(glob.glob("./data/extracted_recording_data_and_metadata/*/metadata_cage_*.csv"))
recording_and_metadata_files = sorted(glob.glob("./data/extracted_recording_data_and_metadata/*/recording_and_metadata_cage_*.csv"))

# NOTE: If there is more than one MED-PC recording dataframe, then you must manually change the path passed to `pd.read_csv()` for the corresponding variables. Verify that this is the correct file that you want to use

In [7]:
metadata_files[0]

'./data/extracted_recording_data_and_metadata/cage_1_2_3_4_date_20220503_20220516/metadata_cage_1_2_3_4_date_20220503_20220516.csv'

In [8]:
concatted_medpc_files[0]

'./data/extracted_recording_data_and_metadata/cage_1_2_3_4_date_20220503_20220516/MEDPC_recording_cage_1_2_3_4_date_20220503_20220516.csv'

In [9]:
recording_and_metadata_files[0]

'./data/extracted_recording_data_and_metadata/cage_1_2_3_4_date_20220503_20220516/recording_and_metadata_cage_1_2_3_4_date_20220503_20220516.csv'

In [10]:
# Load the first file found of each kind.
# index_col=0 makes the first CSV column the DataFrame index instead of
# reading it in as a regular data column.
metadata_df = pd.read_csv(metadata_files[0], index_col=0)
concatted_medpc_df = pd.read_csv(concatted_medpc_files[0], index_col=0)
recording_and_metadata_df = pd.read_csv(recording_and_metadata_files[0], index_col=0)

- The Dataframe that contains the metadata (columns) of each recording file (rows)
    - We will mostly use this to get the cage number for each subject

In [11]:
metadata_df.head()

Unnamed: 0,index,File,Start Date,End Date,Subject,Experiment,Group,Box,Start Time,End Time,MSN,cage
0,./data/timestamp_dataframes/2022-05-06_12h59m_...,C:\MED-PC\Data\2022-05-06_12h59m_Subject 3.4 (...,05/06/22,05/06/22,3.4 (2),Pilot of Pilot,Cage 4,1,12:59:58,14:02:38,levelNP_CS_reward_laserepochON1st_noshock,4.0
1,./data/timestamp_dataframes/2022-05-06_08h37m_...,C:\MED-PC\Data\2022-05-06_08h37m_Subject 2.3.txt,05/06/22,05/06/22,2.3,Pilot of Pilot,Cage 1,1,08:37:09,09:53:25,levelNP_CS_reward_laserepochON1st_noshock,1.0
2,./data/timestamp_dataframes/2022-05-10_14h40m_...,C:\MED-PC\Data\2022-05-10_14h40m_Subject 4.3 (...,05/10/22,05/10/22,4.3 (3),Pilot of Pilot,Cage 4,2,14:40:24,15:43:18,levelNP_CS_reward_laserepochON1st_noshock,4.0
3,./data/timestamp_dataframes/2022-05-06_12h59m_...,C:\MED-PC\Data\2022-05-06_12h59m_Subject 4.3 (...,05/06/22,05/06/22,4.3 (3),Pilot of Pilot,Cage 4,2,12:59:58,14:02:38,levelNP_CS_reward_laserepochON1st_noshock,4.0
4,./data/timestamp_dataframes/2022-05-04_08h43m_...,C:\MED-PC\Data\2022-05-04_08h43m_Subject 2.3.txt,05/04/22,05/04/22,2.3,Pilot of Pilot,Cage 1,3,08:43:11,09:54:22,levelNP_CS_reward_laserepochON1st_noshock,1.0


- The Dataframe that contains the recording data for all the files.
    - The 1st row holds the first data entry for each type of data (with the type being specified by the column label). This does not mean that all of these are related or occurred during the same trial, i.e. there can be many port entries and exits for a subject before and after a tone is played.
    - Each recording session will usually have a few thousand rows (although most of the columns will probably be blank by the end). After the last row of one session, the next row will be the start of the next session, with a different `file_path`

In [12]:
concatted_medpc_df.head()

Unnamed: 0,(P)Portentry,(Q)USdelivery,(R)UStime,(W)ITIvalues,(S)CSpresentation,(N)Portexit,(K)CStype,(B)shockintensity,date,subject,file_path
0,12.34,64.0,399.0,0.0,60.01,12.39,1.0,0.0,20220506,3.4 (2),./data/timestamp_dataframes/2022-05-06_12h59m_...
1,14.6,144.0,399.0,0.0,140.01,14.79,1.0,0.0,20220506,3.4 (2),./data/timestamp_dataframes/2022-05-06_12h59m_...
2,23.95,234.0,399.0,0.0,230.01,24.88,1.0,0.0,20220506,3.4 (2),./data/timestamp_dataframes/2022-05-06_12h59m_...
3,31.83,314.0,399.0,0.0,310.01,31.9,1.0,0.0,20220506,3.4 (2),./data/timestamp_dataframes/2022-05-06_12h59m_...
4,31.99,389.0,399.0,0.0,385.01,32.09,1.0,0.0,20220506,3.4 (2),./data/timestamp_dataframes/2022-05-06_12h59m_...


- This dataframe combines the recording data and metadata dataframes. This is done by using the subject ID as a common column to merge together off of.

In [13]:
recording_and_metadata_df.head()

Unnamed: 0,(P)Portentry,(Q)USdelivery,(R)UStime,(W)ITIvalues,(S)CSpresentation,(N)Portexit,(K)CStype,(B)shockintensity,date,subject,...,Start Date,End Date,Subject,Experiment,Group,Box,Start Time,End Time,MSN,cage
0,12.34,64.0,399.0,0.0,60.01,12.39,1.0,0.0,20220506,3.4 (2),...,05/06/22,05/06/22,3.4 (2),Pilot of Pilot,Cage 4,1,12:59:58,14:02:38,levelNP_CS_reward_laserepochON1st_noshock,4
1,14.6,144.0,399.0,0.0,140.01,14.79,1.0,0.0,20220506,3.4 (2),...,05/06/22,05/06/22,3.4 (2),Pilot of Pilot,Cage 4,1,12:59:58,14:02:38,levelNP_CS_reward_laserepochON1st_noshock,4
2,23.95,234.0,399.0,0.0,230.01,24.88,1.0,0.0,20220506,3.4 (2),...,05/06/22,05/06/22,3.4 (2),Pilot of Pilot,Cage 4,1,12:59:58,14:02:38,levelNP_CS_reward_laserepochON1st_noshock,4
3,31.83,314.0,399.0,0.0,310.01,31.9,1.0,0.0,20220506,3.4 (2),...,05/06/22,05/06/22,3.4 (2),Pilot of Pilot,Cage 4,1,12:59:58,14:02:38,levelNP_CS_reward_laserepochON1st_noshock,4
4,31.99,389.0,399.0,0.0,385.01,32.09,1.0,0.0,20220506,3.4 (2),...,05/06/22,05/06/22,3.4 (2),Pilot of Pilot,Cage 4,1,12:59:58,14:02:38,levelNP_CS_reward_laserepochON1st_noshock,4


## Getting the Cage Numbers and the Dates for the file names

- Seeing if any rows have NaN values for the cages

In [14]:
metadata_df[pd.isna(metadata_df["cage"])]

Unnamed: 0,index,File,Start Date,End Date,Subject,Experiment,Group,Box,Start Time,End Time,MSN,cage
88,./data/timestamp_dataframes/2022-05-16_10h57m_...,C:\MED-PC\Data\2022-05-16_10h57m_Subject .txt,05/16/22,05/16/22,,,,1,10:57:55,11:01:10,pumptest,


- Dropping the NaN values so we can get the cage numbers

In [15]:
# Keep only the recordings that have a cage number; rows whose "cage" is NaN are discarded.
# Note that this rebinds `metadata_df`, so the frame displayed in earlier cells is replaced.
metadata_df = metadata_df.dropna(subset=["cage"])

In [16]:
# Unique cage numbers as integers, skipping missing values.
# pd.notna() is used because `x is not np.nan` is an identity check: a NaN that
# comes out of a pandas/NumPy array is usually a different object than `np.nan`,
# so it would slip through and make int() raise.
unique_cage_numbers = {int(number) for number in metadata_df["cage"].unique() if pd.notna(number)}
# Sort numerically *before* converting to text, so that e.g. cage 10 comes after
# cage 2 (sorting the strings would give "1", "10", "2", ...)
cage_numbers = [str(number) for number in sorted(unique_cage_numbers)]
# Cage numbers joined with underscores, e.g. "1_2_3_4"; used in output names
cage_numbers_for_title = "_".join(cage_numbers)

In [17]:
cage_numbers_for_title

'1_2_3_4'

In [18]:
# Getting the first and last recording date to get a range
# (both values are used further down to name the output directory).
# NOTE(review): the dates look like YYYYMMDD integers (e.g. 20220503), in which
# case the numeric min/max is also the chronological min/max — confirm.
earliest_date = concatted_medpc_df["date"].min()
latest_date = concatted_medpc_df["date"].max()

In [19]:
earliest_date

20220503

In [20]:
latest_date

20220516

# Getting the First Port Entry After Each Tone

- Getting all the rows that correspond to one of the files paths. This will be the equivalent of getting all the rows for one session

In [21]:
# Use the first distinct file path as the example session and keep only its rows
example_one_session_recording_df = recording_and_metadata_df.loc[
    recording_and_metadata_df["file_path"].eq(
        recording_and_metadata_df["file_path"].unique()[0]
    )
]

- Everything looks normal enough

In [22]:
example_one_session_recording_df.head()

Unnamed: 0,(P)Portentry,(Q)USdelivery,(R)UStime,(W)ITIvalues,(S)CSpresentation,(N)Portexit,(K)CStype,(B)shockintensity,date,subject,...,Start Date,End Date,Subject,Experiment,Group,Box,Start Time,End Time,MSN,cage
0,12.34,64.0,399.0,0.0,60.01,12.39,1.0,0.0,20220506,3.4 (2),...,05/06/22,05/06/22,3.4 (2),Pilot of Pilot,Cage 4,1,12:59:58,14:02:38,levelNP_CS_reward_laserepochON1st_noshock,4
1,14.6,144.0,399.0,0.0,140.01,14.79,1.0,0.0,20220506,3.4 (2),...,05/06/22,05/06/22,3.4 (2),Pilot of Pilot,Cage 4,1,12:59:58,14:02:38,levelNP_CS_reward_laserepochON1st_noshock,4
2,23.95,234.0,399.0,0.0,230.01,24.88,1.0,0.0,20220506,3.4 (2),...,05/06/22,05/06/22,3.4 (2),Pilot of Pilot,Cage 4,1,12:59:58,14:02:38,levelNP_CS_reward_laserepochON1st_noshock,4
3,31.83,314.0,399.0,0.0,310.01,31.9,1.0,0.0,20220506,3.4 (2),...,05/06/22,05/06/22,3.4 (2),Pilot of Pilot,Cage 4,1,12:59:58,14:02:38,levelNP_CS_reward_laserepochON1st_noshock,4
4,31.99,389.0,399.0,0.0,385.01,32.09,1.0,0.0,20220506,3.4 (2),...,05/06/22,05/06/22,3.4 (2),Pilot of Pilot,Cage 4,1,12:59:58,14:02:38,levelNP_CS_reward_laserepochON1st_noshock,4


- `(S)CSpresentation` is the time that the tone starts playing. After the real tone times, MED-PC fills this column with placeholder numbers in the thousands (e.g. 9000.0, 12000.0), so we will remove these

In [23]:
example_one_session_recording_df[50:60]

Unnamed: 0,(P)Portentry,(Q)USdelivery,(R)UStime,(W)ITIvalues,(S)CSpresentation,(N)Portexit,(K)CStype,(B)shockintensity,date,subject,...,Start Date,End Date,Subject,Experiment,Group,Box,Start Time,End Time,MSN,cage
50,438.69,,,0.0,9000.0,439.62,0.0,0.0,20220506,3.4 (2),...,05/06/22,05/06/22,3.4 (2),Pilot of Pilot,Cage 4,1,12:59:58,14:02:38,levelNP_CS_reward_laserepochON1st_noshock,4
51,446.71,,,0.0,12000.0,447.92,0.0,0.0,20220506,3.4 (2),...,05/06/22,05/06/22,3.4 (2),Pilot of Pilot,Cage 4,1,12:59:58,14:02:38,levelNP_CS_reward_laserepochON1st_noshock,4
52,452.6,,,0.0,9000.0,453.28,0.0,0.0,20220506,3.4 (2),...,05/06/22,05/06/22,3.4 (2),Pilot of Pilot,Cage 4,1,12:59:58,14:02:38,levelNP_CS_reward_laserepochON1st_noshock,4
53,460.11,,,0.0,8500.0,461.21,0.0,0.0,20220506,3.4 (2),...,05/06/22,05/06/22,3.4 (2),Pilot of Pilot,Cage 4,1,12:59:58,14:02:38,levelNP_CS_reward_laserepochON1st_noshock,4
54,464.04,,,0.0,9000.0,464.49,0.0,0.0,20220506,3.4 (2),...,05/06/22,05/06/22,3.4 (2),Pilot of Pilot,Cage 4,1,12:59:58,14:02:38,levelNP_CS_reward_laserepochON1st_noshock,4
55,475.64,,,0.0,9500.0,476.41,0.0,0.0,20220506,3.4 (2),...,05/06/22,05/06/22,3.4 (2),Pilot of Pilot,Cage 4,1,12:59:58,14:02:38,levelNP_CS_reward_laserepochON1st_noshock,4
56,484.23,,,0.0,12000.0,484.53,0.0,0.0,20220506,3.4 (2),...,05/06/22,05/06/22,3.4 (2),Pilot of Pilot,Cage 4,1,12:59:58,14:02:38,levelNP_CS_reward_laserepochON1st_noshock,4
57,487.74,,,0.0,8000.0,489.12,0.0,0.0,20220506,3.4 (2),...,05/06/22,05/06/22,3.4 (2),Pilot of Pilot,Cage 4,1,12:59:58,14:02:38,levelNP_CS_reward_laserepochON1st_noshock,4
58,495.85,,,0.0,9500.0,499.31,0.0,0.0,20220506,3.4 (2),...,05/06/22,05/06/22,3.4 (2),Pilot of Pilot,Cage 4,1,12:59:58,14:02:38,levelNP_CS_reward_laserepochON1st_noshock,4
59,505.47,,,0.0,8000.0,507.17,0.0,0.0,20220506,3.4 (2),...,05/06/22,05/06/22,3.4 (2),Pilot of Pilot,Cage 4,1,12:59:58,14:02:38,levelNP_CS_reward_laserepochON1st_noshock,4


- `(S)CSpresentation` then starts adding blanks(NaN) so we will remove these too

In [24]:
example_one_session_recording_df.tail()

Unnamed: 0,(P)Portentry,(Q)USdelivery,(R)UStime,(W)ITIvalues,(S)CSpresentation,(N)Portexit,(K)CStype,(B)shockintensity,date,subject,...,Start Date,End Date,Subject,Experiment,Group,Box,Start Time,End Time,MSN,cage
2536,,,,,,,1.0,,20220506,3.4 (2),...,05/06/22,05/06/22,3.4 (2),Pilot of Pilot,Cage 4,1,12:59:58,14:02:38,levelNP_CS_reward_laserepochON1st_noshock,4
2537,,,,,,,1.0,,20220506,3.4 (2),...,05/06/22,05/06/22,3.4 (2),Pilot of Pilot,Cage 4,1,12:59:58,14:02:38,levelNP_CS_reward_laserepochON1st_noshock,4
2538,,,,,,,1.0,,20220506,3.4 (2),...,05/06/22,05/06/22,3.4 (2),Pilot of Pilot,Cage 4,1,12:59:58,14:02:38,levelNP_CS_reward_laserepochON1st_noshock,4
2539,,,,,,,1.0,,20220506,3.4 (2),...,05/06/22,05/06/22,3.4 (2),Pilot of Pilot,Cage 4,1,12:59:58,14:02:38,levelNP_CS_reward_laserepochON1st_noshock,4
2540,,,,,,,1.0,,20220506,3.4 (2),...,05/06/22,05/06/22,3.4 (2),Pilot of Pilot,Cage 4,1,12:59:58,14:02:38,levelNP_CS_reward_laserepochON1st_noshock,4


- Getting the tones that were actually used for the recordings

In [25]:
# Reduce the session's tone-onset column to the tones that were actually played.
# NOTE(review): get_valid_tones lives in the project's processing.tone module
# (not shown in this notebook); going by the notes above it presumably drops
# the placeholder values and the trailing NaNs — confirm in its source.
example_one_session_valid_tones = processing.tone.get_valid_tones(example_one_session_recording_df["(S)CSpresentation"])

In [26]:
example_one_session_valid_tones

0       60.01
1      140.01
2      230.01
3      310.01
4      385.01
5      485.01
6      580.01
7      670.01
8      750.01
9      840.01
10     940.01
11    1030.01
12    1150.01
13    1240.01
14    1325.01
15    1415.01
16    1510.01
17    1630.01
18    1710.01
19    1805.01
20    1885.01
21    1975.01
22    2055.01
23    2130.01
24    2230.01
25    2325.01
26    2415.01
27    2495.01
28    2585.01
29    2685.01
30    2775.01
31    2895.01
32    2985.01
33    3070.01
34    3160.01
35    3255.01
36    3345.01
37    3425.01
38    3515.01
39    3615.01
Name: (S)CSpresentation, dtype: float64

- With these tone playing times, we will get the first port entry that comes after the tone playing time
    - This was done by getting all the port entries that came after the tone. And then getting the port entry time that was earliest in time(aka smallest number)

In [27]:
# For each valid tone of the example session, look up the first port entry
# that follows it together with that entry's port exit
processing.tone.get_first_port_entries_after_tone(
    tone_pd_series=example_one_session_valid_tones,
    port_entries_pd_series=example_one_session_recording_df["(P)Portentry"],
    port_exits_pd_series=example_one_session_recording_df["(N)Portexit"],
)

Unnamed: 0,current_tone_time,first_port_entry_after_tone,port_exit_after_first_port_entry_after_tone
0,60.01,69.0,73.66
1,140.01,148.27,151.69
2,230.01,231.91,233.4
3,310.01,320.97,327.71
4,385.01,394.75,398.05
5,485.01,487.74,489.12
6,580.01,584.0,598.64
7,670.01,676.26,676.3
8,750.01,777.83,782.02
9,840.01,840.06,841.92


- Getting the first port entry times for all the sessions

In [28]:
# Build the "first port entry after each tone" table for all recordings at once.
# NOTE(review): the helper is defined in the project's processing.tone module;
# judging by the output columns (file_path, date, subject) it presumably
# processes each session separately and concatenates the results — confirm.
concatted_first_porty_entry_dataframe = processing.tone.get_concatted_first_porty_entry_after_tone_dataframe(concatted_medpc_df=concatted_medpc_df)

In [29]:
concatted_first_porty_entry_dataframe.head()

Unnamed: 0,current_tone_time,first_port_entry_after_tone,port_exit_after_first_port_entry_after_tone,file_path,date,subject
0,60.01,69.0,73.66,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2)
1,140.01,148.27,151.69,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2)
2,230.01,231.91,233.4,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2)
3,310.01,320.97,327.71,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2)
4,385.01,394.75,398.05,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2)


In [30]:
concatted_first_porty_entry_dataframe.head()

Unnamed: 0,current_tone_time,first_port_entry_after_tone,port_exit_after_first_port_entry_after_tone,file_path,date,subject
0,60.01,69.0,73.66,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2)
1,140.01,148.27,151.69,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2)
2,230.01,231.91,233.4,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2)
3,310.01,320.97,327.71,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2)
4,385.01,394.75,398.05,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2)


# Getting the Last Port Entry Before the Tone

- We will do the same, but get the port entries that happened before the tone

In [31]:
# Port entry and port exit time columns of the example session; both are
# passed to get_last_port_entries_before_tone in the next cell
port_entries_pd_series = example_one_session_recording_df["(P)Portentry"]
port_exit_pd_series = example_one_session_recording_df["(N)Portexit"]

- Example run for one session

In [32]:
# For each valid tone of the example session, look up the last port entry
# that precedes it together with that entry's port exit
example_last_port_entries_before_tone = processing.tone.get_last_port_entries_before_tone(
    tone_pd_series=example_one_session_valid_tones,
    port_entries_pd_series=port_entries_pd_series,
    port_exits_pd_series=port_exit_pd_series,
)

In [33]:
example_last_port_entries_before_tone

Unnamed: 0,current_tone_time,last_port_entry_before_tone,port_exit_after_last_port_entry_before_tone
0,60.01,54.9,55.73
1,140.01,125.14,126.71
2,230.01,224.43,225.0
3,310.01,298.91,299.28
4,385.01,384.15,384.46
5,485.01,484.23,484.53
6,580.01,578.18,579.04
7,670.01,667.64,668.2
8,750.01,734.83,734.87
9,840.01,826.15,826.57


- Getting all the port entries that happen before the tone for all sessions

In [34]:
# Build the "last port entry before each tone" table for all recordings at once.
# NOTE(review): the helper is defined in the project's processing.tone module;
# judging by the output columns (file_path, date, subject) it presumably
# processes each session separately and concatenates the results — confirm.
concatted_last_porty_entry_dataframe = processing.tone.get_concatted_last_porty_entry_before_tone_dataframe(concatted_medpc_df=concatted_medpc_df)

In [35]:
concatted_last_porty_entry_dataframe

Unnamed: 0,current_tone_time,last_port_entry_before_tone,port_exit_after_last_port_entry_before_tone,file_path,date,subject
0,60.01,54.90,55.73,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2)
1,140.01,125.14,126.71,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2)
2,230.01,224.43,225.00,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2)
3,310.01,298.91,299.28,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2)
4,385.01,384.15,384.46,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2)
...,...,...,...,...,...,...
6548,3160.01,3152.67,3155.50,./data/timestamp_dataframes/2022-05-08_11h14m_...,20220508,1.3
6549,3255.01,3245.72,3246.89,./data/timestamp_dataframes/2022-05-08_11h14m_...,20220508,1.3
6550,3345.01,3344.18,3345.95,./data/timestamp_dataframes/2022-05-08_11h14m_...,20220508,1.3
6551,3425.01,3423.33,3424.15,./data/timestamp_dataframes/2022-05-08_11h14m_...,20220508,1.3


## Merging the dataframes for the first port entry after the tone and the last port entry before the tone

In [36]:
# Left-join the "before tone" table onto the "after tone" table, matching rows
# on the tone time and the session file path. Non-key columns that exist in
# both tables come back from the right-hand table with a "_y" suffix.
concatted_first_and_last_porty_entry_dataframe = concatted_first_porty_entry_dataframe.merge(
    concatted_last_porty_entry_dataframe,
    how='left',
    on=['current_tone_time', 'file_path'],
    suffixes=('', '_y'),
)
# Discard the "_y" duplicates so that only one copy of each shared column remains
concatted_first_and_last_porty_entry_dataframe = concatted_first_and_last_porty_entry_dataframe.drop(
    columns=list(concatted_first_and_last_porty_entry_dataframe.filter(regex='_y$').columns)
)

In [37]:
concatted_first_and_last_porty_entry_dataframe.head()

Unnamed: 0,current_tone_time,first_port_entry_after_tone,port_exit_after_first_port_entry_after_tone,file_path,date,subject,last_port_entry_before_tone,port_exit_after_last_port_entry_before_tone
0,60.01,69.0,73.66,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2),54.9,55.73
1,140.01,148.27,151.69,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2),125.14,126.71
2,230.01,231.91,233.4,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2),224.43,225.0
3,310.01,320.97,327.71,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2),298.91,299.28
4,385.01,394.75,398.05,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2),384.15,384.46


In [38]:
concatted_first_and_last_porty_entry_dataframe.shape

(6553, 8)

- Getting all the rows that have port entry to port exit time that overlaps with the tone playing
    - This would be the port entries that came before the tone, but had a port exit after

In [39]:
# Rows where the animal was still in the port when the tone started: the port
# exit that follows the last pre-tone entry happens at or after the tone time
port_entries_that_overlap = concatted_first_and_last_porty_entry_dataframe.loc[
    lambda merged: merged["port_exit_after_last_port_entry_before_tone"].ge(
        merged["current_tone_time"]
    )
]

In [40]:
port_entries_that_overlap.head()

Unnamed: 0,current_tone_time,first_port_entry_after_tone,port_exit_after_first_port_entry_after_tone,file_path,date,subject,last_port_entry_before_tone,port_exit_after_last_port_entry_before_tone
21,1975.01,1975.87,1976.1,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2),1973.96,1975.69
47,670.01,670.27,670.89,./data/timestamp_dataframes/2022-05-06_08h37m_...,20220506,2.3,669.88,670.17
53,1240.01,1242.58,1243.36,./data/timestamp_dataframes/2022-05-06_08h37m_...,20220506,2.3,1239.45,1240.12
62,2055.01,2067.11,2073.44,./data/timestamp_dataframes/2022-05-06_08h37m_...,20220506,2.3,2052.44,2055.04
63,2130.01,2134.46,2134.66,./data/timestamp_dataframes/2022-05-06_08h37m_...,20220506,2.3,2129.2,2130.11


In [41]:
port_entries_that_overlap.shape

(1507, 8)

# Getting the Latency for Each Session of a Tone Playing

- To calculate latency, we subtract the time that the tone started playing from the time of the first port entry after it

In [42]:
# Latency = time of the first port entry after the tone minus the tone onset.
# Both operands are taken from the merged dataframe itself, so the subtraction
# is always row-consistent. Reading the port-entry column from the pre-merge
# dataframe would only work while the merge leaves the row index untouched.
concatted_first_and_last_porty_entry_dataframe["latency"] = (
    concatted_first_and_last_porty_entry_dataframe["first_port_entry_after_tone"]
    - concatted_first_and_last_porty_entry_dataframe["current_tone_time"]
)

In [43]:
concatted_first_and_last_porty_entry_dataframe.head()

Unnamed: 0,current_tone_time,first_port_entry_after_tone,port_exit_after_first_port_entry_after_tone,file_path,date,subject,last_port_entry_before_tone,port_exit_after_last_port_entry_before_tone,latency
0,60.01,69.0,73.66,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2),54.9,55.73,8.99
1,140.01,148.27,151.69,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2),125.14,126.71,8.26
2,230.01,231.91,233.4,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2),224.43,225.0,1.9
3,310.01,320.97,327.71,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2),298.91,299.28,10.96
4,385.01,394.75,398.05,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2),384.15,384.46,9.74


- For all the latencies that are 30 seconds or greater, we changed the latency to 30 seconds

In [44]:
# Cap every latency at 30 seconds; shorter latencies (and NaNs) are left as they are
concatted_first_and_last_porty_entry_dataframe["latency_adjusted_greater_than_30"] = (
    concatted_first_and_last_porty_entry_dataframe["latency"].clip(upper=30)
)

In [45]:
concatted_first_and_last_porty_entry_dataframe[concatted_first_and_last_porty_entry_dataframe["latency"] >= 30].head()

Unnamed: 0,current_tone_time,first_port_entry_after_tone,port_exit_after_first_port_entry_after_tone,file_path,date,subject,last_port_entry_before_tone,port_exit_after_last_port_entry_before_tone,latency,latency_adjusted_greater_than_30
12,1150.01,1224.44,1229.68,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2),1129.43,1131.25,74.43,30.0
23,2130.01,2192.23,2195.37,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2),2093.55,2094.05,62.22,30.0
109,2585.01,2615.82,2619.22,./data/timestamp_dataframes/2022-05-10_14h40m_...,20220510,4.3 (3),2580.63,2581.81,30.81,30.0
132,940.01,982.59,982.62,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,4.3 (3),931.0,931.97,42.58,30.0
154,2985.01,3023.64,3027.26,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,4.3 (3),2982.57,2984.74,38.63,30.0


# Seeing which port entries and port exit durations overlap with the tone

- Making a column that states whether or not the last port entry before the tone has a port exit after the tone. This would mean that they are overlapping.

In [46]:
# True when the port exit belonging to the last pre-tone entry happens at or
# after the tone onset, i.e. the visit to the port overlaps with the tone
concatted_first_and_last_porty_entry_dataframe["overlapping_port_entry_with_tone"] = (
    concatted_first_and_last_porty_entry_dataframe["port_exit_after_last_port_entry_before_tone"]
    >= concatted_first_and_last_porty_entry_dataframe["current_tone_time"]
)


In [47]:
concatted_first_and_last_porty_entry_dataframe.head()

Unnamed: 0,current_tone_time,first_port_entry_after_tone,port_exit_after_first_port_entry_after_tone,file_path,date,subject,last_port_entry_before_tone,port_exit_after_last_port_entry_before_tone,latency,latency_adjusted_greater_than_30,overlapping_port_entry_with_tone
0,60.01,69.0,73.66,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2),54.9,55.73,8.99,8.99,False
1,140.01,148.27,151.69,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2),125.14,126.71,8.26,8.26,False
2,230.01,231.91,233.4,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2),224.43,225.0,1.9,1.9,False
3,310.01,320.97,327.71,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2),298.91,299.28,10.96,10.96,False
4,385.01,394.75,398.05,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2),384.15,384.46,9.74,9.74,False


- Comparing the latencies of the first port entry after the tone between these two groups:
    - 1. Those with the last port entry before the tone that has a port exit after the tone("overlapping" group)
    - 2. Those with the last port entry before the tone that has a port exit before the tone("nonoverlapping" group)

In [48]:
# "Overlapping" group: tones that started while the animal was in the port
overlapping_df = concatted_first_and_last_porty_entry_dataframe.loc[
    lambda merged: merged["overlapping_port_entry_with_tone"]
]

In [49]:
# "Non-overlapping" group: tones that started while the animal was out of the port
non_overlapping_df = concatted_first_and_last_porty_entry_dataframe.loc[
    lambda merged: ~merged["overlapping_port_entry_with_tone"]
]

- Number of rows of overlapping and nonoverlapping last port entries before the tone

In [50]:
overlapping_df.shape

(1507, 11)

In [51]:
non_overlapping_df.shape

(5046, 11)

- Latency between the tone playing and the first port entry after the tone

In [52]:
overlapping_df["latency"].mean()

13.61821926910289

In [53]:
non_overlapping_df["latency"].mean()

14.063250805152887

- Latency between the tone playing and the first port entry after the tone. This had been adjusted so that all latencies that are greater than 30 seconds were adjusted to be just 30 seconds.

In [54]:
overlapping_df["latency_adjusted_greater_than_30"].mean()

12.339707641195924

In [55]:
non_overlapping_df["latency_adjusted_greater_than_30"].mean()

8.51532407407399

# Adjusting the Latencies if the Port Entries/Exit Overlap With the Tone Times

- For all the rows with overlapping last port entries before the tone, we will adjust the latency of the tone to the first port entry after the tone to 0. (Because the first port entry after the tone would be considered to be the same as the last port entry before the tone)

In [56]:
# Start from the 30-second-capped latency and overwrite it with 0 wherever the
# last port visit before the tone overlaps with the tone onset
concatted_first_and_last_porty_entry_dataframe["latency_adjusted_overlap"] = (
    concatted_first_and_last_porty_entry_dataframe["latency_adjusted_greater_than_30"].mask(
        concatted_first_and_last_porty_entry_dataframe["overlapping_port_entry_with_tone"], 0
    )
)

In [57]:
concatted_first_and_last_porty_entry_dataframe.head()

Unnamed: 0,current_tone_time,first_port_entry_after_tone,port_exit_after_first_port_entry_after_tone,file_path,date,subject,last_port_entry_before_tone,port_exit_after_last_port_entry_before_tone,latency,latency_adjusted_greater_than_30,overlapping_port_entry_with_tone,latency_adjusted_overlap
0,60.01,69.0,73.66,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2),54.9,55.73,8.99,8.99,False,8.99
1,140.01,148.27,151.69,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2),125.14,126.71,8.26,8.26,False,8.26
2,230.01,231.91,233.4,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2),224.43,225.0,1.9,1.9,False,1.9
3,310.01,320.97,327.71,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2),298.91,299.28,10.96,10.96,False,10.96
4,385.01,394.75,398.05,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2),384.15,384.46,9.74,9.74,False,9.74


- All the rows with overlapping last port entries before the tone

In [58]:
concatted_first_and_last_porty_entry_dataframe[concatted_first_and_last_porty_entry_dataframe["overlapping_port_entry_with_tone"]].head()

Unnamed: 0,current_tone_time,first_port_entry_after_tone,port_exit_after_first_port_entry_after_tone,file_path,date,subject,last_port_entry_before_tone,port_exit_after_last_port_entry_before_tone,latency,latency_adjusted_greater_than_30,overlapping_port_entry_with_tone,latency_adjusted_overlap
21,1975.01,1975.87,1976.1,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2),1973.96,1975.69,0.86,0.86,True,0.0
47,670.01,670.27,670.89,./data/timestamp_dataframes/2022-05-06_08h37m_...,20220506,2.3,669.88,670.17,0.26,0.26,True,0.0
53,1240.01,1242.58,1243.36,./data/timestamp_dataframes/2022-05-06_08h37m_...,20220506,2.3,1239.45,1240.12,2.57,2.57,True,0.0
62,2055.01,2067.11,2073.44,./data/timestamp_dataframes/2022-05-06_08h37m_...,20220506,2.3,2052.44,2055.04,12.1,12.1,True,0.0
63,2130.01,2134.46,2134.66,./data/timestamp_dataframes/2022-05-06_08h37m_...,20220506,2.3,2129.2,2130.11,4.45,4.45,True,0.0


## Labeling if a row's latency is less than 10 seconds or not

- We will be getting the ratio of latencies (from the time that the tone played to the first port entry after the tone) that are 10 seconds or less for each session. So we will label all the adjusted latencies that are 10 seconds or less as `True`, and those with latencies that are greater than 10 seconds as `False`

In [59]:
# Flag the rows whose 30-second-capped latency is 10 seconds or less
concatted_first_and_last_porty_entry_dataframe["original_latency_less_than_10_seconds"] = (
    concatted_first_and_last_porty_entry_dataframe["latency_adjusted_greater_than_30"].le(10)
)

In [60]:
# Flag the rows whose overlap-adjusted latency is 10 seconds or less
concatted_first_and_last_porty_entry_dataframe["overlap_adjusted_latency_less_than_10_seconds"] = (
    concatted_first_and_last_porty_entry_dataframe["latency_adjusted_overlap"].le(10)
)

In [61]:
concatted_first_and_last_porty_entry_dataframe.head()

Unnamed: 0,current_tone_time,first_port_entry_after_tone,port_exit_after_first_port_entry_after_tone,file_path,date,subject,last_port_entry_before_tone,port_exit_after_last_port_entry_before_tone,latency,latency_adjusted_greater_than_30,overlapping_port_entry_with_tone,latency_adjusted_overlap,original_latency_less_than_10_seconds,overlap_adjusted_latency_less_than_10_seconds
0,60.01,69.0,73.66,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2),54.9,55.73,8.99,8.99,False,8.99,True,True
1,140.01,148.27,151.69,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2),125.14,126.71,8.26,8.26,False,8.26,True,True
2,230.01,231.91,233.4,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2),224.43,225.0,1.9,1.9,False,1.9,True,True
3,310.01,320.97,327.71,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2),298.91,299.28,10.96,10.96,False,10.96,False,False
4,385.01,394.75,398.05,./data/timestamp_dataframes/2022-05-06_12h59m_...,20220506,3.4 (2),384.15,384.46,9.74,9.74,False,9.74,True,True


# Making Dataframes that have Each Subject as the Row and Each Date as the Column

- Pivot plots allow us to make columns out of each unique entry in a selected column. (For this, it will be the date column that we will turn into columns) The index for the rows will be each unique entry in another column. (For this, it will be the subject's ID) And the values will be the aggregate of the values that have both the values in the previous two selected columns. (For this it will be the latency columns and we will take the mean)
    - https://pandas.pydata.org/docs/reference/api/pandas.pivot_table.html

- Making a folder for all the pivotplots

# NOTE: If you decide to change the cage numbers or the dates used from the original ones from the imported dataframes, you will need to change the subsequent path to the cage number and dates you are using

In [62]:
# Output folder for the pivot tables, named after the cage numbers and the
# earliest/latest recording dates computed earlier in this notebook
pivot_plot_output_directory = "./data/subject_rows_with_date_columns_dataframes/cage_{}_date_{}_{}".format(cage_numbers_for_title, earliest_date, latest_date)

In [63]:
pivot_plot_output_directory

'./data/subject_rows_with_date_columns_dataframes/cage_1_2_3_4_date_20220503_20220516'

In [64]:
# Create the output folder (and any missing parent folders); exist_ok=True
# keeps this cell from failing when the folder already exists on a re-run
os.makedirs(pivot_plot_output_directory, exist_ok=True)

## Making a pivot plot with the original latency

In [65]:
# One row per subject, one column per recording date; each cell is the mean of
# the unadjusted latencies recorded for that subject on that date.
# The aggregation is given by name ("mean") rather than as np.mean: recent
# pandas versions emit a FutureWarning when a NumPy callable is passed here.
all_latency_pivot_plot = pd.pivot_table(
    concatted_first_and_last_porty_entry_dataframe,
    values='latency',
    index=['subject'],
    columns=['date'],
    aggfunc="mean",
)


In [66]:
all_latency_pivot_plot.head()

date,20220503,20220504,20220505,20220506,20220507,20220508,20220509,20220510,20220511,20220512,20220516
subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1.1,42.218,6.046829,11.53775,5.956341,3.598,10.86359,7.184,6.87675,9.927,,8.523659
1.2,49.817857,25.471538,13.112195,7.62825,5.648,4.987692,4.09525,9.064,6.9265,18.89,10.487
1.3,86.598,36.300526,373.84475,14.2345,7.001,8.922821,8.97225,8.34525,7.851,16.6055,13.88725
1.4,61.345333,10.832683,125.872,29.977805,12.334,6.594103,5.93475,8.869625,19.3865,,12.103659
2.1,30.953125,7.5485,18.269268,5.6185,3.92925,6.335897,5.16775,6.824,4.94175,17.10825,9.8885


- Getting the cage numbers for each subject and then making a dictionary out of it
    - This will be used to add the cage information to the pivot plots

In [67]:
# Converting the cage numbers from floats to integers.
# When the metadata is imported from the csv files, the cage number is imported as a float.
# NOTE: .astype(int) raises if the "cage" column contains any missing values.
metadata_df["cage"] = metadata_df["cage"].astype(int)

In [68]:
# Dictionary that maps each entry of the "Subject" column to that row's cage number.
# If a subject appears on several rows, the last row wins.
subject_to_cage = metadata_df.set_index("Subject")["cage"].to_dict()

In [69]:
subject_to_cage

{'3.4 (2)': 4,
 '2.3': 1,
 '4.3 (3)': 4,
 '2.1': 2,
 '1.2': 2,
 '1.1': 1,
 '1.3': 2,
 '2.4': 2,
 '4.1 (1)': 3,
 '4.4 (4)': 4,
 '3.3 (4)': 3,
 '3.2 (2)': 3,
 '1.4': 1,
 '3.1 (1)': 4,
 '2.2': 1,
 '4.2 (3)': 3}

- Adding the cage information with the dictionary

In [70]:
# The row index holds the subject IDs; look each one up in subject_to_cage and
# store the result as a new "cage" column next to the date columns.
# Subjects that are not in the dictionary get NaN.
all_latency_pivot_plot["cage"] = all_latency_pivot_plot.index.map(subject_to_cage)

In [71]:
all_latency_pivot_plot.head()

date,20220503,20220504,20220505,20220506,20220507,20220508,20220509,20220510,20220511,20220512,20220516,cage
subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1.1,42.218,6.046829,11.53775,5.956341,3.598,10.86359,7.184,6.87675,9.927,,8.523659,1
1.2,49.817857,25.471538,13.112195,7.62825,5.648,4.987692,4.09525,9.064,6.9265,18.89,10.487,2
1.3,86.598,36.300526,373.84475,14.2345,7.001,8.922821,8.97225,8.34525,7.851,16.6055,13.88725,2
1.4,61.345333,10.832683,125.872,29.977805,12.334,6.594103,5.93475,8.869625,19.3865,,12.103659,1
2.1,30.953125,7.5485,18.269268,5.6185,3.92925,6.335897,5.16775,6.824,4.94175,17.10825,9.8885,2


In [72]:
# (A stray file-name template used to be here. It was not valid Python and raised a
# SyntaxError, which stops "Restart Kernel -> Run All". The full output path is built
# and displayed in the next cell.)

In [73]:
os.path.join(pivot_plot_output_directory, "subject_to_date_original_latency_cage_{}_date_{}_{}.csv".format(cage_numbers_for_title, earliest_date, latest_date))

'./data/subject_rows_with_date_columns_dataframes/cage_1_2_3_4_date_20220503_20220516/subject_to_date_original_latency_cage_1_2_3_4_date_20220503_20220516.csv'

In [74]:
# Work out both destination paths first, then write the same table as CSV and as Excel
original_latency_csv_path = os.path.join(pivot_plot_output_directory, "subject_to_date_original_latency_cage_{}_date_{}_{}.csv".format(cage_numbers_for_title, earliest_date, latest_date))
original_latency_xlsx_path = os.path.join(pivot_plot_output_directory, "subject_to_date_original_latency_cage_{}_date_{}_{}.xlsx".format(cage_numbers_for_title, earliest_date, latest_date))
all_latency_pivot_plot.to_csv(original_latency_csv_path)
all_latency_pivot_plot.to_excel(original_latency_xlsx_path)

## Making a pivot plot with the latencies that were greater than 30 seconds adjusted

In [75]:
# Rows are subjects, columns are dates, and each cell is the mean of the
# "latency_adjusted_greater_than_30" values for that subject on that date.
# The string "mean" is passed instead of np.mean because recent pandas versions
# emit a FutureWarning when a NumPy callable is given as the aggregation function.
latency_greater_than_30_seconds_adjusted_pivot_plot = pd.pivot_table(
    concatted_first_and_last_porty_entry_dataframe,
    values="latency_adjusted_greater_than_30",
    index=["subject"],
    columns=["date"],
    aggfunc="mean",
)


In [76]:
# The row index holds the subject IDs; look each one up in subject_to_cage and
# store the result as a new "cage" column. Subjects that are not in the dictionary get NaN.
latency_greater_than_30_seconds_adjusted_pivot_plot["cage"] = latency_greater_than_30_seconds_adjusted_pivot_plot.index.map(subject_to_cage)

In [77]:
latency_greater_than_30_seconds_adjusted_pivot_plot.head()

date,20220503,20220504,20220505,20220506,20220507,20220508,20220509,20220510,20220511,20220512,20220516,cage
subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1.1,19.98,6.046829,9.44675,5.956341,3.598,10.670769,7.184,6.862375,9.9235,,8.523659,1
1.2,27.12,17.573077,11.960732,7.62825,5.648,4.987692,4.09525,7.562,6.9265,16.95725,9.76425,2
1.3,27.746,21.131316,26.896,10.40525,6.6595,8.282564,7.27,8.2735,7.7825,15.1805,13.352,2
1.4,22.285333,10.832683,24.336,12.144634,9.19425,6.594103,5.93475,8.77925,14.12675,,11.990732,1
2.1,17.701875,7.473,10.245122,5.2895,3.92925,5.700256,5.16775,6.824,4.94175,15.1845,9.8715,2


In [78]:
# Work out both destination paths first, then write the same table as CSV and as Excel
greater_than_30_adjusted_csv_path = os.path.join(pivot_plot_output_directory, "subject_to_date_latency_greater_than_30_seconds_adjusted_cage_{}_date_{}_{}.csv".format(cage_numbers_for_title, earliest_date, latest_date))
greater_than_30_adjusted_xlsx_path = os.path.join(pivot_plot_output_directory, "subject_to_date_latency_greater_than_30_seconds_adjusted_cage_{}_date_{}_{}.xlsx".format(cage_numbers_for_title, earliest_date, latest_date))
latency_greater_than_30_seconds_adjusted_pivot_plot.to_csv(greater_than_30_adjusted_csv_path)
latency_greater_than_30_seconds_adjusted_pivot_plot.to_excel(greater_than_30_adjusted_xlsx_path)

## Making a pivot plot with the latencies that overlapped with a tone playing

In [79]:
# Rows are subjects, columns are dates, and each cell is the mean of the
# "latency_adjusted_overlap" values for that subject on that date.
# The string "mean" is passed instead of np.mean because recent pandas versions
# emit a FutureWarning when a NumPy callable is given as the aggregation function.
latency_that_overlaps_with_tone_adjusted_pivot_plot = pd.pivot_table(
    concatted_first_and_last_porty_entry_dataframe,
    values="latency_adjusted_overlap",
    index=["subject"],
    columns=["date"],
    aggfunc="mean",
)


In [80]:
# The row index holds the subject IDs; look each one up in subject_to_cage and
# store the result as a new "cage" column. Subjects that are not in the dictionary get NaN.
latency_that_overlaps_with_tone_adjusted_pivot_plot["cage"] = latency_that_overlaps_with_tone_adjusted_pivot_plot.index.map(subject_to_cage)

In [81]:
latency_that_overlaps_with_tone_adjusted_pivot_plot.head()

date,20220503,20220504,20220505,20220506,20220507,20220508,20220509,20220510,20220511,20220512,20220516,cage
subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1.1,19.98,5.63878,8.4745,3.185366,1.96425,2.244872,1.645,1.147125,0.66375,,0.994878,1
1.2,22.605714,17.554615,10.497805,6.67125,4.246,3.467436,2.48425,1.72675,2.05925,1.29275,1.41775,2
1.3,27.746,21.131316,26.146,9.18175,3.99875,4.985128,3.8335,2.83,3.09475,1.147,0.68725,2
1.4,21.607333,10.755366,24.336,11.102683,6.5295,5.418974,3.512,2.715875,4.73125,,1.088537,1
2.1,12.203125,7.10625,10.013659,4.8035,2.6995,3.65641,1.606,1.24275,2.20575,3.28225,0.93,2


In [82]:
# Work out both destination paths first, then write the same table as CSV and as Excel
overlap_adjusted_csv_path = os.path.join(pivot_plot_output_directory, "subject_to_date_latency_that_overlaps_with_tone_adjusted_cage_{}_date_{}_{}.csv".format(cage_numbers_for_title, earliest_date, latest_date))
overlap_adjusted_xlsx_path = os.path.join(pivot_plot_output_directory, "subject_to_date_latency_that_overlaps_with_tone_adjusted_cage_{}_date_{}_{}.xlsx".format(cage_numbers_for_title, earliest_date, latest_date))
latency_that_overlaps_with_tone_adjusted_pivot_plot.to_csv(overlap_adjusted_csv_path)
latency_that_overlaps_with_tone_adjusted_pivot_plot.to_excel(overlap_adjusted_xlsx_path)

## Making a pivot plot with the ratio of original latencies that are less than 10 seconds

In [None]:
# Rows are subjects, columns are dates, and each cell is the mean of the
# "original_latency_less_than_10_seconds" values for that subject on that date.
# NOTE(review): the mean is only a ratio if that column holds True/False (or 1/0)
# flags - confirm against the cell that creates the column.
# The string "mean" is passed instead of np.mean because recent pandas versions
# emit a FutureWarning when a NumPy callable is given as the aggregation function.
original_less_than_10_latency_df = pd.pivot_table(
    concatted_first_and_last_porty_entry_dataframe,
    values="original_latency_less_than_10_seconds",
    index=["subject"],
    columns=["date"],
    aggfunc="mean",
)


In [None]:
# The row index holds the subject IDs; look each one up in subject_to_cage and
# store the result as a new "cage" column. Subjects that are not in the dictionary get NaN.
original_less_than_10_latency_df["cage"] = original_less_than_10_latency_df.index.map(subject_to_cage)

In [None]:
original_less_than_10_latency_df

In [None]:
# Work out both destination paths first, then write the same table as CSV and as Excel
original_less_than_10_csv_path = os.path.join(pivot_plot_output_directory, "original_less_than_10_seconds_latencies_cage_{}_date_{}_{}.csv".format(cage_numbers_for_title, earliest_date, latest_date))
original_less_than_10_xlsx_path = os.path.join(pivot_plot_output_directory, "original_less_than_10_seconds_latencies_cage_{}_date_{}_{}.xlsx".format(cage_numbers_for_title, earliest_date, latest_date))
original_less_than_10_latency_df.to_csv(original_less_than_10_csv_path)
original_less_than_10_latency_df.to_excel(original_less_than_10_xlsx_path)

## Making a pivot plot with the ratio of overlap adjusted latencies that are less than 10 seconds

In [None]:
# Rows are subjects, columns are dates, and each cell is the mean of the
# "overlap_adjusted_latency_less_than_10_seconds" values for that subject on that date.
# NOTE(review): the mean is only a ratio if that column holds True/False (or 1/0)
# flags - confirm against the cell that creates the column.
# The string "mean" is passed instead of np.mean because recent pandas versions
# emit a FutureWarning when a NumPy callable is given as the aggregation function.
overlap_adjusted_less_than_10_latency_df = pd.pivot_table(
    concatted_first_and_last_porty_entry_dataframe,
    values="overlap_adjusted_latency_less_than_10_seconds",
    index=["subject"],
    columns=["date"],
    aggfunc="mean",
)


In [None]:
# The row index holds the subject IDs; look each one up in subject_to_cage and
# store the result as a new "cage" column. Subjects that are not in the dictionary get NaN.
overlap_adjusted_less_than_10_latency_df["cage"] = overlap_adjusted_less_than_10_latency_df.index.map(subject_to_cage)

In [None]:
overlap_adjusted_less_than_10_latency_df

In [None]:
# Work out both destination paths first, then write the same table as CSV and as Excel
overlap_adjusted_less_than_10_csv_path = os.path.join(pivot_plot_output_directory, "overlap_adjusted_less_than_10_seconds_latencies_cage_{}_date_{}_{}.csv".format(cage_numbers_for_title, earliest_date, latest_date))
overlap_adjusted_less_than_10_xlsx_path = os.path.join(pivot_plot_output_directory, "overlap_adjusted_less_than_10_seconds_latencies_cage_{}_date_{}_{}.xlsx".format(cage_numbers_for_title, earliest_date, latest_date))
overlap_adjusted_less_than_10_latency_df.to_csv(overlap_adjusted_less_than_10_csv_path)
overlap_adjusted_less_than_10_latency_df.to_excel(overlap_adjusted_less_than_10_xlsx_path)

# Plotting the latencies

## Getting the averages for all the trials in one subject and one day

- We will plot a line for each subject, with the X-axis being the days since the first session and the Y-axis being the latency averaged across all the trials of one recording session
- Grouping all the rows with the same subject and date (i.e. all the trials in one session), and then getting the mean of each column

In [None]:
# One row per (file_path, date, subject) group, holding the mean of every numeric column.
# numeric_only=True is spelled out because pandas 2.0 and newer raise a TypeError
# when .mean() meets a non-numeric column, whereas older versions silently dropped
# such columns. With it, the cell behaves the same on both.
grouped_averaged_first_porty_entry_dataframe = (
    concatted_first_and_last_porty_entry_dataframe
    .groupby(by=["file_path", "date", "subject"])
    .mean(numeric_only=True)
)

In [None]:
grouped_averaged_first_porty_entry_dataframe.head()

## Reformatting the Dataframe for plotting

- Resetting the index so that we can use the data as a value

In [None]:
# Move the index levels back into regular columns so they can be used as values.
# NOTE: this cell is not idempotent. Running it again on the already-reset frame
# adds an extra "index" column, so re-run the groupby cell first.
grouped_averaged_first_porty_entry_dataframe = grouped_averaged_first_porty_entry_dataframe.reset_index()

In [None]:
grouped_averaged_first_porty_entry_dataframe.head()

- Turning the date column, which is a string, into an integer
    - This will allow us to use the date as the X-axis for plotting

In [None]:
# Store the date as an integer in a new "date_int" column.
# NOTE(review): the dates in this notebook look like YYYYMMDD (e.g. 20220503). If so,
# the difference between two of these integers is a number of days only while both
# dates are in the same month - confirm before using it for sessions that span months.
grouped_averaged_first_porty_entry_dataframe["date_int"] = grouped_averaged_first_porty_entry_dataframe["date"].astype(int)

In [None]:
grouped_averaged_first_porty_entry_dataframe.head()

- Adding the cage information
    - We will make one plot for each cage

In [None]:
# Look up every row's subject in subject_to_cage and store the cage as an integer.
# NOTE: a subject that is missing from the dictionary maps to NaN, and
# .astype(int) then raises instead of silently keeping the gap.
grouped_averaged_first_porty_entry_dataframe["cage"] = grouped_averaged_first_porty_entry_dataframe["subject"].map(subject_to_cage).astype(int)

In [None]:
grouped_averaged_first_porty_entry_dataframe.head()

## Plotting the original latency

- Making the directories

In [None]:
# Folder for these plots; its name carries the cage numbers and the date range
original_average_latency_output_directory = "./data/plots/original_average_latency_plots/cage_{}_date_{}_{}".format(
    cage_numbers_for_title,
    earliest_date,
    latest_date,
)

In [None]:
original_average_latency_output_directory

In [None]:
# Create the folder (and any missing parent folders).
# exist_ok=True means re-running this cell does not fail when the folder is already there.
os.makedirs(original_average_latency_output_directory, exist_ok=True)

In [None]:
# One figure per cage, with one line per subject in that cage.
# NOTE(review): the section heading and the file name say "original", but the column
# plotted is "latency_adjusted_greater_than_30" - confirm which one is intended.
for cage in grouped_averaged_first_porty_entry_dataframe["cage"].unique():
    fig, ax = plt.subplots()
    # Getting all the rows with the current cage number
    cage_df = grouped_averaged_first_porty_entry_dataframe[grouped_averaged_first_porty_entry_dataframe["cage"] == cage]

    # Plotting a line for each subject
    for subject in cage_df["subject"].unique():
        # Getting all the rows with the current subject
        subject_df = cage_df[cage_df["subject"] == subject]
        # date_int is a YYYYMMDD integer, so subtracting two of them is a day count
        # only inside a single month. Parsing them as real dates keeps the X-axis
        # correct when the sessions span a month boundary.
        session_dates = pd.to_datetime(subject_df["date_int"].astype(str), format="%Y%m%d")
        # Day 1 is this subject's first session
        days_since_first_session = (session_dates - session_dates.min()).dt.days + 1
        ax.plot(days_since_first_session, subject_df["latency_adjusted_greater_than_30"], '-o', label=subject)

    # Setting the Y-Axis to only plot from 0 to 30 because we adjusted the latency to always be under 30
    ax.set_ylim(0, 30)
    # Labeling the X/Y Axis and the title
    ax.set_xlabel("The Days After the First Session")
    ax.set_ylabel("Adjusted Average Latency of First Entry to Tone Onset")
    ax.set_title("Latency of Port Entry to Tone: Cage {}".format(cage))
    # To show the legend
    ax.legend()
    file_name = "original_average_port_entry_latency_cage_{}_date_{}_{}.png".format(cage, earliest_date, latest_date)
    plt.savefig(os.path.join(original_average_latency_output_directory, file_name))

## Plotting the original ratio of latencies that are less than 10 seconds

In [None]:
# Folder for these plots; its name carries the cage numbers and the date range
original_less_than_10_second_latency_ratio_output_directory = "./data/plots/original_less_than_10_second_latency_ratio/cage_{}_date_{}_{}".format(
    cage_numbers_for_title,
    earliest_date,
    latest_date,
)

In [None]:
original_less_than_10_second_latency_ratio_output_directory

In [None]:
# Create the folder (and any missing parent folders).
# exist_ok=True means re-running this cell does not fail when the folder is already there.
os.makedirs(original_less_than_10_second_latency_ratio_output_directory, exist_ok=True)

In [None]:
# One figure per cage, with one line per subject in that cage
for cage in grouped_averaged_first_porty_entry_dataframe["cage"].unique():
    fig, ax = plt.subplots()
    # Getting all the rows with the current cage number
    cage_df = grouped_averaged_first_porty_entry_dataframe[grouped_averaged_first_porty_entry_dataframe["cage"] == cage]

    # Plotting a line for each subject
    for subject in cage_df["subject"].unique():
        # Getting all the rows with the current subject
        subject_df = cage_df[cage_df["subject"] == subject]
        # date_int is a YYYYMMDD integer, so subtracting two of them is a day count
        # only inside a single month. Parsing them as real dates keeps the X-axis
        # correct when the sessions span a month boundary.
        session_dates = pd.to_datetime(subject_df["date_int"].astype(str), format="%Y%m%d")
        # Day 1 is this subject's first session
        days_since_first_session = (session_dates - session_dates.min()).dt.days + 1
        ax.plot(days_since_first_session, subject_df["original_latency_less_than_10_seconds"], '-o', label=subject)

    # Setting the Y-Axis to plot from 0 to 1 because the values are proportions
    ax.set_ylim(0, 1)
    # Labeling the X/Y Axis and the title
    ax.set_xlabel("The Days After the First Session")
    ax.set_ylabel("Proportion of Latencies")
    ax.set_title("Less Than 10 Seconds Latencies from Tone Onset: Cage {}".format(cage))
    # To show the legend
    ax.legend()
    file_name = "original_less_than_10_second_latency_cage_{}_date_{}_{}.png".format(cage, earliest_date, latest_date)
    plt.savefig(os.path.join(original_less_than_10_second_latency_ratio_output_directory, file_name))


# EDIT FROM HERE

In [None]:
# Deliberately stops a top-to-bottom run at this point; everything below sits under
# the "EDIT FROM HERE" heading.
# NOTE(review): presumably the cells below are still being worked on - remove this
# cell once they run cleanly.
raise ValueError("Stop")

# Plotting the Adjusted Latencies for Entries that Overlap with the Tone

## Plotting the Latencies 

In [None]:
# Folder for these plots; its name carries the cage numbers and the date range.
# NOTE(review): the plotting cells further down save to hard-coded paths rather than
# to this directory - confirm where the figures are meant to go.
overlap_adjusted_average_latency_output_directory = "./data/plots/overlap_adjusted_average_latency_plots/cage_{}_date_{}_{}".format(
    cage_numbers_for_title,
    earliest_date,
    latest_date,
)

In [None]:
overlap_adjusted_average_latency_output_directory

In [None]:
# Create the folder (and any missing parent folders).
# exist_ok=True means re-running this cell does not fail when the folder is already there.
os.makedirs(overlap_adjusted_average_latency_output_directory, exist_ok=True)

In [None]:
# The figures are saved into this folder, so it has to exist before savefig is called
overlap_ratio_plot_directory = "./data/plots/overlap_ratios"
os.makedirs(overlap_ratio_plot_directory, exist_ok=True)

# One figure per cage, with one line per subject in that cage
for cage in grouped_averaged_first_porty_entry_dataframe["cage"].unique():
    fig, ax = plt.subplots()
    # Getting all the rows with the current cage number
    cage_df = grouped_averaged_first_porty_entry_dataframe[grouped_averaged_first_porty_entry_dataframe["cage"] == cage]

    # Plotting a line for each subject
    for subject in cage_df["subject"].unique():
        # Getting all the rows with the current subject
        subject_df = cage_df[cage_df["subject"] == subject]
        # date_int is a YYYYMMDD integer, so subtracting two of them is a day count
        # only inside a single month. Parsing them as real dates keeps the X-axis
        # correct when the sessions span a month boundary.
        session_dates = pd.to_datetime(subject_df["date_int"].astype(str), format="%Y%m%d")
        # Day 1 is this subject's first session
        days_since_first_session = (session_dates - session_dates.min()).dt.days + 1
        ax.plot(days_since_first_session, subject_df["overlapping_port_entry_with_tone"], '-o', label=subject)

    # Setting the Y-Axis to plot from 0 to 1 because the values are proportions
    ax.set_ylim(0, 1)
    # Labeling the X/Y Axis and the title
    ax.set_xlabel("The Days After the First Session")
    ax.set_ylabel("Proportion of Latencies")
    ax.set_title("Port Entries/Exit that Overlap with Tone Onset: Cage {}".format(cage))
    # To show the legend
    ax.legend()
    # The file name has three placeholders (cage, first date, last date); all three
    # must be supplied, otherwise str.format raises an IndexError
    file_name = "overlap_ratios_cage_{}_date_{}_{}.png".format(cage, earliest_date, latest_date)
    plt.savefig(os.path.join(overlap_ratio_plot_directory, file_name))


# Plotting the Latencies Adjusted for Overlap with the Tone

In [None]:
# The figures are saved into this folder, so it has to exist before savefig is called
overlap_adjusted_latency_plot_directory = "./data/plots/overlap_adjusted_latencies"
os.makedirs(overlap_adjusted_latency_plot_directory, exist_ok=True)

# One figure per cage, with one line per subject in that cage
for cage in grouped_averaged_first_porty_entry_dataframe["cage"].unique():
    fig, ax = plt.subplots()
    # Getting all the rows with the current cage number
    cage_df = grouped_averaged_first_porty_entry_dataframe[grouped_averaged_first_porty_entry_dataframe["cage"] == cage]

    # Plotting a line for each subject
    for subject in cage_df["subject"].unique():
        # Getting all the rows with the current subject
        subject_df = cage_df[cage_df["subject"] == subject]
        # date_int is a YYYYMMDD integer, so subtracting two of them is a day count
        # only inside a single month. Parsing them as real dates keeps the X-axis
        # correct when the sessions span a month boundary.
        session_dates = pd.to_datetime(subject_df["date_int"].astype(str), format="%Y%m%d")
        # Day 1 is this subject's first session
        days_since_first_session = (session_dates - session_dates.min()).dt.days + 1
        ax.plot(days_since_first_session, subject_df["latency_adjusted_overlap"], '-o', label=subject)

    # Setting the Y-Axis to only plot from 0 to 30
    ax.set_ylim(0, 30)
    # Labeling the X/Y Axis and the title
    ax.set_xlabel("The Days After the First Session")
    ax.set_ylabel("Adjusted Average Latency of First Entry to Tone Onset")
    ax.set_title("Latency of Port Entry to Tone Adjusted for Overlap: Cage {}".format(cage))
    # To show the legend
    ax.legend()
    # The file name has three placeholders (cage, first date, last date); all three
    # must be supplied, otherwise str.format raises an IndexError
    file_name = "overlap_adjusted_latencies_cage_{}_date_{}_{}.png".format(cage, earliest_date, latest_date)
    plt.savefig(os.path.join(overlap_adjusted_latency_plot_directory, file_name))

In [None]:
# NOTE(review): this repeats, with the same value, an assignment made earlier in the
# notebook, and the plotting cell that follows saves to a different, hard-coded
# folder - confirm whether this cell is still needed.
original_less_than_10_second_latency_ratio_output_directory = "./data/plots/original_less_than_10_second_latency_ratio/cage_{}_date_{}_{}".format(
    cage_numbers_for_title,
    earliest_date,
    latest_date,
)

In [None]:
original_less_than_10_second_latency_ratio_output_directory

In [None]:
# Create the folder (and any missing parent folders).
# exist_ok=True means this call does nothing when the folder is already there.
os.makedirs(original_less_than_10_second_latency_ratio_output_directory, exist_ok=True)

In [None]:
# The figures are saved into this folder, so it has to exist before savefig is called
overlap_adjusted_less_than_10_plot_directory = "./data/plots/overlap_adjusted_proportion_of_latencies_less_than_10_seconds"
os.makedirs(overlap_adjusted_less_than_10_plot_directory, exist_ok=True)

# One figure per cage, with one line per subject in that cage
for cage in grouped_averaged_first_porty_entry_dataframe["cage"].unique():
    fig, ax = plt.subplots()
    # Getting all the rows with the current cage number
    cage_df = grouped_averaged_first_porty_entry_dataframe[grouped_averaged_first_porty_entry_dataframe["cage"] == cage]

    # Plotting a line for each subject
    for subject in cage_df["subject"].unique():
        # Getting all the rows with the current subject
        subject_df = cage_df[cage_df["subject"] == subject]
        # date_int is a YYYYMMDD integer, so subtracting two of them is a day count
        # only inside a single month. Parsing them as real dates keeps the X-axis
        # correct when the sessions span a month boundary.
        session_dates = pd.to_datetime(subject_df["date_int"].astype(str), format="%Y%m%d")
        # Day 1 is this subject's first session
        days_since_first_session = (session_dates - session_dates.min()).dt.days + 1
        ax.plot(days_since_first_session, subject_df["overlap_adjusted_latency_less_than_10_seconds"], '-o', label=subject)

    # Setting the Y-Axis to plot from 0 to 1 because the values are proportions
    ax.set_ylim(0, 1)
    # Labeling the X/Y Axis and the title
    ax.set_xlabel("The Days After the First Session")
    ax.set_ylabel("Proportion of Latencies")
    ax.set_title("Overlap Adjusted Less Than 10sec Latencies from Tone: Cage {}".format(cage))
    # To show the legend
    ax.legend()
    # The file name has three placeholders (cage, first date, last date); all three
    # must be supplied, otherwise str.format raises an IndexError
    file_name = "overlap_adjusted_proportion_of_latencies_less_than_10_seconds_cage_{}_date_{}_{}.png".format(cage, earliest_date, latest_date)
    plt.savefig(os.path.join(overlap_adjusted_less_than_10_plot_directory, file_name))
