# Notebook 2: MED-PC Calculating Latencies

## Importing the Python Libraries

In [1]:
import sys
import glob
from collections import defaultdict
import os

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from medpc2excel.medpc_read import medpc_read

In [3]:
# setting path
sys.path.append('../../src')

In [4]:
# All the libraries that were created for this repository
import extract.dataframe
import processing.tone
import extract.metadata

In [5]:
# Increase size of plot in jupyter

plt.rcParams["figure.figsize"] = (10,6)

# Loading in Recording and Metadata from Previous Notebook

# NOTE: If you changed the directory of where the MED-PC recording dataframes, then you must change the directories in the following paths

- This will get all the files in each specified path. The `*` is called a wildcard, and it can be replaced with any number of characters.
    - For more information: https://www.malikbrowne.com/blog/a-beginners-guide-glob-patterns

In [6]:
concatted_medpc_files = glob.glob("./proc/extracted_recording_data_and_metadata/*/MEDPC_recording_*.csv")
metadata_files = glob.glob("./proc/extracted_recording_data_and_metadata/*/metadata_*.csv")
recording_and_metadata_files = glob.glob("./proc/extracted_recording_data_and_metadata/*/recording_metadata_*.csv")

# NOTE: If there is more than one MED-PC recording dataframes, then you must manually change the path in the `pd.read_csv()` for the corresponding variables. Verify if this is the correct file that you want to use

In [7]:
metadata_files[0]

'./proc/extracted_recording_data_and_metadata/experiment_CD1_vs_C57_Comparison_cage_1_2_3_4_5_6_date_20220920_20220927/metadata_cage_1_2_3_4_5_6_date_20220920_20220927.csv'

In [8]:
concatted_medpc_files[0]

'./proc/extracted_recording_data_and_metadata/experiment_CD1_vs_C57_Comparison_cage_1_2_3_4_5_6_date_20220920_20220927/MEDPC_recording_cage_1_2_3_4_5_6_date_20220920_20220927.csv'

In [9]:
recording_and_metadata_files[0]

'./proc/extracted_recording_data_and_metadata/experiment_CD1_vs_C57_Comparison_cage_1_2_3_4_5_6_date_20220920_20220927/recording_metadata_cage_1_2_3_4_5_6_date_20220920_20220927.csv'

In [10]:
metadata_df = pd.read_csv(metadata_files[0], index_col=0)
concatted_medpc_df = pd.read_csv(concatted_medpc_files[0], index_col=0)
recording_and_metadata_df = pd.read_csv(recording_and_metadata_files[0], index_col=0)

- The Dataframe the contains the metadata(columns) of each recording file(rows)
    - We will mostly use this to get the cage number for each subject

In [11]:
metadata_df.head()

Unnamed: 0,index,File,Start Date,End Date,Subject,Experiment,Group,Box,Start Time,End Time,MSN,cage
0,./data/2022-09-27_16h16m_Subject 1.1.txt,C:\MED-PC\Data\2022-09-27_16h16m_Subject 1.1.txt,09/27/22,09/27/22,1.1,CD1_vs_C57_Comparison,Cage_1,1,16:16:59,17:26:20,C57_reward_training,1.0
1,./data/2022-09-20_10h06m_Subject 1.2.txt,C:\MED-PC\Data\2022-09-20_10h06m_Subject 1.2.txt,09/20/22,09/20/22,1.2,CD1_vs_C57_Comparison,Cage_1,4,10:06:09,11:13:16,C57_reward_training,1.0
2,./data/2022-09-20_14h57m_Subject 6.4.txt,C:\MED-PC\Data\2022-09-20_14h57m_Subject 6.4.txt,09/20/22,09/20/22,6.4,CD1_vs_C57_Comparison,Cage_6,3,14:57:06,15:58:35,CD1_reward_training,6.0
3,./data/2022-09-22_11h37m_Subject 5.3.txt,C:\MED-PC\Data\2022-09-22_11h37m_Subject 5.3.txt,09/22/22,09/22/22,5.3,CD1_vs_C57_Comparison,Cage_5,4,11:37:48,12:44:00,CD1_reward_training,5.0
4,./data/2022-09-22_16h20m_Subject 1.4.txt,C:\MED-PC\Data\2022-09-22_16h20m_Subject 1.4.txt,09/22/22,09/22/22,1.4,CD1_vs_C57_Comparison,Cage_1,2,16:20:57,17:26:49,C57_reward_training,1.0


- The Dataframe the contains the recording data for all the files.
    - The 1st row is the first data entries for each type of data(with the type being specified by the column label). This does not mean that all these are related or occured for the same trial. i.e. There can be many port entries and exits for a subject before and after a tone is played.
    - Each recording session will usually have a few thousand rows(although, most of of the columns will probably be blank by the end). And then after the last row of one session, the next row will be the next session with a different `file_path`

In [12]:
concatted_medpc_df.head()

Unnamed: 0,(P)Portentry,(Q)USdelivery,(R)UStime,(W)ITIvalues,(S)CSpresentation,(N)Portexit,(K)CStype,(G)controlled_stimulus_secondscomputer,(H)controlled_stimulus_minutescomputer,(I)controlled_stimulus_hourscomputer,(B)port_entry_secondscomputer,(F)port_entry_minutescomputer,(J)port_entry_hourscomputer,date,subject,file_path
0,12.5,64.0,399.0,0.0,60.01,12.52,1.0,20.0,26.0,16.0,33.0,25.0,16.0,20220927,1.1,./data/2022-09-27_16h16m_Subject 1.1.txt
1,12.56,144.0,399.0,0.0,140.01,13.16,1.0,41.0,27.0,16.0,33.0,25.0,16.0,20220927,1.1,./data/2022-09-27_16h16m_Subject 1.1.txt
2,13.18,234.0,399.0,0.0,230.01,13.79,1.0,11.0,29.0,16.0,34.0,25.0,16.0,20220927,1.1,./data/2022-09-27_16h16m_Subject 1.1.txt
3,14.99,314.0,399.0,0.0,310.01,15.03,1.0,30.0,30.0,16.0,35.0,25.0,16.0,20220927,1.1,./data/2022-09-27_16h16m_Subject 1.1.txt
4,15.12,389.0,399.0,0.0,385.01,16.05,1.0,46.0,31.0,16.0,35.0,25.0,16.0,20220927,1.1,./data/2022-09-27_16h16m_Subject 1.1.txt


- This dataframe combines the recording data and metadata dataframes. This is done by using the subject ID as a common column to merge together off of.

In [13]:
recording_and_metadata_df.head()

Unnamed: 0,(P)Portentry,(Q)USdelivery,(R)UStime,(W)ITIvalues,(S)CSpresentation,(N)Portexit,(K)CStype,(G)controlled_stimulus_secondscomputer,(H)controlled_stimulus_minutescomputer,(I)controlled_stimulus_hourscomputer,...,Start Date,End Date,Subject,Experiment,Group,Box,Start Time,End Time,MSN,cage
0,12.5,64.0,399.0,0.0,60.01,12.52,1.0,20.0,26.0,16.0,...,09/27/22,09/27/22,1.1,CD1_vs_C57_Comparison,Cage_1,1,16:16:59,17:26:20,C57_reward_training,1.0
1,12.56,144.0,399.0,0.0,140.01,13.16,1.0,41.0,27.0,16.0,...,09/27/22,09/27/22,1.1,CD1_vs_C57_Comparison,Cage_1,1,16:16:59,17:26:20,C57_reward_training,1.0
2,13.18,234.0,399.0,0.0,230.01,13.79,1.0,11.0,29.0,16.0,...,09/27/22,09/27/22,1.1,CD1_vs_C57_Comparison,Cage_1,1,16:16:59,17:26:20,C57_reward_training,1.0
3,14.99,314.0,399.0,0.0,310.01,15.03,1.0,30.0,30.0,16.0,...,09/27/22,09/27/22,1.1,CD1_vs_C57_Comparison,Cage_1,1,16:16:59,17:26:20,C57_reward_training,1.0
4,15.12,389.0,399.0,0.0,385.01,16.05,1.0,46.0,31.0,16.0,...,09/27/22,09/27/22,1.1,CD1_vs_C57_Comparison,Cage_1,1,16:16:59,17:26:20,C57_reward_training,1.0


# Getting the Cage Numbers and the Dates for the file names

- Seeing if any rows have NaN values for the cages

In [14]:
recording_and_metadata_df[pd.isna(recording_and_metadata_df["cage"])]

Unnamed: 0,(P)Portentry,(Q)USdelivery,(R)UStime,(W)ITIvalues,(S)CSpresentation,(N)Portexit,(K)CStype,(G)controlled_stimulus_secondscomputer,(H)controlled_stimulus_minutescomputer,(I)controlled_stimulus_hourscomputer,...,Start Date,End Date,Subject,Experiment,Group,Box,Start Time,End Time,MSN,cage
48299,11.03,64.0,399.0,0.0,60.01,12.14,1.0,29.0,42.0,17.0,...,09/27/22,09/27/22,4.4,CD1_vs_C57_Comparison,,4,17:40:46,18:42:29,CD1_reward_training,
48300,12.17,144.0,399.0,0.0,140.01,12.20,1.0,49.0,43.0,17.0,...,09/27/22,09/27/22,4.4,CD1_vs_C57_Comparison,,4,17:40:46,18:42:29,CD1_reward_training,
48301,17.76,234.0,399.0,0.0,230.01,19.53,1.0,19.0,45.0,17.0,...,09/27/22,09/27/22,4.4,CD1_vs_C57_Comparison,,4,17:40:46,18:42:29,CD1_reward_training,
48302,20.39,314.0,399.0,0.0,310.01,21.64,1.0,39.0,46.0,17.0,...,09/27/22,09/27/22,4.4,CD1_vs_C57_Comparison,,4,17:40:46,18:42:29,CD1_reward_training,
48303,29.62,389.0,399.0,0.0,385.01,30.79,1.0,54.0,47.0,17.0,...,09/27/22,09/27/22,4.4,CD1_vs_C57_Comparison,,4,17:40:46,18:42:29,CD1_reward_training,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
565243,,,,,,,,,,,...,09/27/22,09/27/22,4.2,CD1_vs_C57_Comparison,,2,17:40:46,18:42:29,CD1_reward_training,
565244,,,,,,,,,,,...,09/27/22,09/27/22,4.2,CD1_vs_C57_Comparison,,2,17:40:46,18:42:29,CD1_reward_training,
565245,,,,,,,,,,,...,09/27/22,09/27/22,4.2,CD1_vs_C57_Comparison,,2,17:40:46,18:42:29,CD1_reward_training,
565246,,,,,,,,,,,...,09/27/22,09/27/22,4.2,CD1_vs_C57_Comparison,,2,17:40:46,18:42:29,CD1_reward_training,


- Dropping the NaN values so we can get the cage numbers

In [15]:
recording_and_metadata_df = recording_and_metadata_df.dropna(subset=["cage"])

# Adjusting the days and cages that are being looked at

- List of all the subject names

In [16]:
all_subjects = sorted(recording_and_metadata_df["subject"].unique())

In [17]:
all_subjects

[1.1,
 1.2,
 1.3,
 1.4,
 2.1,
 2.2,
 2.3,
 2.4,
 3.1,
 3.2,
 3.3,
 3.4,
 4.1,
 4.2,
 4.3,
 4.4,
 5.1,
 5.2,
 5.3,
 5.4,
 6.1,
 6.2,
 6.3,
 6.4]

- List of all the cage numbers

In [18]:
all_cages = sorted(recording_and_metadata_df["cage"].unique())

In [19]:
all_cages

[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]

- List of all the dates

In [20]:
all_dates = sorted(recording_and_metadata_df["date"].unique())

In [21]:
all_dates

[20220920,
 20220921,
 20220922,
 20220923,
 20220924,
 20220925,
 20220926,
 20220927]

# NOTE: Follow directions below if you want to change which cages or dates to plot

- You must change the cell below by clicking on the cell and then pressing the `esc` key. Then press the `Y` button on your keyboard. If you want to switch it back do the same but press the `R` button instead. This switches it from a cell that is run as if it's code, or back to a cell where it's just treated as text
- Then from above, copy the numbers that you want to use for the cage numbers and the dates

In [22]:
recording_and_metadata_df = recording_and_metadata_df[recording_and_metadata_df["cage"].isin(all_cages) & recording_and_metadata_df["date"].isin(all_dates)]

# Getting the Cage Numbers and the Dates to name the files

In [23]:
# removing blank spaces
cage_numbers = [str(int(number)) for number in all_cages if number is not np.nan]
# sorting numbers
cage_numbers = sorted(cage_numbers)
cage_numbers_for_title = "_".join(cage_numbers)

In [24]:
cage_numbers_for_title

'1_2_3_4_5_6'

In [25]:
# Getting the first and last recording date to get a range
earliest_date = min(all_dates)
latest_date = max(all_dates)

In [26]:
earliest_date

20220920

In [27]:
latest_date

20220927

# Getting the times that the tones are being played

- Getting all the rows that correspond to one of the files paths. This will be the equivalent of getting all the rows for one session

In [28]:
example_file = recording_and_metadata_df["file_path"].unique()[0]

In [29]:
example_file

'./data/2022-09-27_16h16m_Subject 1.1.txt'

In [30]:
example_one_session_recording_df = recording_and_metadata_df[recording_and_metadata_df["file_path"] == example_file]

- Everything looks normal enough

In [31]:
example_one_session_recording_df.head()

Unnamed: 0,(P)Portentry,(Q)USdelivery,(R)UStime,(W)ITIvalues,(S)CSpresentation,(N)Portexit,(K)CStype,(G)controlled_stimulus_secondscomputer,(H)controlled_stimulus_minutescomputer,(I)controlled_stimulus_hourscomputer,...,Start Date,End Date,Subject,Experiment,Group,Box,Start Time,End Time,MSN,cage
0,12.5,64.0,399.0,0.0,60.01,12.52,1.0,20.0,26.0,16.0,...,09/27/22,09/27/22,1.1,CD1_vs_C57_Comparison,Cage_1,1,16:16:59,17:26:20,C57_reward_training,1.0
1,12.56,144.0,399.0,0.0,140.01,13.16,1.0,41.0,27.0,16.0,...,09/27/22,09/27/22,1.1,CD1_vs_C57_Comparison,Cage_1,1,16:16:59,17:26:20,C57_reward_training,1.0
2,13.18,234.0,399.0,0.0,230.01,13.79,1.0,11.0,29.0,16.0,...,09/27/22,09/27/22,1.1,CD1_vs_C57_Comparison,Cage_1,1,16:16:59,17:26:20,C57_reward_training,1.0
3,14.99,314.0,399.0,0.0,310.01,15.03,1.0,30.0,30.0,16.0,...,09/27/22,09/27/22,1.1,CD1_vs_C57_Comparison,Cage_1,1,16:16:59,17:26:20,C57_reward_training,1.0
4,15.12,389.0,399.0,0.0,385.01,16.05,1.0,46.0,31.0,16.0,...,09/27/22,09/27/22,1.1,CD1_vs_C57_Comparison,Cage_1,1,16:16:59,17:26:20,C57_reward_training,1.0


- `(S)CSpresentation` is the time that the tone starts playing. MED-PC starts inputting in random numbers that are in the 1000's. So we will remove these

In [32]:
example_one_session_recording_df[50:60]

Unnamed: 0,(P)Portentry,(Q)USdelivery,(R)UStime,(W)ITIvalues,(S)CSpresentation,(N)Portexit,(K)CStype,(G)controlled_stimulus_secondscomputer,(H)controlled_stimulus_minutescomputer,(I)controlled_stimulus_hourscomputer,...,Start Date,End Date,Subject,Experiment,Group,Box,Start Time,End Time,MSN,cage
50,116.61,,,0.0,12000.0,118.38,0.0,0.0,0.0,0.0,...,09/27/22,09/27/22,1.1,CD1_vs_C57_Comparison,Cage_1,1,16:16:59,17:26:20,C57_reward_training,1.0
51,121.41,,,0.0,9000.0,121.69,0.0,0.0,0.0,0.0,...,09/27/22,09/27/22,1.1,CD1_vs_C57_Comparison,Cage_1,1,16:16:59,17:26:20,C57_reward_training,1.0
52,121.81,,,0.0,8500.0,121.93,0.0,0.0,0.0,0.0,...,09/27/22,09/27/22,1.1,CD1_vs_C57_Comparison,Cage_1,1,16:16:59,17:26:20,C57_reward_training,1.0
53,121.95,,,0.0,9000.0,123.05,0.0,0.0,0.0,0.0,...,09/27/22,09/27/22,1.1,CD1_vs_C57_Comparison,Cage_1,1,16:16:59,17:26:20,C57_reward_training,1.0
54,123.09,,,0.0,9500.0,123.13,0.0,0.0,0.0,0.0,...,09/27/22,09/27/22,1.1,CD1_vs_C57_Comparison,Cage_1,1,16:16:59,17:26:20,C57_reward_training,1.0
55,123.35,,,0.0,12000.0,123.79,0.0,0.0,0.0,0.0,...,09/27/22,09/27/22,1.1,CD1_vs_C57_Comparison,Cage_1,1,16:16:59,17:26:20,C57_reward_training,1.0
56,123.81,,,0.0,8000.0,124.33,0.0,0.0,0.0,0.0,...,09/27/22,09/27/22,1.1,CD1_vs_C57_Comparison,Cage_1,1,16:16:59,17:26:20,C57_reward_training,1.0
57,124.36,,,0.0,9500.0,124.38,0.0,0.0,0.0,0.0,...,09/27/22,09/27/22,1.1,CD1_vs_C57_Comparison,Cage_1,1,16:16:59,17:26:20,C57_reward_training,1.0
58,124.4,,,0.0,8000.0,124.47,0.0,0.0,0.0,0.0,...,09/27/22,09/27/22,1.1,CD1_vs_C57_Comparison,Cage_1,1,16:16:59,17:26:20,C57_reward_training,1.0
59,124.51,,,0.0,9000.0,124.6,0.0,0.0,0.0,0.0,...,09/27/22,09/27/22,1.1,CD1_vs_C57_Comparison,Cage_1,1,16:16:59,17:26:20,C57_reward_training,1.0


- `(S)CSpresentation` then starts adding blanks(NaN) so we will remove these too

In [33]:
example_one_session_recording_df.tail()

Unnamed: 0,(P)Portentry,(Q)USdelivery,(R)UStime,(W)ITIvalues,(S)CSpresentation,(N)Portexit,(K)CStype,(G)controlled_stimulus_secondscomputer,(H)controlled_stimulus_minutescomputer,(I)controlled_stimulus_hourscomputer,...,Start Date,End Date,Subject,Experiment,Group,Box,Start Time,End Time,MSN,cage
6003,,,,,,,,,,,...,09/27/22,09/27/22,1.1,CD1_vs_C57_Comparison,Cage_1,1,16:16:59,17:26:20,C57_reward_training,1.0
6004,,,,,,,,,,,...,09/27/22,09/27/22,1.1,CD1_vs_C57_Comparison,Cage_1,1,16:16:59,17:26:20,C57_reward_training,1.0
6005,,,,,,,,,,,...,09/27/22,09/27/22,1.1,CD1_vs_C57_Comparison,Cage_1,1,16:16:59,17:26:20,C57_reward_training,1.0
6006,,,,,,,,,,,...,09/27/22,09/27/22,1.1,CD1_vs_C57_Comparison,Cage_1,1,16:16:59,17:26:20,C57_reward_training,1.0
6007,,,,,,,,,,,...,09/27/22,09/27/22,1.1,CD1_vs_C57_Comparison,Cage_1,1,16:16:59,17:26:20,C57_reward_training,1.0


- We will use a function that removes the `NaN` and the tone times that are divisible by 1000. This will get the tones time that were actually used for the recordings

In [34]:
example_one_session_valid_tones = processing.tone.get_valid_tones(example_one_session_recording_df["(S)CSpresentation"])

In [35]:
example_one_session_valid_tones

0       60.01
1      140.01
2      230.01
3      310.01
4      385.01
5      485.01
6      580.01
7      670.01
8      750.01
9      840.01
10     940.01
11    1030.01
12    1150.01
13    1240.01
14    1325.01
15    1415.01
16    1510.01
17    1630.01
18    1710.01
19    1805.01
20    1885.01
21    1975.01
22    2055.01
23    2130.01
24    2230.01
25    2325.01
26    2415.01
27    2495.01
28    2585.01
29    2685.01
30    2775.01
31    2895.01
32    2985.01
33    3070.01
34    3160.01
35    3255.01
36    3345.01
37    3425.01
38    3515.01
Name: (S)CSpresentation, dtype: float64

# Getting the First Port Entry After Each Tone

- With these tone playing times, we will get the first port entry that comes after the tone playing time
    - This was done by getting all the port entries that came after the tone. And then getting the port entry time that was earliest in time(aka smallest number)

In [36]:
processing.tone.get_first_port_entries_after_tone(tone_pd_series=example_one_session_valid_tones, port_entries_pd_series=example_one_session_recording_df["(P)Portentry"], port_exits_pd_series=example_one_session_recording_df["(N)Portexit"])

Unnamed: 0,current_tone_time,first_port_entry_after_tone,port_exit_after_first_port_entry_after_tone
0,60.01,61.92,63.45
1,140.01,140.84,140.98
2,230.01,234.61,241.93
3,310.01,313.65,325.43
4,385.01,385.99,389.03
5,485.01,485.4,486.8
6,580.01,580.23,581.31
7,670.01,672.04,674.11
8,750.01,750.13,750.21
9,840.01,841.75,841.79


- Getting the first port entry times for all the sessions

In [37]:
concatted_first_porty_entry_dataframe = processing.tone.get_concatted_first_porty_entry_after_tone_dataframe(concatted_medpc_df=recording_and_metadata_df)

In [38]:
concatted_first_porty_entry_dataframe.head()

Unnamed: 0,current_tone_time,first_port_entry_after_tone,port_exit_after_first_port_entry_after_tone,file_path,date,subject
0,60.01,61.92,63.45,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1
1,140.01,140.84,140.98,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1
2,230.01,234.61,241.93,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1
3,310.01,313.65,325.43,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1
4,385.01,385.99,389.03,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1


In [39]:
concatted_first_porty_entry_dataframe.tail()

Unnamed: 0,current_tone_time,first_port_entry_after_tone,port_exit_after_first_port_entry_after_tone,file_path,date,subject
7327,3160.01,3191.74,3191.77,./data/2022-09-23_14h27m_Subject 6.4.txt,20220923,6.4
7328,3255.01,3289.64,3289.66,./data/2022-09-23_14h27m_Subject 6.4.txt,20220923,6.4
7329,3345.01,3480.7,3480.73,./data/2022-09-23_14h27m_Subject 6.4.txt,20220923,6.4
7330,3425.01,3480.7,3480.73,./data/2022-09-23_14h27m_Subject 6.4.txt,20220923,6.4
7331,3515.01,3532.77,3532.8,./data/2022-09-23_14h27m_Subject 6.4.txt,20220923,6.4


# Getting the Last Port Entry Before the Tone

- We will do the same, but get the port entries that happened before the tone

In [40]:
port_entries_pd_series = example_one_session_recording_df["(P)Portentry"]
port_exit_pd_series = example_one_session_recording_df["(N)Portexit"]

- Example run for one session

In [41]:
example_last_port_entries_before_tone = processing.tone.get_last_port_entries_before_tone(tone_pd_series=example_one_session_valid_tones, port_entries_pd_series=port_entries_pd_series, port_exits_pd_series=port_exit_pd_series)

In [42]:
example_last_port_entries_before_tone

Unnamed: 0,current_tone_time,last_port_entry_before_tone,port_exit_after_last_port_entry_before_tone
0,60.01,59.59,61.0
1,140.01,139.02,139.34
2,230.01,229.62,234.59
3,310.01,309.31,313.45
4,385.01,383.6,385.91
5,485.01,480.31,485.27
6,580.01,579.77,580.05
7,670.01,669.48,671.76
8,750.01,746.34,750.09
9,840.01,835.94,841.73


- Getting all the port entries that happen before the tone for all sessions

In [43]:
concatted_last_porty_entry_dataframe = processing.tone.get_concatted_last_porty_entry_before_tone_dataframe(concatted_medpc_df=recording_and_metadata_df)

In [44]:
concatted_last_porty_entry_dataframe

Unnamed: 0,current_tone_time,last_port_entry_before_tone,port_exit_after_last_port_entry_before_tone,file_path,date,subject
0,60.01,59.59,61.00,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1
1,140.01,139.02,139.34,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1
2,230.01,229.62,234.59,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1
3,310.01,309.31,313.45,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1
4,385.01,383.60,385.91,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1
...,...,...,...,...,...,...
7327,3160.01,3085.81,3086.05,./data/2022-09-23_14h27m_Subject 6.4.txt,20220923,6.4
7328,3255.01,3207.68,3207.73,./data/2022-09-23_14h27m_Subject 6.4.txt,20220923,6.4
7329,3345.01,3292.24,3292.40,./data/2022-09-23_14h27m_Subject 6.4.txt,20220923,6.4
7330,3425.01,3292.24,3292.40,./data/2022-09-23_14h27m_Subject 6.4.txt,20220923,6.4


## Merging the dataframes for the first port entry after the tone and the last tone before the tone 

In [45]:
# Combining it based on the tone times and the file path
concatted_first_and_last_porty_entry_dataframe = pd.merge(concatted_first_porty_entry_dataframe, concatted_last_porty_entry_dataframe,  how='left', left_on=['current_tone_time','file_path'], right_on = ['current_tone_time','file_path'], suffixes=('', '_y'))
# Dropping all columns that are duplicates
concatted_first_and_last_porty_entry_dataframe = concatted_first_and_last_porty_entry_dataframe.drop(concatted_first_and_last_porty_entry_dataframe.filter(regex='_y$').columns.tolist(), axis=1)

In [46]:
concatted_first_and_last_porty_entry_dataframe.head()

Unnamed: 0,current_tone_time,first_port_entry_after_tone,port_exit_after_first_port_entry_after_tone,file_path,date,subject,last_port_entry_before_tone,port_exit_after_last_port_entry_before_tone
0,60.01,61.92,63.45,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1,59.59,61.0
1,140.01,140.84,140.98,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1,139.02,139.34
2,230.01,234.61,241.93,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1,229.62,234.59
3,310.01,313.65,325.43,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1,309.31,313.45
4,385.01,385.99,389.03,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1,383.6,385.91


In [47]:
concatted_first_and_last_porty_entry_dataframe.shape

(7332, 8)

- Getting all the rows that have port entry to port exit time that overlaps with the tone playing
    - This would be the port entries that came before the tone, but had a port exit after

In [48]:
port_entries_that_overlap = concatted_first_and_last_porty_entry_dataframe[concatted_first_and_last_porty_entry_dataframe["port_exit_after_last_port_entry_before_tone"] >= concatted_first_and_last_porty_entry_dataframe["current_tone_time"]]

In [49]:
port_entries_that_overlap.head()

Unnamed: 0,current_tone_time,first_port_entry_after_tone,port_exit_after_first_port_entry_after_tone,file_path,date,subject,last_port_entry_before_tone,port_exit_after_last_port_entry_before_tone
0,60.01,61.92,63.45,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1,59.59,61.0
2,230.01,234.61,241.93,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1,229.62,234.59
3,310.01,313.65,325.43,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1,309.31,313.45
4,385.01,385.99,389.03,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1,383.6,385.91
5,485.01,485.4,486.8,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1,480.31,485.27


In [50]:
port_entries_that_overlap.shape

(1327, 8)

# Getting the Latency for Each Session of a Tone Playing

- To calculate latency, we would just subtract the time of the first port entry to that of the tone playing

In [51]:
concatted_first_and_last_porty_entry_dataframe["latency"] = concatted_first_porty_entry_dataframe["first_port_entry_after_tone"] - concatted_first_and_last_porty_entry_dataframe["current_tone_time"]

In [52]:
concatted_first_and_last_porty_entry_dataframe.head()

Unnamed: 0,current_tone_time,first_port_entry_after_tone,port_exit_after_first_port_entry_after_tone,file_path,date,subject,last_port_entry_before_tone,port_exit_after_last_port_entry_before_tone,latency
0,60.01,61.92,63.45,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1,59.59,61.0,1.91
1,140.01,140.84,140.98,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1,139.02,139.34,0.83
2,230.01,234.61,241.93,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1,229.62,234.59,4.6
3,310.01,313.65,325.43,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1,309.31,313.45,3.64
4,385.01,385.99,389.03,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1,383.6,385.91,0.98


- For all the latencies that are are greater than 30 seconds, we changed the latency to 30 seconds

In [53]:
concatted_first_and_last_porty_entry_dataframe["latency_adjusted_greater_than_30"] = concatted_first_and_last_porty_entry_dataframe["latency"].apply(lambda x: 30 if x >= 30 else x)

In [54]:
concatted_first_and_last_porty_entry_dataframe[concatted_first_and_last_porty_entry_dataframe["latency"] >= 30].head()

Unnamed: 0,current_tone_time,first_port_entry_after_tone,port_exit_after_first_port_entry_after_tone,file_path,date,subject,last_port_entry_before_tone,port_exit_after_last_port_entry_before_tone,latency,latency_adjusted_greater_than_30
82,385.01,427.48,427.77,./data/2022-09-20_14h57m_Subject 6.4.txt,20220920,6.4,373.66,373.83,42.47,30.0
83,485.01,535.05,535.1,./data/2022-09-20_14h57m_Subject 6.4.txt,20220920,6.4,455.68,455.75,50.04,30.0
98,1885.01,1961.61,1963.06,./data/2022-09-20_14h57m_Subject 6.4.txt,20220920,6.4,1829.05,1829.18,76.6,30.0
101,2130.01,2356.08,2356.12,./data/2022-09-20_14h57m_Subject 6.4.txt,20220920,6.4,2117.3,2118.28,226.07,30.0
102,2230.01,2356.08,2356.12,./data/2022-09-20_14h57m_Subject 6.4.txt,20220920,6.4,2117.3,2118.28,126.07,30.0


# Seeing which port entries and port exit durations overlap with the tone

- Making a column that states whether or not the last port entry before the tone has a port exit after the tone. This would mean that they are overlapping.

In [55]:
concatted_first_and_last_porty_entry_dataframe["overlapping_port_entry_with_tone"] = concatted_first_and_last_porty_entry_dataframe.apply(lambda row: True if row["port_exit_after_last_port_entry_before_tone"] >= row["current_tone_time"] else False, axis=1)


In [56]:
concatted_first_and_last_porty_entry_dataframe.head()

Unnamed: 0,current_tone_time,first_port_entry_after_tone,port_exit_after_first_port_entry_after_tone,file_path,date,subject,last_port_entry_before_tone,port_exit_after_last_port_entry_before_tone,latency,latency_adjusted_greater_than_30,overlapping_port_entry_with_tone
0,60.01,61.92,63.45,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1,59.59,61.0,1.91,1.91,True
1,140.01,140.84,140.98,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1,139.02,139.34,0.83,0.83,False
2,230.01,234.61,241.93,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1,229.62,234.59,4.6,4.6,True
3,310.01,313.65,325.43,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1,309.31,313.45,3.64,3.64,True
4,385.01,385.99,389.03,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1,383.6,385.91,0.98,0.98,True


- Comparing the latencies of the first port entry after the tone between these two groups:
    - 1. Those with the last port entry before the tone that has a port exit after the tone("overlapping" group)
    - 2. Those with the last port entry before the tone that has a port exit before the tone("nonoverlapping" group)

In [57]:
overlapping_df = concatted_first_and_last_porty_entry_dataframe[concatted_first_and_last_porty_entry_dataframe["overlapping_port_entry_with_tone"]]

In [58]:
non_overlapping_df = concatted_first_and_last_porty_entry_dataframe[~concatted_first_and_last_porty_entry_dataframe["overlapping_port_entry_with_tone"]]

- Number of rows of overlapping and nonoverlapping last port entries before the tone

In [59]:
overlapping_df.shape

(1327, 11)

In [60]:
non_overlapping_df.shape

(6005, 11)

- Latency between the tone playing and the first port entry after the tone

In [61]:
overlapping_df["latency"].mean()

2.7307611152975677

In [62]:
non_overlapping_df["latency"].mean()

25.768976045883786

- Latency between the tone playing and the first port entry after the tone. This had been adjusted so that all latencies that are greater than 30 seconds were adjusted to be just 30 seconds.

In [63]:
overlapping_df["latency_adjusted_greater_than_30"].mean()

2.5216428033156553

In [64]:
non_overlapping_df["latency_adjusted_greater_than_30"].mean()

9.089591767881151

# Adjusting the Latencies if the Port Entries/Exit Overlap With the Tone Times

- For all the rows with overlapping last port entries before the tone, we will adjust the latency of the tone to the first port entry after the tone to 0. (Because the first port entry after the tone would be considered to be the same as the last port entry before the tone)

In [65]:
concatted_first_and_last_porty_entry_dataframe["latency_adjusted_overlap"] = concatted_first_and_last_porty_entry_dataframe.apply(lambda row: 0 if row["overlapping_port_entry_with_tone"] else row["latency_adjusted_greater_than_30"], axis=1)

In [66]:
concatted_first_and_last_porty_entry_dataframe.head()

Unnamed: 0,current_tone_time,first_port_entry_after_tone,port_exit_after_first_port_entry_after_tone,file_path,date,subject,last_port_entry_before_tone,port_exit_after_last_port_entry_before_tone,latency,latency_adjusted_greater_than_30,overlapping_port_entry_with_tone,latency_adjusted_overlap
0,60.01,61.92,63.45,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1,59.59,61.0,1.91,1.91,True,0.0
1,140.01,140.84,140.98,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1,139.02,139.34,0.83,0.83,False,0.83
2,230.01,234.61,241.93,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1,229.62,234.59,4.6,4.6,True,0.0
3,310.01,313.65,325.43,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1,309.31,313.45,3.64,3.64,True,0.0
4,385.01,385.99,389.03,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1,383.6,385.91,0.98,0.98,True,0.0


- All the rows with overlapping last port entries before the tone

In [67]:
concatted_first_and_last_porty_entry_dataframe[concatted_first_and_last_porty_entry_dataframe["overlapping_port_entry_with_tone"]].head()

Unnamed: 0,current_tone_time,first_port_entry_after_tone,port_exit_after_first_port_entry_after_tone,file_path,date,subject,last_port_entry_before_tone,port_exit_after_last_port_entry_before_tone,latency,latency_adjusted_greater_than_30,overlapping_port_entry_with_tone,latency_adjusted_overlap
0,60.01,61.92,63.45,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1,59.59,61.0,1.91,1.91,True,0.0
2,230.01,234.61,241.93,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1,229.62,234.59,4.6,4.6,True,0.0
3,310.01,313.65,325.43,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1,309.31,313.45,3.64,3.64,True,0.0
4,385.01,385.99,389.03,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1,383.6,385.91,0.98,0.98,True,0.0
5,485.01,485.4,486.8,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1,480.31,485.27,0.39,0.39,True,0.0


## Labeling if a row's latency is less than 10 seconds or not

- We will be getting the ratio of latencies(from the time that the tone played, to the first port entry after the tone) that are less than 10 seconds for each session. So we will label all the adjusted latencies that are less than 10 seconds as `True`, and the those with latencies that are greater than 10 seconds as `False` 

In [68]:
concatted_first_and_last_porty_entry_dataframe["adjusted_30_second_latency_less_than_10_seconds_ratio"] = concatted_first_and_last_porty_entry_dataframe["latency_adjusted_greater_than_30"].apply(lambda x: True if x <= 10 else False)

In [69]:
concatted_first_and_last_porty_entry_dataframe["overlap_adjusted_latency_less_than_10_seconds"] = concatted_first_and_last_porty_entry_dataframe["latency_adjusted_overlap"].apply(lambda x: True if x <= 10 else False)

In [70]:
concatted_first_and_last_porty_entry_dataframe.head()

Unnamed: 0,current_tone_time,first_port_entry_after_tone,port_exit_after_first_port_entry_after_tone,file_path,date,subject,last_port_entry_before_tone,port_exit_after_last_port_entry_before_tone,latency,latency_adjusted_greater_than_30,overlapping_port_entry_with_tone,latency_adjusted_overlap,adjusted_30_second_latency_less_than_10_seconds_ratio,overlap_adjusted_latency_less_than_10_seconds
0,60.01,61.92,63.45,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1,59.59,61.0,1.91,1.91,True,0.0,True,True
1,140.01,140.84,140.98,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1,139.02,139.34,0.83,0.83,False,0.83,True,True
2,230.01,234.61,241.93,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1,229.62,234.59,4.6,4.6,True,0.0,True,True
3,310.01,313.65,325.43,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1,309.31,313.45,3.64,3.64,True,0.0,True,True
4,385.01,385.99,389.03,./data/2022-09-27_16h16m_Subject 1.1.txt,20220927,1.1,383.6,385.91,0.98,0.98,True,0.0,True,True


# Making Dataframes that have Each Subject as the Row and Each Date as the Column

- Pivot plots allow us to make columns out of each unique entry in a selected column. (For this, it will be the date column that we will turn into columns) The index for the rows will be each unique entry in another column. (For this, it will be the subject's ID) And the values will be the aggregate of the values that have both the values in the previous two selected columns. (For this it will be the latency columns and we will take the mean)
    - https://pandas.pydata.org/docs/reference/api/pandas.pivot_table.html

- Making a folder for all the pivotplots

# NOTE: If you decide to change the cage numbers or the dates used from the original ones from the imported dataframes, you will need to change the subsequent path to the cage number and dates you are using

In [71]:
first_port_entry_to_tone_latency_output_directory = "./proc/subject_rows_with_date_columns_dataframes/first_port_entry_to_tone_latency/cage_{}_date_{}_{}".format(cage_numbers_for_title, earliest_date, latest_date)

In [72]:
first_port_entry_to_tone_latency_output_directory

'./proc/subject_rows_with_date_columns_dataframes/first_port_entry_to_tone_latency/cage_1_2_3_4_5_6_date_20220920_20220927'

In [73]:
os.makedirs(first_port_entry_to_tone_latency_output_directory, exist_ok=True)

In [74]:
latencies_less_than_10_seconds_ratio_output_directory = "./proc/subject_rows_with_date_columns_dataframes/latencies_less_than_10_seconds_ratio/cage_{}_date_{}_{}".format(cage_numbers_for_title, earliest_date, latest_date)

In [75]:
latencies_less_than_10_seconds_ratio_output_directory

'./proc/subject_rows_with_date_columns_dataframes/latencies_less_than_10_seconds_ratio/cage_1_2_3_4_5_6_date_20220920_20220927'

In [76]:
os.makedirs(latencies_less_than_10_seconds_ratio_output_directory, exist_ok=True)

## Making a pivot plot with the original latency

In [77]:
all_latency_pivot_plot = pd.pivot_table(concatted_first_and_last_porty_entry_dataframe, values='latency', \
                                        index=['subject'], columns=['date'], aggfunc=np.mean)


In [78]:
all_latency_pivot_plot.head()

date,20220920,20220921,20220922,20220923,20220924,20220925,20220926,20220927
subject,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1.1,6.497179,6.126923,3.14,5.035128,5.318718,8.977179,3.201538,3.113077
1.2,6.070256,2.846154,2.799744,2.484615,2.259231,1.601026,1.212564,1.046923
1.3,36.800769,11.732051,8.205897,6.142051,3.894872,2.530513,2.799231,1.270256
1.4,20.379394,12.349487,5.755897,7.011795,6.360769,5.540513,3.579744,3.305641
2.1,8.437949,12.261026,5.110256,3.439487,1.792821,9.276923,1.777692,1.055385


- Getting the cage numbers for each subject and then making a dictionary out of it
    - This will be used to add the cage information to the pivot plots

In [79]:
metadata_df[metadata_df["cage"].isna()]

Unnamed: 0,index,File,Start Date,End Date,Subject,Experiment,Group,Box,Start Time,End Time,MSN,cage
14,./data/2022-09-27_17h40m_Subject 4.4.txt,C:\MED-PC\Data\2022-09-27_17h40m_Subject 4.4.txt,09/27/22,09/27/22,4.4,CD1_vs_C57_Comparison,,4,17:40:46,18:42:29,CD1_reward_training,
88,./data/2022-09-27_17h40m_Subject 4.3.txt,C:\MED-PC\Data\2022-09-27_17h40m_Subject 4.3.txt,09/27/22,09/27/22,4.3,CD1_vs_C57_Comparison,,3,17:40:46,18:42:29,CD1_reward_training,
133,./data/2022-09-27_17h40m_Subject 4.1.txt,C:\MED-PC\Data\2022-09-27_17h40m_Subject 4.1.txt,09/27/22,09/27/22,4.1,CD1_vs_C57_Comparison,,1,17:40:46,18:42:29,CD1_reward_training,
177,./data/2022-09-27_17h40m_Subject 4.2.txt,C:\MED-PC\Data\2022-09-27_17h40m_Subject 4.2.txt,09/27/22,09/27/22,4.2,CD1_vs_C57_Comparison,,2,17:40:46,18:42:29,CD1_reward_training,


In [80]:
# Converting the cage numbers from floats to integers
# When the metadata is imported from the csv files, the cage number is imported as a float
metadata_df["cage"] = metadata_df["cage"].astype(int)

IntCastingNaNError: Cannot convert non-finite values (NA or inf) to integer

In [None]:
subject_to_cage = pd.Series(metadata_df["cage"].values,index=metadata_df["Subject"].values).to_dict()

In [None]:
subject_to_cage

- Adding the cage information with the dictionary

In [None]:
all_latency_pivot_plot["cage"] = all_latency_pivot_plot.index.map(subject_to_cage)

In [None]:
all_latency_pivot_plot.head()

In [None]:
file_path = "original_latency_cage_{}_{}_{}.csv".format(cage_numbers_for_title, earliest_date, latest_date)
output_path = os.path.join(first_port_entry_to_tone_latency_output_directory, file_path)
try:
    all_latency_pivot_plot.to_csv(output_path)
except:
    output_path = os.path.abspath(output_path)
    # Changing path if using on Windows because it raises an error if the path is longer than 260 characters
    # But adding this suffix bypasses this
    output_path = u"\\\\?\\" + output_path
    all_latency_pivot_plot.to_csv(output_path)

## Making a pivot plot with the latency that were greater than 30 seconds being adjusted 

In [None]:
latency_greater_than_30_seconds_adjusted_pivot_plot = pd.pivot_table(concatted_first_and_last_porty_entry_dataframe, values='latency_adjusted_greater_than_30', index=['subject'],

                    columns=['date'], aggfunc=np.mean)


In [None]:
latency_greater_than_30_seconds_adjusted_pivot_plot["cage"] = latency_greater_than_30_seconds_adjusted_pivot_plot.index.map(subject_to_cage)

In [None]:
latency_greater_than_30_seconds_adjusted_pivot_plot.head()

In [None]:
file_path = "subject_to_date_latency_greater_than_30_seconds_adjusted_cage_{}_date_{}_{}.csv".format(cage_numbers_for_title, earliest_date, latest_date)
output_path = os.path.join(first_port_entry_to_tone_latency_output_directory, file_path)
try:
    latency_greater_than_30_seconds_adjusted_pivot_plot.to_csv(output_path)
except:
    output_path = os.path.abspath(output_path)
    # Changing path if using on Windows because it raises an error if the path is longer than 260 characters
    # But adding this suffix bypasses this
    output_path = u"\\\\?\\" + output_path
    latency_greater_than_30_seconds_adjusted_pivot_plot.to_csv(output_path)

## Making a pivot plot with adjusted latencies that overlapped with a tone playing

In [None]:
latency_that_overlaps_with_tone_adjusted_pivot_plot = pd.pivot_table(concatted_first_and_last_porty_entry_dataframe, values='latency_adjusted_overlap', index=['subject'],

                    columns=['date'], aggfunc=np.mean)


In [None]:
latency_that_overlaps_with_tone_adjusted_pivot_plot["cage"] = latency_that_overlaps_with_tone_adjusted_pivot_plot.index.map(subject_to_cage)

In [None]:
latency_that_overlaps_with_tone_adjusted_pivot_plot.head()

In [None]:
file_path = "subject_to_date_latency_that_overlaps_with_tone_adjusted_cage_{}_date_{}_{}.csv".format(cage_numbers_for_title, earliest_date, latest_date)
output_path = os.path.join(first_port_entry_to_tone_latency_output_directory, file_path)
try:
    latency_that_overlaps_with_tone_adjusted_pivot_plot.to_csv(output_path)
except:
    output_path = os.path.abspath(output_path)
    # Changing path if using on Windows because it raises an error if the path is longer than 260 characters
    # But adding this suffix bypasses this
    output_path = u"\\\\?\\" + output_path
    latency_that_overlaps_with_tone_adjusted_pivot_plot.to_csv(output_path)

## Making a pivot plot with the ratio of original latencies that are less than 10 seconds

In [None]:
original_less_than_10_latency_df = pd.pivot_table(concatted_first_and_last_porty_entry_dataframe, values='adjusted_30_second_latency_less_than_10_seconds_ratio', index=['subject'],

                    columns=['date'], aggfunc=np.mean)


In [None]:
original_less_than_10_latency_df["cage"] = original_less_than_10_latency_df.index.map(subject_to_cage)

In [None]:
original_less_than_10_latency_df

In [None]:
file_path = "subject_to_date_less_than_10_seconds_original_latencies_ratios_cage_{}_date_{}_{}.csv".format(cage_numbers_for_title, earliest_date, latest_date)
output_path = os.path.join(first_port_entry_to_tone_latency_output_directory, file_path)
try:
    original_less_than_10_latency_df.to_csv(output_path)
except:
    output_path = os.path.abspath(output_path)
    # Changing path if using on Windows because it raises an error if the path is longer than 260 characters
    # But adding this suffix bypasses this
    output_path = u"\\\\?\\" + output_path
    original_less_than_10_latency_df.to_csv(output_path)

## Making a pivot plot with the ratio of overlap adjusted latencies that are less than 10 seconds

In [None]:
overlap_adjusted_less_than_10_latency_df = pd.pivot_table(concatted_first_and_last_porty_entry_dataframe, values='overlap_adjusted_latency_less_than_10_seconds', index=['subject'],

                    columns=['date'], aggfunc=np.mean)


In [None]:
overlap_adjusted_less_than_10_latency_df["cage"] = overlap_adjusted_less_than_10_latency_df.index.map(subject_to_cage)

In [None]:
overlap_adjusted_less_than_10_latency_df

In [None]:
overlap_adjusted_less_than_10_latency_df.to_csv(os.path.join(latencies_less_than_10_seconds_ratio_output_directory, "subject_to_date_less_than_10_seconds_overlap_adjusted_latencies_ratios_cage_{}_date_{}_{}.csv".format(cage_numbers_for_title, earliest_date, latest_date)))
overlap_adjusted_less_than_10_latency_df.to_excel(os.path.join(latencies_less_than_10_seconds_ratio_output_directory, "subject_to_date_less_than_10_seconds_overlap_adjusted_latencies_ratios_cage_{}_date_{}_{}.xlsx".format(cage_numbers_for_title, earliest_date, latest_date)))

# Plotting the latencies

## Getting the averages for all the trials in one subject and one day

- We will plot a line for each subject. With the X-axis being the days since the first session. And the Y-axis is the latency values averaged across all the trials for one recording session
- Grouping all the rows with the same subject and date(aka, all the trials in one session). And then getting the mean for each value

In [None]:
grouped_averaged_first_porty_entry_dataframe = concatted_first_and_last_porty_entry_dataframe.groupby(by=["file_path", "date", "subject"]).mean()

In [None]:
grouped_averaged_first_porty_entry_dataframe.head()

## Reformatting the Dataframe for plotting

- Resetting the index so that we can use the data as a value

In [None]:
grouped_averaged_first_porty_entry_dataframe = grouped_averaged_first_porty_entry_dataframe.reset_index()

In [None]:
grouped_averaged_first_porty_entry_dataframe.head()

- Turning the date column which is a string, into an integer
    - This will allow us to use the date as the X-axis for plotting

In [None]:
grouped_averaged_first_porty_entry_dataframe["date_int"] = grouped_averaged_first_porty_entry_dataframe["date"].astype(int)

In [None]:
grouped_averaged_first_porty_entry_dataframe.head()

- Adding the cage information
    - We will make one plot for each cage

In [None]:
grouped_averaged_first_porty_entry_dataframe["cage"] = grouped_averaged_first_porty_entry_dataframe["subject"].map(subject_to_cage).astype(int)

In [None]:
grouped_averaged_first_porty_entry_dataframe.head()

## Plotting the latencies that were adjusted if they greater than 30 seconds

In [None]:
original_average_latency_output_directory = "./proc/plots/original_average_latency_plots/cage_{}_date_{}_{}".format(cage_numbers_for_title, earliest_date, latest_date)

In [None]:
original_average_latency_output_directory

In [None]:
os.makedirs(original_average_latency_output_directory, exist_ok=True)

In [None]:
# Plotting for each cage
for cage in grouped_averaged_first_porty_entry_dataframe["cage"].unique():
    fig, ax = plt.subplots()
    # Getting all the rows with the current cage number
    cage_df = grouped_averaged_first_porty_entry_dataframe[grouped_averaged_first_porty_entry_dataframe["cage"] == cage]
    
    # Plotting a line for each subject
    for subject in cage_df["subject"].unique():
        # Getting all the rows with the current subject
        subject_df = cage_df[cage_df["subject"] == subject]
        # Making the dates into days after the first session by subtracting all the dates by the first date
        ax.plot(subject_df["date_int"] - subject_df["date_int"].min() + 1, subject_df["latency_adjusted_greater_than_30"], '-o', label=subject)

    # Setting the Y-Axis to only plot from 0 to 30 because we adjusted the latency to always be under 30
    ax.set_ylim(0, 30)
    # Labeling the X/Y Axis and the title
    ax.set_xlabel("The Days After the First Session")
    ax.set_ylabel("Adjusted Average Latency of First Entry to Tone Onset")
    ax.set_title("Latency of Port Entry to Tone: Cage {}".format(cage))
    # To show the legend
    ax.legend()
    file_name = "original_average_latency_plot_cage_{}_date_{}_{}.png".format(cage, earliest_date, latest_date)
    plt.savefig(os.path.join(original_average_latency_output_directory, file_name))

## Plotting the original ratio of latencies that are less than 10 seconds

In [None]:
less_than_10_second_original_latency_ratio_plots_output_directory = "./proc/plots/less_than_10_second_original_latency_ratio/cage_{}_date_{}_{}".format(cage_numbers_for_title, earliest_date, latest_date)

In [None]:
less_than_10_second_original_latency_ratio_plots_output_directory

In [None]:
os.makedirs(less_than_10_second_original_latency_ratio_plots_output_directory, exist_ok=True)

In [None]:
# Plotting for each cage
for cage in grouped_averaged_first_porty_entry_dataframe["cage"].unique():
    fig, ax = plt.subplots()
    # Getting all the rows with the current cage number
    cage_df = grouped_averaged_first_porty_entry_dataframe[grouped_averaged_first_porty_entry_dataframe["cage"] == cage]
    
    # Plotting a line for each subject
    for subject in cage_df["subject"].unique():
        # Getting all the rows with the current subject
        subject_df = cage_df[cage_df["subject"] == subject]
        # Making the dates into days after the first session by subtracting all the dates by the first date
        ax.plot(subject_df["date_int"] - subject_df["date_int"].min() + 1, subject_df["adjusted_30_second_latency_less_than_10_seconds_ratio"], '-o', label=subject)
    # Setting the Y-Axis to only plot from 0 to 30 because we adjusted the latency to always be under 30
    ax.set_ylim(0, 1)
    # Labeling the X/Y Axis and the title
    ax.set_xlabel("The Days After the First Session")
    ax.set_ylabel("Proportion of Latencies")
    ax.set_title("Less Than 10 Seconds Latencies from Tone Onset: Cage {}".format(cage))
    # To show the legend
    ax.legend()
    
    file_name = "less_than_10_second_original_latency_ratio_plot_cage_{}_date_{}_{}.png".format(cage, earliest_date, latest_date)
    plt.savefig(os.path.join(less_than_10_second_original_latency_ratio_plots_output_directory, file_name))


# Plotting the Adjusted Latencies for Entries that Overlap with the Tone

## Plotting the Latencies 

In [None]:
overlap_adjusted_average_latency_plots_output_directory = "./proc/plots/overlap_adjusted_average_latency_plots/cage_{}_date_{}_{}".format(cage_numbers_for_title, earliest_date, latest_date)

In [None]:
overlap_adjusted_average_latency_plots_output_directory

In [None]:
os.makedirs(overlap_adjusted_average_latency_plots_output_directory, exist_ok=True)

In [None]:
# Plotting for each cage
for cage in grouped_averaged_first_porty_entry_dataframe["cage"].unique():
    fig, ax = plt.subplots()
    # Getting all the rows with the current cage number
    cage_df = grouped_averaged_first_porty_entry_dataframe[grouped_averaged_first_porty_entry_dataframe["cage"] == cage]
    
    # Plotting a line for each subject
    for subject in cage_df["subject"].unique():
        # Getting all the rows with the current subject
        subject_df = cage_df[cage_df["subject"] == subject]
        # Making the dates into days after the first session by subtracting all the dates by the first date
        ax.plot(subject_df["date_int"] - subject_df["date_int"].min() + 1, subject_df["latency_adjusted_overlap"], '-o', label=subject)

    # Setting the Y-Axis to only plot from 0 to 30 because we adjusted the latency to always be under 30
    ax.set_ylim(0, 30)
    # Labeling the X/Y Axis and the title
    ax.set_xlabel("The Days After the First Session")
    ax.set_ylabel("Latencies (Seconds)")
    ax.set_title("Overlap Adjusted Latency of Port Entry to Tone: Cage {}".format(cage))
    # To show the legend
    ax.legend()
    file_name = "overlap_adjusted_average_latency_plot_cage_{}_date_{}_{}.png".format(cage, earliest_date, latest_date)

    plt.savefig(os.path.join(overlap_adjusted_average_latency_plots_output_directory, file_name))


# Plotting the ratio of overlap adjusted latencies that are less than 10 seconds

In [None]:
less_than_10_second_overlap_adjusted_latency_ratio_output_directory = "./proc/plots/less_than_10_second_overlap_adjusted_latency_ratio/cage_{}_date_{}_{}".format(cage_numbers_for_title, earliest_date, latest_date)

In [None]:
less_than_10_second_overlap_adjusted_latency_ratio_output_directory

In [None]:
os.makedirs(less_than_10_second_overlap_adjusted_latency_ratio_output_directory, exist_ok=True)

In [None]:
# Plotting for each cage
for cage in grouped_averaged_first_porty_entry_dataframe["cage"].unique():
    fig, ax = plt.subplots()
    # Getting all the rows with the current cage number
    cage_df = grouped_averaged_first_porty_entry_dataframe[grouped_averaged_first_porty_entry_dataframe["cage"] == cage]
    
    # Plotting a line for each subject
    for subject in cage_df["subject"].unique():
        # Getting all the rows with the current subject
        subject_df = cage_df[cage_df["subject"] == subject]
        # Making the dates into days after the first session by subtracting all the dates by the first date
        ax.plot(subject_df["date_int"] - subject_df["date_int"].min() + 1, subject_df["overlap_adjusted_latency_less_than_10_seconds"], '-o', label=subject)

    # Setting the Y-Axis to only plot from 0 to 30 because we adjusted the latency to always be under 30
    ax.set_ylim(0, 1)
    # Labeling the X/Y Axis and the title
    ax.set_xlabel("The Days After the First Session")
    ax.set_ylabel("Proportion of Latencies")
    ax.set_title("Overlap Adjusted Less Than 10sec Latencies from Tone: Cage {}".format(cage))
    # To show the legend
    ax.legend()
    
    file_name = "less_than_10_second_overlap_adjusted_latency_ratio_plot_cage{}_date_{}_{}.png".format(cage, earliest_date, latest_date)
    plt.savefig(os.path.join(less_than_10_second_overlap_adjusted_latency_ratio_output_directory, file_name))


# Temp

In [None]:
concatted_first_and_last_porty_entry_dataframe[concatted_first_and_last_porty_entry_dataframe["subject"] == 3.4]

In [None]:
concatted_first_and_last_porty_entry_dataframe[concatted_first_and_last_porty_entry_dataframe["subject"] == 4.4]

In [None]:
concatted_first_and_last_porty_entry_dataframe[concatted_first_and_last_porty_entry_dataframe["subject"] == 3.4]

In [None]:
concatted_first_and_last_porty_entry_dataframe[(concatted_first_and_last_porty_entry_dataframe["subject"] == 3.4) & (concatted_first_and_last_porty_entry_dataframe["latency"] <= 6000) & (concatted_first_and_last_porty_entry_dataframe["date"] == 20220921)]

In [None]:
concatted_first_and_last_porty_entry_dataframe[(concatted_first_and_last_porty_entry_dataframe["subject"] == 3.4) & (concatted_first_and_last_porty_entry_dataframe["date"] == 20220921)]

In [None]:
concatted_first_and_last_porty_entry_dataframe[(concatted_first_and_last_porty_entry_dataframe["latency"] <= 30)]