In [1]:
import csv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import ot
import ot.plot
import pickle
import os
import re

In [2]:
def extract_six_digit_date(string):
    """Convert the first M/D/YYYY date found in `string` to an MMDDYY folder name.

    The recording folders are named with a six-digit date, so the
    "Date Exp" values of the summary file are converted with this helper.

    Parameters
    ----------
    string : str
        Text containing a date such as '4/2/2022' or '12/16/2021'.

    Returns
    -------
    str or None
        The zero-padded MMDDYY string (e.g. '040222'), or None when the
        input is not a string (e.g. NaN from an empty CSV cell) or holds
        no date in the expected format.
    """
    # Empty cells read by pandas arrive as float NaN; re.search would raise
    # TypeError on them, so treat any non-string like "no date found".
    if not isinstance(string, str):
        return None

    match = re.search(r'(\d{1,2})/(\d{1,2})/(\d{4})', string)
    if match is None:
        return None

    month, day, full_year = match.groups()
    # Only the last two digits of the year appear in the folder name.
    return "{:02}{:02}{}".format(int(month), int(day), full_year[-2:])

In [3]:
def _read_csv_column(filepath, column=0):
    # Return the raw string values of one column of a CSV file, skipping blank lines.
    values = []
    # newline='' is what the csv module expects from the file object it reads.
    with open(filepath, 'r', newline='') as handle:
        for row in csv.reader(handle):
            if row:  # csv.reader yields [] for a completely blank line
                values.append(row[column])
    return values


def OpenFiles(path, mouse, date):
    """Load the event timestamps and neuron data for one recording session.

    Parameters
    ----------
    path : str
        Filepath of the directory that contains the mouse folders.
    mouse : str
        Mouse ID (name of the mouse folder).
    date : str
        Six-digit date, which is the name of the session folder.

    Returns
    -------
    lick, cold, hot, rt : np.ndarray of float
        First column of Lick.csv, ColdTS.csv, HotTS.csv and RTTS.csv in the
        session's TsEvents folder.
    NeuronData : list of list of float
        One list per file in TsEvents/NEURON: the second column of that
        file with its first row dropped (presumably a header - confirm),
        each value rounded to 3 decimals.
    NeuronFileNames : list of str
        File names in the same order as `NeuronData`.
    """
    events_dir = f'{path}/{mouse}/{date}/TsEvents'

    lick = np.array(_read_csv_column(f'{events_dir}/Lick.csv')).astype(float)
    cold = np.array(_read_csv_column(f'{events_dir}/ColdTS.csv')).astype(float)
    hot = np.array(_read_csv_column(f'{events_dir}/HotTS.csv')).astype(float)
    rt = np.array(_read_csv_column(f'{events_dir}/RTTS.csv')).astype(float)

    NeuronFileNames = NeuronFileNameLs(path, mouse, date)
    NeuronData = []
    for file in NeuronFileNames:
        raw_values = _read_csv_column(f'{events_dir}/NEURON/{file}', column=1)
        # Slicing (instead of `del raw_values[0]`) also copes with an empty file.
        NeuronData.append([round(float(num), 3) for num in raw_values[1:]])

    return lick, cold, hot, rt, NeuronData, NeuronFileNames

def NeuronFileNameLs(path,mouse,date):
    """List the neuron file names for one mouse/day recording session.

    Parameters
    ----------
    path : str
        Filepath of the directory that contains the mouse folders.
    mouse : str
        Mouse ID (name of the mouse folder).
    date : str
        Six-digit date, which is the name of the session folder.

    Returns
    -------
    list of str
        Names of all entries in `{path}/{mouse}/{date}/TsEvents/NEURON`,
        sorted alphabetically.
    """
    folder_path = f'{path}/{mouse}/{date}/TsEvents/NEURON'
    # os.listdir returns entries in arbitrary order; sorting keeps any
    # position-based numbering of the neurons reproducible across machines.
    return sorted(os.listdir(folder_path))

def binary_placer(StimTime, EventTimes):
    """Turn event timestamps into a 4000-bin (1 ms per bin) binary trace.

    Parameters
    ----------
    StimTime : float
        Time of stimulus delivery; it becomes the new zero time and maps
        to index 2000.
    EventTimes : sequence of float
        Event timestamps, in the same units as `StimTime`, expected to lie
        within the window from 2 s before to 2 s after the stimulus.

    Returns
    -------
    np.ndarray
        Array of shape (4000,), index = time in ms shifted by +2000.
        0.0 where no event occurred, 1.0 where at least one event occurred.
        Events that fall outside the window are ignored.
    """
    EventData = np.zeros(4000)
    if len(EventTimes) == 0:
        return EventData

    relative_times = np.asarray(EventTimes, dtype=float) - StimTime
    # Round to the nearest millisecond *before* converting to int: a plain
    # astype(int) truncates, so a value such as 2122.9999999 would land one
    # bin early.
    bins = np.rint(relative_times * 1000).astype(int) + 2000
    # Negative indices would silently wrap to the end of the array, so keep
    # only the bins that really lie inside the window.
    in_window = (bins >= 0) & (bins < EventData.size)
    EventData[bins[in_window]] = 1.0

    return EventData

def find_elements_between_values(lst, lower_limit, upper_limit):
    """Return the elements of `lst` strictly between the two limits.

    Used to pick the licks/spikes that belong to one trial, where a trial
    is the window around a stimulus delivery.

    Parameters
    ----------
    lst : iterable
        Values to filter; order is preserved.
    lower_limit, upper_limit :
        Exclusive bounds; values equal to either limit are left out.

    Returns
    -------
    list
        The elements `e` with lower_limit < e < upper_limit.
    """
    return [value for value in lst if lower_limit < value < upper_limit]

# Process all of the Temperature Data Recordings

Goal: take all of the .csv data files and combine them into two large DataFrames, one for lick information and one for neuron (spike) information, each saved as its own .pickle file. Every row holds the NeuronID, MouseID, Taste (temperature) code, Neuron number, Trial, and the data for that trial.

We can't use the AllN.pickle files in each separate recording's folder as our data source because it's unclear which trial(s) Roberto's lab dropped when creating those files.

### First, open the summary file
This will give us all of our mouse/date information for later. We need to add a new column that converts the "Date Exp" column into a 6-digit (MMDDYY) string, which is how all of our folders are named.

We also remove the last row of the summary file, which describes a recording of taste data instead of temperature data.

In [4]:
# Read only the two columns needed to locate each recording folder, then
# remove the rows labelled 39, 40 and 41: these are empty rows that show up
# in the .csv for some reason.
# The Summary file should contain unique combinations of dates and mouseID's.
SummaryDF = pd.read_csv(
    'C:/Users/nasha/OneDrive - Florida State University/temp_files_extraneous/Summary_dataset.csv',
    usecols=['Mouse', 'Date Exp'],
).drop(index=[39, 40, 41])

In [5]:
# Build the six-digit folder name for every row from the second column
# ("Date Exp") and attach it as a new column.
DateFolder = [extract_six_digit_date(date_exp) for date_exp in SummaryDF.iloc[:, 1]]
SummaryDF['DateFolder'] = DateFolder

# Drop the last row because it describes information that comes from a taste
# trial. If we want to look at that, we can open it separately; it should not
# go into this .pickle file.
# NOTE: re-running this cell removes one more row each time.
SummaryDF = SummaryDF.drop(index=SummaryDF.index[-1])

In [6]:
# Display the summary table to check the new DateFolder column.
SummaryDF

Unnamed: 0,Mouse,Date Exp,DateFolder
0,CB279,12/16/2021,121621
1,CB279,12/14/2021,121421
2,CB279,12/20/2021,122021
3,CB278,12/15/2021,121521
4,CB278,12/17/2021,121721
5,CB278,12/19/2021,121921
6,CB295,4/2/2022,40222
7,CB295,4/6/2022,40622
8,CB295,3/30/2022,33022
9,CB293,4/2/2022,40222


### Create one DF for lick information and one DF for spike information



In [12]:
# Stimulus ("taste") codes - 0: cold, 1: hot, 2: room temp
taste = [0, 1, 2]

# Running offset so that neurons keep a unique number as we iterate through
# recording sessions.
counter = 0

path = 'C:/Users/nasha/OneDrive - Florida State University/Temperature session'

# Column labels for the data part of the dataframes: one per integer
# from -2000 up to 1999.
columns = range(-2000,2000)
new_columns = list(columns)

# Accumulators filled row by row before being turned into dataframes:
# the data rows (Ldata / Ndata) and their descriptive info (linfo / ninfo).
Ldata, Ndata = [], []
linfo, ninfo = [], []

In [13]:
for session_idx in range(len(SummaryDF)):
    # Each summary row is one recording session: column 0 is the mouse,
    # column 2 the six-digit date folder.
    mouseID = SummaryDF.iloc[session_idx, 0]
    DateFolderName = SummaryDF.iloc[session_idx, 2]

    # lick: timestamps of every lick, shared by all neurons of the session.
    # NeuronSpikeData: one list of spike timestamps per neuron of the session.
    lick, cold, hot, rt, NeuronSpikeData, nfilenames = OpenFiles(path, mouseID, DateFolderName)

    # Stimulus-delivery timestamps, indexed by taste code (0 cold, 1 hot, 2 rt).
    # Each one becomes the "zero" time of its trial.
    tempstem = [cold, hot, rt]

    # Neuron label = file name up to its first '.'
    neuron_labels = [file_name.split('.')[0] for file_name in nfilenames]

    for tastecode in taste:
        for trial, stim_time in enumerate(tempstem[tastecode]):
            # A trial is the window from 2 s before to 1.999 s after the stimulus.
            window_start = stim_time - 2
            window_end = stim_time + 1.999

            # Lick trace is computed once per trial and repeated for every neuron.
            LickData = binary_placer(
                stim_time,
                find_elements_between_values(lick, window_start, window_end),
            )

            for neuron, neuron_spikes in enumerate(NeuronSpikeData):
                SpikeData = binary_placer(
                    stim_time,
                    find_elements_between_values(neuron_spikes, window_start, window_end),
                )
                shared_info = [mouseID, DateFolderName, neuron_labels[neuron],
                               tastecode, neuron + counter, trial]

                Ndata.append(SpikeData)
                Ldata.append(LickData)
                linfo.append(['Lick'] + shared_info)
                ninfo.append(['Neuron'] + shared_info)

    # Shift the numbering so the next session's neurons get fresh numbers.
    counter += len(NeuronSpikeData)

# Turn the accumulated rows into dataframes, label the data columns, and put
# the descriptive info columns in front of the data columns.
info_columns = ['Recording Type', 'MouseID', 'Date', 'NeuronID', 'Taste', 'Neuron', 'Trial']

lick_values = pd.DataFrame(Ldata)
lick_values.columns = new_columns
neuron_values = pd.DataFrame(Ndata)
neuron_values.columns = new_columns

LInfoDF = pd.DataFrame(linfo)
LInfoDF.columns = info_columns
NInfoDF = pd.DataFrame(ninfo)
NInfoDF.columns = info_columns

# Side-by-side concatenation: info columns first, then the data columns.
LickDF = pd.concat([LInfoDF, lick_values], axis=1)
NeuronDF = pd.concat([NInfoDF, neuron_values], axis=1)

In [14]:
# Write both dataframes to pickle files inside the `path` directory
# (NeuronDF.pickle and LickDF.pickle); existing files are overwritten.
NeuronDF.to_pickle(f'{path}/NeuronDF.pickle')
LickDF.to_pickle(f'{path}/LickDF.pickle')

In [15]:
# Display the lick dataframe (info columns followed by the -2000..1999 data columns).
LickDF

Unnamed: 0,Recording Type,MouseID,Date,NeuronID,Taste,Neuron,Trial,-2000,-1999,-1998,...,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999
0,Lick,CB279,121621,SU_20,0,0,0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Lick,CB279,121621,SU_44,0,1,0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Lick,CB279,121621,SU_48,0,2,0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Lick,CB279,121621,SU_51,0,3,0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Lick,CB279,121621,SU_54,0,4,0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34875,Lick,CB317,111022,SU_68,2,428,29,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
34876,Lick,CB317,111022,SU_74,2,429,29,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
34877,Lick,CB317,111022,SU_82,2,430,29,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
34878,Lick,CB317,111022,SU_89,2,431,29,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
# Display the neuron dataframe (info columns followed by the -2000..1999 data columns).
NeuronDF

Unnamed: 0,Recording Type,MouseID,Date,NeuronID,Taste,Neuron,Trial,-2000,-1999,-1998,...,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999
0,Neuron,CB279,121621,SU_20,0,0,0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Neuron,CB279,121621,SU_44,0,1,0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,Neuron,CB279,121621,SU_48,0,2,0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Neuron,CB279,121621,SU_51,0,3,0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Neuron,CB279,121621,SU_54,0,4,0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34875,Neuron,CB317,111022,SU_68,2,428,29,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
34876,Neuron,CB317,111022,SU_74,2,429,29,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
34877,Neuron,CB317,111022,SU_82,2,430,29,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
34878,Neuron,CB317,111022,SU_89,2,431,29,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Now, check against a trial from the lab's analysis

In [18]:
# First summary row: its mouse and date folder select the session that is
# compared against the lab's AllN.pickle below.
SummaryDF.iloc[0,:]

Mouse              CB279
Date Exp      12/16/2021
DateFolder        121621
Name: 0, dtype: object

In [24]:
# Load the lab's own analysis for the session in the first summary row.
# NOTE: pickle.load executes code stored in the file - only open trusted files.
lab_pickle_path = f'{path}/{SummaryDF.iloc[0,0]}/{SummaryDF.iloc[0,2]}/SU_Analysis/AllN.pickle'
with open(lab_pickle_path, 'rb') as pickle_file:
    NDF = pickle.load(pickle_file)

# Trial from the lab's analysis.
# The 'Trial' index differs from ours (9 here, 10 below) because the lab
# dropped the first trial from this neuron.
# The neuron is selected by n_ID rather than by 'Neuron': our 'Neuron' number
# is order-dependent and will not match the lab's, whereas n_ID / NeuronID
# stay consistent.
lab_trial_mask = (
    (NDF['Taste'] == 0)
    & (NDF['Trial'] == 9)
    & (NDF['n_ID'] == 'SU_20.csv')
)
NDF_test = NDF[lab_trial_mask]

# The same trial taken from the dataframe we created, to check that it
# matches the lab's version.
our_trial_mask = (
    (NeuronDF['Taste'] == 0)
    & (NeuronDF['Trial'] == 10)
    & (NeuronDF['NeuronID'] == 'SU_20')
    & (NeuronDF['MouseID'] == SummaryDF.iloc[0,0])
)
Test_NeuronDF = NeuronDF[our_trial_mask]

In [25]:
# Display the trial selected from the lab's analysis.
NDF_test

Unnamed: 0,Recording Type,MouseID,Date,n_ID,Taste,Neuron,Trial,-2000,-1999,-1998,...,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999
96,Neuron,CB279,121621,SU_20.csv,0,1,9,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [26]:
# Display the matching trial selected from our own dataframe.
Test_NeuronDF

Unnamed: 0,Recording Type,MouseID,Date,NeuronID,Taste,Neuron,Trial,-2000,-1999,-1998,...,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999
70,Neuron,CB279,121621,SU_20,0,0,10,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [1]:
# Overlay the lab's trial and our trial; columns 7 onward are the data
# columns (the first 7 are the info columns).
# Requires `plt` from the imports cell at the top: on a fresh kernel, run
# that cell first or this cell fails with NameError.
fig, ax = plt.subplots()
ax.plot(NDF_test.iloc[0, 7:], label='Lab Trial')
ax.plot(Test_NeuronDF.iloc[0, 7:], label='DF Trial')
ax.set_ylabel('Spike Count')
ax.set_xlabel('Time (ms): T0 = stimulus delivery')
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0)
plt.show()

NameError: name 'plt' is not defined