In [1]:
# collection of functions to calculate performance from txt files
import pandas as pd

MAX_DURATION=300 # maximal Intertrial duration in seconds 


#------ Low level functions to parse .txt files
def prs(x):
    res =x.split(':')
    res[1]=str(res[1]).lower().strip()
    if  res[1] == 'ys':
        res[1]='yes'
    return res[1]

def prsDate(x):
    return pd.datetime.strptime(x, 'Time: %H:%M:%S_%d/%m/%Y')

def prsRT(x):
    return float(prs(x))/1000 

# read a file in a pandas format with fields [event - answer - rt - time]
def fileReader(filename):
    #print('Reading: ',filename)
    return pd.read_csv(filename, sep='\t', lineterminator='\n',header=None,
                converters={0:prs,1:prs,2:prsRT,3:prsDate},
                names=['event','answer','rt','time'])

# This function reads a folder and converts files to a pandas dataframe with fields: 
# event [both, right or left]
# answer [both_dx(right button),both_sx(left button),left,right]
# rt [reaction time in sec]
# trial_time [timestamp of the trial]
# intertrial [intertrial interval in sec]
# type [habituation, shaping, operant]
# hit [0:wrond 1:correct answer, in both all answer are 1]

def readSession(files):
    df_from_each_file = (fileReader(f) for f in files)
    sessions=pd.concat(df_from_each_file, ignore_index=True)
    pd.options.mode.chained_assignment = None 
    
    sessions['trial_time']=sessions['time']
    sessions['intertrial']=sessions['trial_time'] - sessions['trial_time'].shift(1)
    sessions['intertrial']=sessions['intertrial'].dt.total_seconds()
    temp = sessions['intertrial']>MAX_DURATION
    temp2 = sessions['intertrial']<=0
    sessions['intertrial'][temp]=np.NaN
    sessions['intertrial'][temp2]=np.NaN
    
    sessions=sessions.set_index(['time'])
    
    left=sessions['event']=='left'
    right= sessions['event']=='right'
    reward=sessions['event']=='reward'
    both= sessions['event']=='both'
    
    yes= sessions['answer']=='yes'
    no= sessions['answer']=='no'
    dx= sessions['answer']=='both_dx'
    sx= sessions['answer']=='both_sx'

    sessions['type']='None'
    sessions['type'][left | right]='operant'
    sessions['type'][reward]='habituation'
    sessions['type'][both]='shaping'
    
    sessions['hit']=0
    sessions['hit'][(left | right ) & yes]=1
    sessions['hit'][(dx | sx ) & both]=1
    pd.options.mode.chained_assignment = 'warn'
    return sessions

# This function calculates performances during shaping and returns a pandas df with fields: 
# event [number of total events]
# rt [mean reaction time in sec]
# intertrial [mean intertrial time in sec]
# intertrial_median [mean intertrial time in sec]
# rt_median [median reaction time in sec]
# right [number of right events]
# hit_right [correctness 0 to 1 ]
# left [number of left events]
# hit_left [correctness 0 to 1 ] 
# rt_yes [mean reaction time in sec]
# rt_no [mean reaction time in sec]
# rt_yes_median [median reaction time in sec]
# rt_no_median [median reaction time in sec]

def shapingPerformance(sessions):
    training = sessions['type']=='shaping'
    trainings=sessions[training]

    # Prestazione Totale
    totali=trainings.groupby( [trainings.index.year, trainings.index.month, trainings.index.day] ).count()
    successi=trainings.groupby( [trainings.index.year, trainings.index.month, trainings.index.day] ).sum()
    medie=trainings.groupby( [trainings.index.year, trainings.index.month, trainings.index.day] ).mean()
    mediane=trainings.groupby( [trainings.index.year, trainings.index.month, trainings.index.day] ).median()

   
    # prestazioni specifiche
    both=trainings['event']=='both'
    boths = trainings[both]

    conta=boths.groupby( [boths.index.year, boths.index.month, boths.index.day] ).sum()
    
    perc=totali
    boolEvent =perc['event']>90 ####
    perc['event'][boolEvent]=90
    perc['rt']=medie['rt']
    perc['intertrial']=medie['intertrial']
    perc['intertrial_median']=mediane['intertrial']
    perc['rt_median']=mediane['rt']
    perc.drop('type', axis=1, inplace=True)
    perc.drop('answer', axis=1, inplace=True)
    perc.drop('trial_time', axis=1, inplace=True)
    
    perc['day']=range(0,len(perc))
    return perc

# This function calculates performances during operant task and returns a pandas df with fields: 
# event [number of total events]
# rt [mean reaction time in sec]
# intertrial [mean intertrial time in sec]
# hit [relative correctness 0 to 1] 
# intertrial_median [mean intertrial time in sec]
# rt_median [median reaction time in sec]
# day [int with the current day]

def operantPerformance(sessions):
    simple = sessions['type']=='operant'
    simples=sessions[simple]

    # Total performance
    totali=simples.groupby( [simples.index.year, simples.index.month, simples.index.day] ).count()
    successi=simples.groupby( [simples.index.year, simples.index.month, simples.index.day] ).sum()
    medie=simples.groupby( [simples.index.year, simples.index.month, simples.index.day] ).mean()
    mediane=simples.groupby( [simples.index.year, simples.index.month, simples.index.day] ).median()

    # Specific performance
    right=simples['event']=='right'
    left=simples['event']=='left'
    rights = simples[right]
    lefts = simples[left]
    
    # performance yes o no
    yes=simples['answer']=='yes'
    no=simples['answer']=='no'
    yess = simples[yes]
    nos = simples[no]
    
    totaliRight=rights.groupby( [rights.index.year, rights.index.month, rights.index.day] ).count()
    successiRight=rights.groupby( [rights.index.year, rights.index.month, rights.index.day] ).sum()
    
    totaliLeft=lefts.groupby( [lefts.index.year, lefts.index.month, lefts.index.day] ).count()
    successiLeft=lefts.groupby( [lefts.index.year, lefts.index.month, lefts.index.day] ).sum()
    
    medieYes=yess.groupby( [yess.index.year, yess.index.month, yess.index.day] ).mean()
    medieNo=nos.groupby( [nos.index.year, nos.index.month, nos.index.day] ).mean()
    medianeYes=yess.groupby( [yess.index.year, yess.index.month, yess.index.day] ).mean()
    medianeNo=nos.groupby( [nos.index.year, nos.index.month, nos.index.day] ).median()
    
    perc=totali
    boolEvent =perc['event']>90 ####
    perc['event'][boolEvent]=90
    perc['hit']=successi['hit']/totali['hit']
    perc['rt']=medie['rt']
    perc['intertrial']=medie['intertrial']
    perc['intertrial_median']=mediane['intertrial']
    perc['rt_median']=mediane['rt']
    perc['right']=totaliRight['event']
    perc['hit_right']=successiRight['hit']/totaliRight['hit']
    perc['left']=totaliLeft['event']
    perc['hit_left']=successiLeft['hit']/totaliLeft['hit']
    perc['rt_yes']=medieYes['rt']
    perc['rt_no']=medieNo['rt']
    perc['rt_yes_median']=medianeYes['rt']
    perc['rt_no_median']=medianeNo['rt']
    perc.drop('type', axis=1, inplace=True)
    perc.drop('answer', axis=1, inplace=True)
    perc.drop('trial_time', axis=1, inplace=True)
    
    return perc


In [3]:
# Reading all sessions
import glob
import os

# path to subject folder
pth = os.getcwd() + '/data/F-0B0-WT/'
# listing all the file contained in folder
cf=glob.glob(os.path.join(pth, "*.txt"))
# tranform files in a pandas database
session = readSession(cf)
session.sort_index()

Unnamed: 0_level_0,event,answer,rt,trial_time,intertrial,type,hit
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2018-01-10 10:17:27,reward,reward,,2018-01-10 10:17:27,,habituation,0
2018-01-10 10:17:46,reward,reward,,2018-01-10 10:17:46,19.0,habituation,0
2018-01-10 10:18:16,reward,reward,,2018-01-10 10:18:16,30.0,habituation,0
2018-01-10 10:18:44,reward,reward,,2018-01-10 10:18:44,28.0,habituation,0
2018-01-10 10:19:12,reward,reward,,2018-01-10 10:19:12,28.0,habituation,0
...,...,...,...,...,...,...,...
2018-04-24 12:11:33,left,no,10.938,2018-04-24 12:11:33,15.0,operant,0
2018-04-24 12:12:22,right,yes,42.702,2018-04-24 12:12:22,49.0,operant,1
2018-04-24 12:12:37,right,yes,9.699,2018-04-24 12:12:37,15.0,operant,1
2018-04-24 12:13:25,right,no,30.272,2018-04-24 12:13:25,48.0,operant,0


In [129]:
# calculating shaping performances over time
shapingPerformance(session)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,event,rt,intertrial,hit,intertrial_median,rt_median,day
time,time,time,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2018,1,11,58,10.733621,33.2,58,31.0,6.4725,0
2018,1,12,69,5.903493,26.121212,69,22.0,4.267,1
2018,1,13,83,4.582157,22.7125,83,19.0,3.739,2


In [130]:
# calculating operant tsk performances over time
operantPerformance(session)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,event,rt,intertrial,hit,intertrial_median,rt_median,right,hit_right,left,hit_left,rt_yes,rt_no,rt_yes_median,rt_no_median
time,time,time,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2018,1,16,77,3.583974,22.783784,0.766234,16.5,2.605,36,0.861111,41,0.682927,3.594119,3.550722,3.594119,2.0295
2018,1,17,77,2.909351,23.837838,0.727273,16.5,1.79,39,0.666667,38,0.789474,2.931054,2.851476,2.931054,3.392
2018,1,18,90,1.763278,15.390805,0.711111,12.0,1.316,45,0.511111,45,0.911111,1.988531,1.208808,1.988531,0.661
2018,1,19,90,2.088867,14.942529,0.788889,13.0,1.392,45,0.733333,45,0.844444,2.297099,1.310737,2.297099,0.886
2018,1,20,90,3.890356,16.0,0.866667,15.0,2.5105,45,0.888889,45,0.844444,3.656551,5.410083,3.656551,4.845
2018,1,26,30,1.948867,12.517241,0.833333,10.0,1.1145,15,0.733333,15,0.933333,2.19252,0.7306,2.19252,0.046
2018,2,15,30,1.881133,15.689655,0.8,13.0,1.9005,15,0.733333,15,0.866667,2.165,0.745667,2.165,0.0395
2018,3,15,30,3.157267,17.0,0.866667,15.0,2.528,15,0.866667,15,0.866667,2.933154,4.614,2.933154,4.788
2018,4,24,26,12.196115,22.32,0.807692,17.0,5.0285,14,0.785714,12,0.833333,9.480714,23.6008,9.480714,29.205


## Reading all the dataset
    Code Assumtions:
    1. There is an experiment folder containing only subjects subfolders
    2. Each subfolder represents a subjects formatted as CAGE-LABEL-GENO [e.g. F-0B0-WT ]
    3. Each subfolder must contains the same number of files
    4. Running this code will returns two dataframes:
        a. 'shaping' [containing data from the shaping task]
        b. 'operant' [containing data from the operant task]


In [5]:


path = os.getcwd() + '/data/'

cf=glob.glob(os.path.join(path, "*/"))
# initializing df 
shaping = pd.DataFrame(columns=['cage', 'label', 'geno']) 
operant = pd.DataFrame(columns=['cage', 'label', 'geno']) 
for f in range(len(cf)):
    (dire,sub) = os.path.split(cf[f][:-1])
    shapSub = sub.split('-')
    operSub = sub.split('-')
    all_files = glob.glob(os.path.join(cf[f], "*.txt"))
    sessions = readSession(all_files)
    percTr = shapingPerformance(sessions)
    percSimp = operantPerformance(sessions)
    print(cf[f])
    #print(percSimp)
    shapSub.extend(percTr['event'].values)
    shapSub.extend(percTr['rt_median'].values) #rt_median
    shapSub.extend(percTr['intertrial_median'].values)
    
    operSub.extend(percSimp['hit'].values)
    operSub.extend(percSimp['rt_median'].values)
    operSub.extend(percSimp['intertrial_median'].values)
    operSub.extend(percSimp['rt_yes_median'].values)
    operSub.extend(percSimp['rt_no_median'].values)
    
    # adding necessary columns
    if f==0:
        col_ev =  ['hit_' + str(h) for h in range(0,len(percTr['event'].values))]
        col_rtm =  ['rt_' + str(h) for h in range(0,len(percTr['rt_median'].values))]
        col_it =  ['intertrial_' + str(h) for h in range(0,len(percTr['intertrial_median'].values))]
        shaping = shaping.reindex( columns=shaping.columns.tolist() + col_ev + col_rtm + col_it)
        
        col_ev =  ['hit_' + str(h) for h in range(0,len(percSimp['hit'].values))]
        col_rtm =  ['rt_' + str(h) for h in range(0,len(percSimp['rt_median'].values))]
        col_it =  ['intertrial_' + str(h) for h in range(0,len(percSimp['intertrial_median'].values))]
        col_rtym =  ['rt_yes_' + str(h) for h in range(0,len(percSimp['rt_yes_median'].values))]
        col_rtnm =  ['rt_no_' + str(h) for h in range(0,len(percSimp['rt_no_median'].values))]
        operant = operant.reindex( columns=operant.columns.tolist() + col_ev + col_rtm + col_it + col_rtym + col_rtnm)
        
    shaping.loc[f]=shapSub
    operant.loc[f]=operSub


shaping.index= range(len(shaping))
shaping


/Users/tardis/Desktop/conditioning/dataset/data/F-R00-WT/
/Users/tardis/Desktop/conditioning/dataset/data/F-0R0-WT/
/Users/tardis/Desktop/conditioning/dataset/data/R-R00-WT/
/Users/tardis/Desktop/conditioning/dataset/data/F-0B0-WT/
/Users/tardis/Desktop/conditioning/dataset/data/M-000-WT/
/Users/tardis/Desktop/conditioning/dataset/data/F-B00-WT/


Unnamed: 0,cage,label,geno,hit_0,hit_1,hit_2,rt_0,rt_1,rt_2,intertrial_0,intertrial_1,intertrial_2
0,F,R00,WT,43.0,51.0,86.0,9.88,4.556,2.936,38.5,29.5,18.0
1,F,0R0,WT,48.0,52.0,53.0,9.614,6.588,5.728,36.0,33.0,26.0
2,R,R00,WT,57.0,58.0,79.0,4.987,3.7405,4.269,28.5,23.0,21.0
3,F,0B0,WT,58.0,69.0,83.0,6.4725,4.267,3.739,31.0,22.0,19.0
4,M,000,WT,67.0,64.0,85.0,5.743,4.139,2.191,24.0,22.0,16.0
5,F,B00,WT,64.0,72.0,88.0,6.2415,4.7175,2.662,27.0,21.0,16.0


In [6]:
operant

Unnamed: 0,cage,label,geno,hit_0,hit_1,hit_2,hit_3,hit_4,hit_5,hit_6,...,rt_yes_8,rt_no_0,rt_no_1,rt_no_2,rt_no_3,rt_no_4,rt_no_5,rt_no_6,rt_no_7,rt_no_8
0,F,R00,WT,0.180556,0.731707,0.766667,0.833333,0.866667,0.8,0.866667,...,11.595,3.445,2.779,0.192,0.223,0.557,0.667,2.1195,3.683,18.637
1,F,0R0,WT,0.65,0.894118,0.901099,0.855556,0.811111,0.7,0.8,...,13.336429,7.094,3.601,0.858,0.122,1.807,1.238,1.933,2.238,5.4355
2,R,R00,WT,0.631579,0.546512,0.858824,0.855556,0.855556,0.9,1.0,...,6.780043,2.9735,2.204,1.5385,2.411,0.048,0.041,,1.5035,5.3365
3,F,0B0,WT,0.766234,0.727273,0.711111,0.788889,0.866667,0.833333,0.8,...,9.480714,2.0295,3.392,0.661,0.886,4.845,0.046,0.0395,4.788,29.205
4,M,000,WT,0.588889,0.644444,0.722222,0.733333,0.877778,0.733333,0.833333,...,10.187053,1.428,1.327,0.374,0.673,1.919,0.287,3.562,1.04,15.113
5,F,B00,WT,0.344444,0.835165,0.855556,0.755814,0.766667,0.833333,0.84,...,7.934652,2.346,0.724,0.588,0.252,1.72,0.252,1.991,3.4725,13.492
