___
## EPHYS analysis
___

#### Recover behavior

In [42]:
# Load modules and data
import statsmodels.api as sm
from sklearn.linear_model import LogisticRegression

#Import all needed libraries
from matplotlib.lines import Line2D
import os
import pandas as pd
import numpy as np
from datahandler import Utils
import seaborn as sns
import matplotlib.pyplot as plt
import scipy
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.patches as mpatches
from statannot import add_stat_annotation
import itertools
from scipy import stats
from datahandler import Utils
from ast import literal_eval
from glob import glob
from open_ephys.analysis import Session
import pyopenephys

from statsmodels.genmod.bayes_mixed_glm import BinomialBayesMixedGLM

In [43]:
batch = 'general'

# Folder holding the two global CSV exports read below
# (global_params.csv and global_trials.csv).
path2 = 'C:/Users/Tiffany/Google Drive/WORKING_MEMORY/EXPERIMENTS/ELECTROPHYSIOLOGY/EphysB/globals'
os.chdir(path2)

df_trials = pd.read_csv(path2 + '/global_trials.csv', sep=';')
df_params = pd.read_csv(path2 + '/global_params.csv', sep=';')
# Inner join: every trial row receives the parameters of its (session, subject_name) pair.
df_behavior = pd.merge(df_params, df_trials, on=['session', 'subject_name'])

# Rename some of the variables for a global consensus.
df_behavior = df_behavior.rename(columns={'subject_name': 'subject', 'hithistory': 'hit', 'probabilities': 'prob', 'validhistory': 'valids'})

# Keep only sessions in which the animal was in the final training step:
# stage 3 or above, motor 6, no delay progression (delay lengths remain the same)
# and at least 60 % accuracy both overall and in `accuracy_low`
# (good accuracy in short trials, per the original author's note).
min_accuracy = 0.60
in_final_step = (
    (df_behavior['stage_number'] >= 3)
    & (df_behavior['motor'] == 6)
    & (df_behavior['delay_progression'] == 0)
)
accurate_enough = (
    (df_behavior['accuracy_low'] >= min_accuracy)
    & (df_behavior['accuracy'] >= min_accuracy)
)
# .copy() gives later cells an independent frame to add columns to, instead of
# a selection of the merged table.
df_behavior = df_behavior.loc[in_final_step & accurate_enough].copy()

  interactivity=interactivity, compiler=compiler, result=result)


In [44]:
# Store the hit column as floats.
df_behavior = df_behavior.astype({'hit': float})

In [45]:
# Select only the session and animal that we need (subject E10, 2021-06-13).
# .copy() makes the selection an independent frame, so adding columns to it
# below does not raise SettingWithCopyWarning.
session_mask = (df_behavior['day'] == '2021-06-13') & (df_behavior['subject'] == 'E10')
df_behavior = df_behavior.loc[session_mask].copy()

# Compute vector of answers: 1 where reward_side and hit have the same truth
# value (XNOR), 0 where they differ.
# NOTE(review): presumably this recovers the side the animal chose, given 0/1
# coding of reward_side and hit -- confirm the coding against the task files.
vector_answer_dev = np.logical_not(np.logical_xor(df_behavior['reward_side'], df_behavior['hit'].astype(int)))
vector_answer = np.where(vector_answer_dev, 1, 0)
df_behavior['vector_answer'] = vector_answer

In [46]:
# Add a new column with the repetition choice.
# repeat_choice on row j describes the step from row j to row j + 1:
#   1.0 -> the next row has the same vector_answer (choice repeated)
#   0.0 -> the next row has a different vector_answer (choice alternated)
#   NaN -> there is no next row, or the next row is trial 0
# The whole column is assigned at once. The previous element-wise
# `df['repeat_choice'].iloc[i-1] = ...` was chained assignment, which raises
# SettingWithCopyWarning and does not write through when pandas copy-on-write
# is active. It also wrapped around to the last row (i-1 == -1) whenever the
# first row was not trial 0.
df_behavior = df_behavior.copy()
current_answer = df_behavior['vector_answer']
next_answer = current_answer.shift(-1)
next_trial = df_behavior['trials'].shift(-1)
# shift(-1) leaves NaN on the last row, and NaN != 0 is True, so it must be
# excluded explicitly.
has_next = next_trial.notna() & (next_trial != 0)
df_behavior['repeat_choice'] = np.where(
    has_next, (next_answer == current_answer).astype(float), np.nan
)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


In [47]:
# The first behavioural trial has no delay, so it has no counterpart in the
# TTL data: drop that first row so both tables line up.
df_behavior = df_behavior.iloc[1:]

In [48]:
# Run the project helper over the columns L_s, C_s, C_e and L_e.
# NOTE(review): presumably each cell holds a stringified list that becomes a
# real Python list (the first-lick cell indexes these columns with [0]) --
# confirm against Utils.convert_strings_to_lists.
df_behavior = Utils.convert_strings_to_lists(df_behavior, ['L_s', 'C_s','C_e','L_e'])

In [49]:
# Add a column for the first lick: the earlier of the first C_s entry and the
# first L_s entry of each trial.
# np.fmin ignores a NaN on one side and returns NaN only when both are NaN.
# Unlike the previous if/elif chain, two equal first timestamps yield that
# timestamp instead of NaN.
first_c = np.array([entries[0] for entries in df_behavior['C_s']], dtype=float)
first_l = np.array([entries[0] for entries in df_behavior['L_s']], dtype=float)
df_behavior['lick'] = np.fmin(first_c, first_l)

In [51]:
# Import the data of one ephys session.
# `path` keeps its trailing slash because the save cell further down
# concatenates onto it directly.
path = 'C:/Users/Tiffany/Documents/Ephys/E10_2021-06-13_12-31-21/'
os.chdir(path)

# Recover the timestamps saved for this session.
# os.path.join avoids the doubled slash that `path + '/file'` produced.
# timestamps.csv supplies the `ttl` and `timestamps` columns used below;
# spike.csv supplies one row per spike, including `fixed_times` and `cluster_id`.
df_ttl = pd.read_csv(os.path.join(path, 'timestamps.csv'), sep=',')
df = pd.read_csv(os.path.join(path, 'spike.csv'), sep=',')

In [52]:
# Split the TTL timestamps into two sparse columns: rows with ttl == 1 mark
# the onset of a delay, rows with ttl == 0 mark the offset of a delay.
delay_onset_rows = df_ttl['ttl'] == 1
delay_offset_rows = df_ttl['ttl'] == 0
df_ttl.loc[delay_onset_rows, 'Delay_ON'] = df_ttl.loc[delay_onset_rows, 'timestamps']
df_ttl.loc[delay_offset_rows, 'Delay_OFF_next'] = df_ttl.loc[delay_offset_rows, 'timestamps']

In [53]:
# df_ttl['Delay_ON'] = df_ttl.apply(lambda x: x.timestamps if x.ttl == 1 else None, axis=1)
# df_ttl['Delay_OFF_next'] = df_ttl.apply(lambda x: x.timestamps if x.ttl == 0 else None, axis=1)

In [54]:
# Pull each offset timestamp up by one row so it sits next to its onset, then
# measure the delay duration and discard the temporary helper column.
# NOTE(review): assumes every onset row is immediately followed by its own
# offset row -- confirm that the TTL log strictly alternates.
df_ttl['Delay_OFF'] = df_ttl['Delay_OFF_next'].shift(periods=-1)
df_ttl['Delay_length'] = df_ttl['Delay_OFF'].sub(df_ttl['Delay_ON'])
del df_ttl['Delay_OFF_next']

In [55]:
# Keep only the rows that carry a delay onset; rows without one have NaN in
# Delay_ON and are dropped.
df_ttl = df_ttl.dropna(subset=['Delay_ON'])

In [56]:
# Number the remaining rows 1, 2, 3, ... as the trial index. Counting starts
# at 1 because trial 0 has no delay and is therefore absent from the TTL data.
df_ttl['trial'] = np.arange(1, len(df_ttl) + 1)

In [57]:
# Attach the TTL-derived delay timestamps to the behavioural trials, matching
# rows on the trial number (inner join: unmatched trials are dropped).
df_behavior = df_behavior.rename(columns={'trials': 'trial'})
df2_behavior = df_behavior.merge(df_ttl, on='trial')

In [58]:
# START = Delay_ON (TTL timestamp) - Delay_start (behavioural table), one value
# per trial. Every other event time is derived from START in the next cells.
# NOTE(review): presumably Delay_start is measured from the beginning of the
# trial, which would make START the trial start on the recording clock -- confirm.
trial_start = df2_behavior['Delay_ON'] - df2_behavior['Delay_start']
df2_behavior['START'] = trial_start
df_final = pd.DataFrame({
    'START': trial_start,
    'Delay_ON': df2_behavior['Delay_ON'],
    'Delay_OFF': df2_behavior['Delay_OFF'],
})

In [59]:
# Express the behavioural events on the same clock as START by adding each
# per-trial offset to the trial's START.
trial_start = df2_behavior['START']
for event_name, offset_column in (
    ('Stimulus_ON', 'StimulusDuration_start'),
    ('Response_ON', 'ResponseWindow_start'),
    ('Lick_ON', 'lick'),
):
    df_final[event_name] = trial_start + df2_behavior[offset_column]

# END is START + Motor_out_end + 0.006, and Motor_OUT is placed 2 before END.
# NOTE(review): the 0.006 and 2 constants are undocumented in this notebook;
# presumably both are in seconds -- confirm their origin.
trial_end = trial_start + df2_behavior['Motor_out_end'] + 0.006
df2_behavior['END'] = trial_end
df_final['Motor_OUT'] = trial_end - 2
df_final['END'] = trial_end

In [60]:
# Carry the per-trial behavioural labels over to df_final.
for column in ('vector_answer', 'reward_side', 'hit', 'repeat_choice'):
    df_final[column] = df2_behavior[column]
df_final['miss'] = df2_behavior['misshistory']
df_final['trial'] = df2_behavior['trial']
# Delay duration measured from the TTL timestamps, rounded to 2 decimals.
measured_delay = df2_behavior['Delay_OFF'] - df2_behavior['Delay_ON']
df_final['delay'] = np.around(measured_delay, 2)

In [61]:
# df_final.drop(df_final.tail(1).index,inplace=True) # drop last n rows

In [62]:
# Inspect the last rows of df_final.
df_final.tail()

Unnamed: 0,START,Delay_ON,Delay_OFF,Stimulus_ON,Response_ON,Lick_ON,Motor_OUT,END,vector_answer,reward_side,hit,repeat_choice,miss,trial,delay
217,2565.78,2566.724033,2567.724067,2566.29,2567.92,,2573.96,2575.96,1,0.0,0.0,1.0,True,218.0,1.0
218,2575.96,2576.905633,2586.905733,2576.48,2587.11,2587.34,2593.38,2595.38,1,0.0,0.0,0.0,False,219.0,10.0
219,2595.38,2596.3245,2596.424533,2595.89,2596.62,,2602.66,2604.66,0,1.0,0.0,1.0,True,220.0,0.1
220,2604.67,2605.6065,2615.606633,2605.18,2615.81,2615.98,2620.02,2622.02,0,0.0,1.0,1.0,False,221.0,10.0
221,2622.03,2622.972567,2625.972633,2622.54,2626.17,2626.44,,,0,1.0,0.0,,False,222.0,3.0


In [63]:
# Label every spike with the trial whose window (START, END] contains it;
# spikes outside every window keep the placeholder 0.
#
# One binary search per spike replaces the previous per-trial np.select over
# the whole spike table. That loop also:
#   * used iterrows() labels as .iloc positions (only valid for a 0..n-1 index),
#   * reset a spike to 0 when it fell exactly on a later trial's START,
#   * wiped earlier assignments when a trial had a NaN START.
# Trials without a START cannot own spikes, and the search needs sorted starts
# (mergesort keeps ties in their original order).
trial_windows = df_final.dropna(subset=['START']).sort_values('START', kind='mergesort')
window_starts = trial_windows['START'].values
window_ends = trial_windows['END'].values
window_trials = trial_windows['trial'].values
spike_times = df['fixed_times'].values

df['trial'] = 0
if len(window_starts):
    # Index of the last window that starts strictly before each spike (-1: none).
    candidate = np.searchsorted(window_starts, spike_times, side='left') - 1
    has_candidate = candidate >= 0
    candidate = np.where(has_candidate, candidate, 0)  # keep the indexing in range
    # A NaN END compares False, so a trial without END receives no spikes.
    in_window = has_candidate & (spike_times <= window_ends[candidate])
    df['trial'] = np.where(in_window, window_trials[candidate], 0)

In [64]:
# Sanity check: print the START and END of trial 46.
trial_46 = df_final[df_final['trial'] == 46]
print(trial_46['START'].iloc[0], trial_46['END'].iloc[0])

524.0904666666667 541.6286666666666


In [65]:
# Attach the per-trial event times and labels to every spike, matching on the
# trial number. The join is inner, so spikes whose trial value has no row in
# df_final are dropped.
df3 = df.merge(df_final, on='trial')

In [66]:
# Inspect the first rows of the merged spike/trial table.
df3.head()

Unnamed: 0.1,Unnamed: 0,times,cluster_id,group,fixed_times,trial,START,Delay_ON,Delay_OFF,Stimulus_ON,Response_ON,Lick_ON,Motor_OUT,END,vector_answer,reward_side,hit,repeat_choice,miss,delay
0,181,2644240,1193,good,88.141333,1.0,88.1202,89.061367,89.1614,88.6314,89.3687,89.4452,93.4885,95.4885,0,0.0,1.0,1.0,False,0.1
1,182,2647937,1193,good,88.264567,1.0,88.1202,89.061367,89.1614,88.6314,89.3687,89.4452,93.4885,95.4885,0,0.0,1.0,1.0,False,0.1
2,183,2652582,1193,good,88.4194,1.0,88.1202,89.061367,89.1614,88.6314,89.3687,89.4452,93.4885,95.4885,0,0.0,1.0,1.0,False,0.1
3,184,2653604,1193,good,88.453467,1.0,88.1202,89.061367,89.1614,88.6314,89.3687,89.4452,93.4885,95.4885,0,0.0,1.0,1.0,False,0.1
4,185,2660092,1193,good,88.669733,1.0,88.1202,89.061367,89.1614,88.6314,89.3687,89.4452,93.4885,95.4885,0,0.0,1.0,1.0,False,0.1


In [67]:
# Discard all spikes of the last trial, where "last" means the trial value
# that appears last in df3's row order.
last_seen_trial = df3['trial'].unique()[-1]
df3 = df3.loc[df3['trial'] != last_seen_trial]

In [None]:
# Spike times relative to each event of the same trial:
# a_<event> = fixed_times - <event>, negative before the event.
for event_name in ('Stimulus_ON', 'Response_ON', 'Lick_ON', 'Delay_OFF'):
    df3['a_' + event_name] = df3['fixed_times'] - df3[event_name]

In [None]:
# Earliest spike time relative to stimulus onset. Series.min() is vectorised
# and skips NaN, whereas the builtin min() iterates in Python and gives an
# order-dependent result when NaN is present.
df3['a_Stimulus_ON'].min()

In [None]:
# df3 = df3.drop(['times','group','accuracy','accuracy_high','accuracy_left','accuracy_low','accuracy_medium','accuracy_right','box','catch','date_x','date_y','day','delay_h','delay_l','delay_m','correct_trials','index','drug','fixation','invalid_trials','delay_progression_value'], axis=1)

In [None]:
# Number of rows left in df3.
len(df3)

In [None]:
# Remove the `times` and `group` columns before saving.
df3 = df3.drop(columns=['times', 'group'])

In [None]:
# Merge with cluster labels, use cluster ID to associate each one
# df2.rename(columns= {'trials': 'trial'},inplace=True)
# df2 = pd.merge(df2,df_behavior[['lick','trial','vector_answer','reward_side','hit','misshistory','repeat_choice']], on=['trial'])

In [None]:
# Save the data in a new csv inside the session folder.
# os.path.join yields the same file as before while `path` ends in a slash,
# and still yields a valid file path if that trailing slash is ever removed.
# NOTE(review): the index is written as an unnamed first column; presumably
# this is where the 'Unnamed: 0' columns seen after reloading these CSVs come
# from -- consider index=False once downstream readers are checked.
os.chdir(path)
df3.to_csv(os.path.join(path, 'data.csv'))

In [None]:
# COLORLEFT = '#31A2AC'
# COLORRIGHT = '#FF8D3F'
# with PdfPages(str(cluster)+'.pdf') as pdf:
#     for cluster in df3.cluster_id.unique():
#             fig, ax = plt.subplots(6, 2, figsize=(8.5, 13.7))   
#     #         plt.title(str(cluster))
#             sns.scatterplot(x='a_Delay_OFF',y='trial', data= df3.loc[(df3.cluster_id == cluster)&(df3.reward_side == 0)],marker='|', color=COLORLEFT, ax = ax[0][0])
#             sns.scatterplot(x='a_Delay_OFF',y='trial', data= df3.loc[(df3.cluster_id == cluster)&(df3.reward_side == 1)],marker='|', color=COLORRIGHT, ax = ax[1][0])
#             sns.histplot(data=df3.loc[(df3.cluster_id == cluster)], x="a_Delay_OFF", weights='trial',binwidth=0.250, ax=ax[2][0],stat='frequency',hue='reward_side',kde=True,element="step",palette=[COLORLEFT,COLORRIGHT])
#             ax[0][0].set_title('Response ON for Reward side')
#             ax[0][0].set_ylabel('Left trials')
#             ax[1][0].set_ylabel('Right trials')     
#             ax[2][0].set_xlabel('Time (s)')    

#             sns.scatterplot(x='a_Delay_OFF',y='trial', data= df3.loc[(df3.cluster_id == cluster)&(df3.hit == 0)],marker='|', color='crimson', ax = ax[3][0])
#             sns.scatterplot(x='a_Delay_OFF',y='trial', data= df3.loc[(df3.cluster_id == cluster)&(df3.hit == 1)],marker='|', color='green', ax = ax[4][0])
#             sns.histplot(data=df3.loc[(df3.cluster_id == cluster)], x="a_Delay_OFF", binwidth=0.250, ax=ax[5][0],stat='frequency',hue='hit',kde=True,element="step",palette=['crimson','green'])


#             sns.scatterplot(x='a_Stimulus_ON',y='trial', data= df3.loc[(df3.cluster_id == cluster)&(df3.reward_side == 0)],marker='|', color=COLORLEFT, ax = ax[0][1])
#             sns.scatterplot(x='a_Stimulus_ON',y='trial', data= df3.loc[(df3.cluster_id == cluster)&(df3.reward_side == 1)],marker='|', color=COLORRIGHT, ax = ax[1][1])
#             sns.histplot(data=df3.loc[(df3.cluster_id == cluster)], x="a_Stimulus_ON", binwidth=0.250, ax=ax[2][1],stat='frequency',hue='reward_side',kde=True,element="step",palette=[COLORLEFT,COLORRIGHT])

#             ax[0][1].set_title('Stimulus ON for Reward side')
#             ax[0][1].set_xlabel('Left trials')
#             ax[1][1].set_ylabel('Right trials')

#             sns.scatterplot(x='a_Stimulus_ON',y='trial', data= df3.loc[(df3.cluster_id == cluster)&(df3.hit == 0)],marker='|', color='crimson', ax = ax[3][1])
#             sns.scatterplot(x='a_Stimulus_ON',y='trial', data= df3.loc[(df3.cluster_id == cluster)&(df3.hit == 1)],marker='|', color='green', ax = ax[4][1])        
#             sns.histplot(data=df3.loc[(df3.cluster_id == cluster)], x="a_Stimulus_ON", binwidth=0.250, ax=ax[5][1],stat='frequency',hue='hit',kde=True,element="step",palette=['crimson','green'])

#             ax[0][1].set_title('Stimulus ON for Reward side')
#             ax[0][1].set_xlabel('Left trials')
#             ax[1][1].set_ylabel('Right trials')

#             sns.despine()

#     #         ax[0][1].set_xlim(-0.5,5)

#             for i in np.arange(6):
#                 ax[i][0].set_xlim(-5,5)
#                 ax[i][1].set_xlim(-0.5,10)
#                 ax[i][1].vlines(x=0,ymin=0,ymax=max(df3.trial),linestyle=':')
#                 ax[i][1].vlines(x=0.4,ymin=0,ymax=max(df3.trial),linestyle=':')
#                 ax[i][0].vlines(x=0,ymin=0,ymax=max(df3.trial),linestyle=':')            
#             plt.tight_layout()
#             plt.close()
#         pdf.savefig(fig)  # or you can pass a Figure object to pdf.savefig
# #     pdf.savefig(plt.gcf(), transparent=True)  #Saves the current figure into a pdf page
# #     plt.close()    