### **Figure for checking the decoder without separating states with the HMM**
This is to verify whether we could still observe the reversals without taking the HMM states into account.

In [None]:
from sklearn.model_selection import StratifiedKFold
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score
from sklearn.preprocessing import RobustScaler


import os 
import pandas as pd
import seaborn as sns
sns.set_context('talk')

import warnings
# Silence warning categories for the whole notebook session.
# NOTE(review): ignoring FutureWarning also hides pandas deprecation notices
# (e.g. for DataFrame.append, which this notebook uses) -- confirm this is intended.
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=RuntimeWarning)
# warnings.simplefilter(action='ignore', category=PerformanceWarning)
warnings. filterwarnings('ignore', category=UserWarning)
warnings.simplefilter(action="ignore", category=pd.errors.PerformanceWarning)
# Disable pandas' SettingWithCopy check entirely (the library default is 'warn').
pd.options.mode.chained_assignment = None  # default='warn'

In [None]:
def test(df, epoch='Stimulus_ON',initrange=-0.4,endrange=1.5,r=0.2, model = None, train_cols=None, variable='ra_accuracy',
         hit=1, nsurrogates = 100, decode='vector_answer', ratio=0, cluster_list = [], test_index=[], 
         fakey=[], delay_only=False, T=1):
    '''
    Score an already-fitted classifier on consecutive time windows and build a
    label-shuffle null distribution for each window.

    For every window of width ``r`` between ``initrange`` and ``endrange`` the
    spike counts are extracted with ``interval_extraction_trial``, aligned to the
    training columns, and the mean probability that ``model`` assigns to the true
    label is stored. The same statistic is then recomputed ``nsurrogates`` times
    with shuffled labels.

    Attributes
        - df: DataFrame. Spike table handed to ``interval_extraction_trial``.
        - epoch: str. Event column the windows are aligned to.
        - initrange: float. Start of the first window, relative to ``epoch``.
        - endrange: float. Upper limit for the window edges.
        - r: float. Window width and step.
        - model: fitted classifier exposing ``predict_proba``.
        - train_cols: column labels used at training time (including 'y'). Columns
          missing from a window are added filled with 0, extra ones are dropped.
        - variable, hit: only used to build the label ``variable + '_' + str(hit)``
          stored in the outputs.
        - nsurrogates: int. Number of label shuffles per window.
        - decode: str. Column used as the label to decode.
        - cluster_list: cluster ids forwarded to ``interval_extraction_trial``.
        - test_index: list. If non-empty, only these row positions (after resetting
          the trial index) are scored; otherwise every trial is scored.
        - delay_only: bool. Forwarded to ``interval_extraction_trial``.
        - ratio, fakey, T: accepted for call compatibility; not used in the body.

    Return
        - df_real: one-row DataFrame with one column per window centre holding the
          mean true-label probability, plus a 'trial_type' column with the label.
        - df_iter: DataFrame with one row per (window, surrogate) and columns
          'epoch', 'iteration', 'score', 'times', 'variable'.
    '''
    label = variable+'_'+str(hit)

    times = []           # window centres
    real_score = []      # mean true-label probability per window
    surrogate_rows = []  # one dict per (window, shuffle); turned into a frame once

    for start, stop in zip(np.arange(initrange,endrange-r,r),np.arange(initrange+r,endrange,r)):
        bin_center = (start+stop)/2
        times.append(bin_center)
        df_final, y = interval_extraction_trial(df, variable=decode, align=epoch, start=start, stop=stop,
                                                cluster_list=cluster_list, delay_only=delay_only)

        # Training and testing sets can contain different neurons. Align to the
        # training layout: absent columns are created as 0, unknown ones dropped.
        df_final = df_final.reindex(columns=train_cols, fill_value=0)

        if len(test_index) >= 1:
            # test_index is positional, so the trial index is replaced by 0..n-1 first.
            df_final = df_final.reset_index().drop(columns='trial')
            held_out = df_final.loc[test_index, :]
        else:
            held_out = df_final

        x_test = held_out.iloc[:, held_out.columns != 'y']
        y_test = held_out['y']

        p_pred = model.predict_proba(x_test)

        # Map labels {-1, 1} to column positions {0, 1} of p_pred.
        # NOTE(review): assumes model.classes_ is ordered [-1, 1] -- confirm.
        y_test = np.where(y_test == -1, 0, y_test)
        true_col = y_test.reshape(len(y_test), 1).astype(int)
        real_score.append(np.mean(np.take_along_axis(p_pred, true_col, axis=1)))

        # Null distribution: same statistic with shuffled labels. Iterations are
        # numbered 1..nsurrogates (the previous `while i <= nsurrogates` loop
        # produced one surrogate too many).
        for i in range(1, nsurrogates + 1):
            y_perm = shuffle(y_test)
            perm_col = y_perm.reshape(len(y_perm), 1).astype(int)
            surrogate_rows.append({'epoch': epoch,
                                   'iteration': i,
                                   'score': np.mean(np.take_along_axis(p_pred, perm_col, axis=1)),
                                   'times': bin_center,
                                   'variable': label})

    # Build both outputs in one go instead of growing them with DataFrame.append,
    # which copies the frame on every call and no longer exists in pandas >= 2.0.
    df_real = pd.DataFrame([real_score + [label]], columns=times + ['trial_type']).infer_objects()
    df_iter = pd.DataFrame(surrogate_rows, columns=['epoch', 'iteration', 'score', 'times', 'variable'])

    df_iter = df_iter.fillna(0)
    return df_real, df_iter

In [None]:
def train(df, decode='vector_answer', align='Delay_OFF', start=-0.5, stop=0, cluster_list = [], ratio=0.65, 
          test_index=[],  train_index=[], fakey=[], delay_only=False):
    """Fit an L2 logistic regression on the spike counts of one time window.

    The window is extracted with ``interval_extraction_trial``. When
    ``test_index`` is non-empty the model is fitted on the rows in
    ``train_index`` and evaluated on the rows in ``test_index``; otherwise it is
    fitted and evaluated on every trial. ``fakey`` (more than one element)
    replaces the labels with its last ``len(y)`` entries. ``ratio`` is not used.

    Returns ``(model, train_cols, score)`` where ``train_cols`` are the columns
    of the design table (features plus 'y') and ``score`` is the mean
    probability given to the true label on the evaluation rows.
    """
    design, labels = interval_extraction_trial(
        df, variable=decode, align=align, start=start, stop=stop,
        cluster_list=cluster_list, delay_only=delay_only)

    # Used for the session shuffles: overwrite the labels with surrogate ones
    if len(fakey) > 1:
        print('Using shuffled session')
        labels = fakey[len(fakey)-len(labels):]
        design['y'] = labels

    # Replace the trial index by a positional one so fold indexes can be used
    design = design.reset_index().drop(columns='trial')

    if len(test_index) >= 1:
        print('Using splits')
        fit_rows = design.loc[train_index, :]
        eval_rows = design.loc[test_index, :]
    else:
        fit_rows = design
        eval_rows = design

    x_train = fit_rows.iloc[:, fit_rows.columns != 'y']
    y_train = fit_rows['y']
    x_test = eval_rows.iloc[:, eval_rows.columns != 'y']
    y_test = eval_rows['y']

    classifier = LogisticRegression(solver='liblinear', penalty = 'l2', C=.99)
    model = classifier.fit(x_train, y_train)
    train_cols = design.columns

    probabilities = model.predict_proba(x_test)
    predictions = model.predict(x_test)
    f1score = f1_score(y_test, predictions, average='weighted')

    # Turn the labels {-1, 1} into column positions {0, 1} of the probability matrix
    positions = np.where(y_test == -1, 0, y_test)
    positions = positions.reshape(len(positions), 1).astype(int)
    mean_score = np.mean(np.take_along_axis(probabilities, positions, axis=1))

    print('score:', mean_score, 'f1_score ', f1score)

    return model, train_cols, mean_score

In [None]:
def interval_extraction_trial(df, cluster_list=[], variable = 'vector_answer', align = 'Delay_OFF', start = 0, stop = 1, delay_only=False):
    """Build a per-trial spike-count table for one time window, plus the label to decode.

    Parameters
    ----------
    df : DataFrame
        One row per spike. The body reads the columns ``trial``, ``cluster_id``,
        ``fixed_times``, ``delay``, ``align`` and ``variable``. It is not modified.
    cluster_list : list
        Cluster ids that become the output columns, in this order; clusters
        without spikes in the window are filled with 0. (Never mutated here.)
    variable : str
        Column averaged per trial to obtain the label.
    align : str
        Event column; spike times are taken relative to it.
    start, stop : float
        Open interval ``(start, stop)`` relative to ``align``.
    delay_only : bool
        When False, trials whose ``delay`` is shorter than what the requested
        window needs are removed (see thresholds below).

    Returns
    -------
    (DataFrame, Series)
        The table indexed by ``trial`` with one column per cluster and a ``y``
        column (label, with 0 recoded as -1), and that ``y`` column on its own.
        Trials without spikes in the window are kept with all-zero counts.
    """
    if not delay_only:
        excluded_delays = []
        if (align == 'Delay_OFF' and start < 0) or (align == 'Stimulus_ON' and stop > 0.5):
            excluded_delays = [0.1, 0.2]
        if (align == 'Delay_OFF' and start < -1) or (align == 'Stimulus_ON' and stop > 1.35):
            excluded_delays = [0.1, 0.2, 1]
        if excluded_delays:
            df = df.loc[~df.delay.isin(excluded_delays)]

    # assign() returns a new frame, so the caller's DataFrame is never written to
    # (the previous in-place column assignment leaked into it when nothing was filtered).
    aligned_col = 'a_'+align
    df = df.assign(**{aligned_col: df.fixed_times - df[align]})

    # Label per trial, computed before the window filter so trials without spikes
    # in the window are still represented. Only the label column is averaged:
    # averaging the whole frame fails on string columns with pandas >= 2.
    # groupby sorts by trial, so no explicit sort is needed.
    y = df.groupby('trial')[variable].mean()

    # Keep the spikes that fall inside the analysis window
    in_window = df.loc[(df[aligned_col] > start) & (df[aligned_col] < stop)]

    counts = in_window.groupby(['trial', 'cluster_id'])['fixed_times'].count().reset_index()
    df_final = counts.pivot_table(index=['trial'], columns='cluster_id', values='fixed_times', fill_value=0).rename_axis(None, axis=1)
    df_final = df_final.reindex(cluster_list, axis=1, fill_value=0)

    # Right merge keeps every trial that has a label; missing counts become 0
    result = pd.merge(df_final, y, how="right", on=["trial"]).fillna(0)
    result = result.rename(columns={variable: "y"})
    result['y'] = np.where(result['y'] == 0, -1, result['y'])

    return result, result['y']

In [None]:
# Per-session decoding pipeline.
# For every file in the data folder: load the spikes, prepend the last 2 s of the
# previous trial to each trial, train a decoder with train() on one window using
# 5-fold stratified splits, score it with test() on sliding windows aligned to
# 'Stimulus_ON' and 'Delay_OFF' for each tested condition, plot the result minus the
# shuffle mean, and accumulate one summary row per condition in df_cum_sti / df_cum_res.
baseline = 0.0
df_cum_res = pd.DataFrame()
df_cum_sti = pd.DataFrame()
df_cum_iter = pd.DataFrame()

# NOTE(review): hard-coded, machine-specific path; this changes the working directory
# for the rest of the session.
os.chdir('C:/Users/Tiffany/Documents/Ephys/summary_complete')
for filename in os.listdir(os.getcwd()):
    # Every directory entry that does not end in 'pdf' is read as a CSV
    if filename[-3:] != 'pdf':
        df = pd.read_csv(filename, sep=',',index_col=0)
    else:
        continue

    print(filename)

    # Variables used for decoder training
    decode = 'vector_answer'
    align='Delay_OFF'
    r=0.25
    start= -0.25
    stop= 0

    type_trial='WM_roll'
    hit=1
    ratio=0.6
    
    # #Variables for testing
    # colors=['black','crimson']
    # variables = ['all','all']
    # hits = [1,0]
    # ratios = ["all","all"]
    # variables_combined=[variables[0]+'_'+str(hits[0], variables[1]+'_'+str(hits[1]))]

    # # # #Variables for testing
    # colors=['darkgreen','indigo']
    # variables = ['WM_roll','RL_roll']
    # hits = [1, 1]
    # ratios = [0.6, 0.4]
    # variables_combined=[variables[0]+'_'+str(hits[0]),variables[1]+'_'+str(hits[1])]

    # Variables for testing: one entry per tested condition (column, hit value, threshold)
    colors=['darkgreen','crimson','indigo', 'purple']
    variables = ['WM_roll','WM_roll','RL_roll','RL_roll']
    hits = [1,0,1,0]
    ratios = [0.6,0.6,0.4,0.4]
    variables_combined=[variables[0]+'_'+str(hits[0]),variables[1]+'_'+str(hits[1]),
                        variables[2]+'_'+str(hits[2]),variables[3]+'_'+str(hits[3])]
 
    # colors=['darkgreen','crimson']
    # variables = ['WM_roll','WM_roll']
    # hits = [1,0]
    # ratios = [0.6,0.6]
    # variables_combined=[variables[0]+'_'+str(hits[0]),variables[1]+'_'+str(hits[1])]

####################################### ----------------- Add 2 more seconds of the previous trials before the current stimulus
    # df = df.rename(columns={'past_choices_x' : 'past_choices', 'streak_x' : 'streak', 'past_rewards_x' : 'past_rewards'})
    # df = df.drop(columns=['past_choices_y','streak_y', 'past_rewards_y'])

    # Create a DataFrame with one row of trial-level info per trial
    # NOTE(review): mean() over every column relies on pandas silently skipping
    # non-numeric columns -- confirm with the pandas version in use.
    trials = df.groupby(['trial']).mean()
    # NOTE(review): bare except; the fallback is the same column list without 'streak'.
    try:
        trials = trials[['START','END','Delay_ON','Delay_OFF', 'Stimulus_ON', 'Response_ON', 'Lick_ON', 'Motor_OUT','new_trial',
               'vector_answer', 'reward_side', 'hit', 'delay','total_trials', 'T', 'previous_vector_answer', 'previous_reward_side','repeat_choice',
                'WM_roll', 'RL_roll', 'WM', 'RL', 'streak']]
    except:
        trials = trials[['START','END','Delay_ON','Delay_OFF', 'Stimulus_ON', 'Response_ON', 'Lick_ON', 'Motor_OUT','new_trial',
               'vector_answer', 'reward_side', 'hit', 'delay','total_trials', 'T', 'previous_vector_answer', 'previous_reward_side','repeat_choice',
                'WM_roll', 'RL_roll', 'WM', 'RL']]
    trials = trials.reset_index()

    # Spike time relative to the END of its own trial
    df['a_END'] = df['fixed_times'] - df['END']

    # Create a new DataFrame with all spikes
    try:
        # Some sessions include the 'group' column that indicates the type of cluster, others don't
        spikes = df[['trial','fixed_times','a_END','cluster_id', 'group']]
    except:
        spikes = df[['trial','fixed_times','a_END','cluster_id']]

    # Locate spikes that happen less than 2 s before the end of a trial and copy them
    # under the following trial number
    duplicate_spikes = spikes.loc[spikes.a_END >-2]
    duplicate_spikes['trial'] +=1 

    # Add the duplicates
    spikes = pd.concat([spikes, duplicate_spikes])

    # Merge trial data with spikes on the trial index; duplicated spikes therefore
    # receive the event times of the trial they were copied into
    df = pd.DataFrame()
    df = pd.merge(trials, spikes, on=["trial"])

    # Create the columns for start and end and change trial to new trial index ( without taking the misses into account)
    # df['trial_start'] = min(df.new_trial)
    # df['trial_end'] = max(df.new_trial)
    # df = df.drop(columns=['trial'])
    # df = df.rename(columns={'new_trial' : 'trial'})

    # Used instead of the block above to preserve the original trial indexes.
    df['trial_start'] = min(df.trial)
    df['trial_end'] = max(df.trial)

    # Create the alignments that we will need for the analysis.
    df['a_Stimulus_ON'] =  df['fixed_times'] - df['Stimulus_ON']
    df['a_Lick_ON'] =  df['fixed_times'] - df['Lick_ON']
    df['a_Delay_OFF'] =  df['fixed_times'] - df['Delay_OFF']
    df['a_Motor_OUT'] =  df['fixed_times'] - df['Motor_OUT']
    df['a_Response_ON'] =  df['fixed_times'] - df['Response_ON']
    df['START_adjusted'] =  df['START'] - 2.1
    
############################################################# -------------------------------------------------------------------------
    # Round the delay so the equality tests below work, then drop the 0.1 and 0.2 delays
    df['delay'] = np.around(df.delay,2)
    df = df.loc[(df.delay!=0.1)&(df.delay!=0.2)]
    
    # Select the training trials from type_trial / hit / ratio
    if type_trial == 'all' and hit == 'all':
        print('All trials')
        df_train = df
    elif type_trial == 'all':
        df_train = df.loc[(df.hit==hit)]
    elif hit == 'all':
        df_train = df.loc[(df[type_trial]>=ratio)]
    else:
        df_train = df.loc[(df[type_trial]>=ratio)&(df.hit==hit)]
        
    # Cluster ids of the whole (delay-filtered) session, so training and testing
    # tables share the same columns
    cluster_list = df.cluster_id.unique()

    # Extracted here only to obtain the trial list and labels for the fold split;
    # train() extracts the same window again.
    df_final, y = interval_extraction_trial(df_train, variable = decode, align = align, start = start, stop = stop, cluster_list=cluster_list)
    df_final.reset_index(inplace=True)
    with_trial_df = df_final
    df_final = df_final.drop(columns ='trial')

    # NOTE(review): the figure is created before the `continue` below, so sessions
    # with fewer than 5 training trials leave an empty figure open.
    fig, ([ax1,ax2])  = plt.subplots(1,2, figsize=(12, 4), sharey=True)

    print('Number of trials for training: ' , len(y))
    
    if len(df_final)<5:
        continue
        
    # NOTE(review): no random_state is set, so the folds differ between runs.
    skf = StratifiedKFold(n_splits=5, shuffle=True)
    fold_no = 1
    for train_index, test_index in skf.split(df_final, y):
        
        # Trial numbers used for fitting in this fold; removed from the test sets below
        trained_trials = with_trial_df.iloc[train_index].trial.unique()
        
        # Skip the fold unless both labels (1 and -1) appear in the train and test parts
        x = y.reset_index()
        if 1 not in x.iloc[test_index]['y'].values or 1 not in x.iloc[train_index]['y'].values or -1 not in x.iloc[test_index]['y'].values or -1 not in x.iloc[train_index]['y'].values:
        # if  1 not in x.iloc[train_index]['y'].values or -1 not in x.iloc[train_index]['y'].values:
            print('Skip this session because only one choice')
            fold_no+=1
            continue
            
        print('Amount of trials:', len(test_index))

        # Per-fold accumulators (reset on every fold)
        df_res = pd.DataFrame()
        df_sti = pd.DataFrame()
        df_iter = pd.DataFrame()

        model, train_cols, score = train(df_train, decode=decode, align=align, start=start,stop=stop, ratio=ratio, 
                                 cluster_list = cluster_list, test_index=test_index,  train_index=train_index)
        
        # NOTE(review): the loop variables `hit` and `ratio` rebind the training
        # settings defined above. From the second fold on, train() receives the last
        # tested ratio; train() as defined in this notebook does not read `ratio`.
        for color, variable,hit, ratio, left,right in zip(colors,variables,hits, ratios,[ax1, ax1, ax1, ax1],[ax2, ax2, ax2, ax2]):
            # Create a dataframe for testing data
            print(type_trial, hit)
            # NOTE(review): the first condition checks `type_trial` while the other
            # branches check `variable` -- confirm which one is intended.
            if type_trial == 'all' and hit == 'all':
                df_test = df_train
            elif variable == 'all':
                df_test = df.loc[(df.hit==hit)]
            elif hit == 'all':
                df_test = df.loc[(df[variable]>=ratio)]
            else:
                df_test = df.loc[(df[variable]>=ratio)&(df.hit==hit)]
            
            print(variable + str(ratio) + '_' + str(hit))
                
            # -----------  Remove the trials that overlap with the training set.                          
            # print(df_test.trial.unique(), trained_trials)
            df_test = df_test[~df_test['trial'].isin(trained_trials)] 
            # print(df_test.trial.unique())
            # The fold's test_index is replaced by an empty list, so test() scores every
            # remaining trial of df_test rather than the fold's held-out rows.
            test_index=[]
            if len(df_test.trial.unique()) <2:
                continue

            # Sliding windows aligned to the stimulus
            # (`decode` is not passed here, so test() uses its default)
            df_real,df_temp = test(df_test, epoch='Stimulus_ON',initrange=-2,endrange=1.4, r=r, model = model, cluster_list=cluster_list,
                                    variable=variable, hit=hit, nsurrogates = 100, ratio=ratio, test_index = test_index,
                                   train_cols =  train_cols)

            df_sti = pd.concat([df_real,df_sti])
            df_iter = pd.concat([df_iter,df_temp])

            # Sliding windows aligned to the end of the delay
            df_real,df_temp = test(df_test, decode= decode,  epoch='Delay_OFF',initrange=-1,endrange=4.5,r=r, model = model, cluster_list=cluster_list, 
                                    variable=variable, hit=hit,  nsurrogates = 100, ratio=ratio, test_index = test_index,
                                   train_cols =  train_cols)

            df_res = pd.concat([df_real,df_res])
            df_iter = pd.concat([df_iter,df_temp])

            # From here on `variable` is the combined label, e.g. 'WM_roll_1'
            variable = str(variable)+'_'+str(hit)
            
            y_lower = 0
            y_upper=0

            # Alignment to the stimulus cue
            # The last column of df_sti is 'trial_type'; the others are window centres
            real = np.array(df_sti.loc[df_sti['trial_type'] ==variable].mean(axis=0))
            times = np.array(df_sti.columns)[:-1].astype(float)

            if color=='indigo':
                left.set_xlabel('Time (s) to Cue')
            sns.despine()

            # One column per surrogate iteration, indexed by window centre.
            # Only surrogate iterations 1..99 are used here.
            df_new = pd.DataFrame()
            for iteration in np.arange(1,100):
                df_new[iteration]= df_iter.loc[(df_iter.variable==variable)&(df_iter.iteration==iteration)&(df_iter.epoch=='Stimulus_ON')].groupby('times').mean()['score']

            # Shuffle mean and 95% band, expressed relative to the shuffle mean
            y_mean= df_new.mean(axis=1).values
            upper =  df_new.quantile(q=0.975, interpolation='linear',axis=1) - y_mean
            lower =  df_new.quantile(q=0.025, interpolation='linear',axis=1) - y_mean
            # upper =  df_new.quantile(q=0.975, interpolation='linear',axis=1)
            # lower =  df_new.quantile(q=0.025, interpolation='linear',axis=1)
            x=times

            # NOTE(review): bare except; on failure the `continue` skips the plots
            # below, the Delay_OFF panel and the fold_no increment for this condition.
            try:
                a_series = pd.DataFrame(pd.Series(real-y_mean, index = times)).T
                # a_series = pd.DataFrame(pd.Series(real, index = times)).T
                a_series['subject'] = filename[:3]
                a_series['trial_type'] = variable
                a_series['session'] = filename
                a_series['fold'] = fold_no
                a_series['score'] = score

                df_cum_sti = df_cum_sti.append(a_series, ignore_index=True)
            except:
                print('Did not add to summary: ', variable)
                continue

            # plt.plot(x, y_mean, label='Non-prefered Stimulus', color=color)
            left.plot(times,real-y_mean, color=color)
            # left.plot(times,real, color=color)
            left.plot(x, lower, color=color, linestyle = '',alpha=0.6)
            left.plot(x, upper, color=color, linestyle = '',alpha=0.6)
            left.fill_between(x, lower, upper, alpha=0.2, color=color)
            # y_upper / y_lower are updated here but not read anywhere below
            if max(real-y_mean)>y_upper:
                y_upper = max(upper)
            if  min(real-y_mean)<y_lower:
                y_lower = min(lower)
            left.set_ylim(-0.5,1)
            left.axhline(y=baseline,linestyle=':',color='black')
            # Grey band between x = 0 and x = 0.45
            left.fill_betweenx(np.arange(-1.15,4.15,0.1), 0,0.45, color='grey', alpha=.4)
            sns.despine()

            # -------------------- For alignment to the Go cue

            real = np.array(df_res.loc[df_res['trial_type'] == variable].mean(axis=0))
            times = np.array(df_res.columns)[:-1].astype(float)

            df_new = pd.DataFrame()
            for iteration in np.arange(1,100):
                df_new[iteration]= df_iter.loc[(df_iter.variable==variable)&(df_iter.iteration==iteration)&(df_iter.epoch=='Delay_OFF')].groupby('times').mean()['score']

            y_mean= df_new.mean(axis=1).values
            upper =  df_new.quantile(q=0.975, interpolation='linear',axis=1) - y_mean
            lower =  df_new.quantile(q=0.025, interpolation='linear',axis=1) - y_mean
            # upper =  df_new.quantile(q=0.975, interpolation='linear',axis=1)
            # lower =  df_new.quantile(q=0.025, interpolation='linear',axis=1)
            x=times

            # ax2.plot(x, y_mean, color=color)

            right.plot(times,real-y_mean, color=color)
            # right.plot(times,real, color=color)
            right.plot(x, lower , color=color, linestyle = '',alpha=0.6)
            right.plot(x, upper , color=color, linestyle = '',alpha=0.6)
            right.fill_between(x, lower, upper, alpha=0.2, color=color)
            # The axes share y (sharey=True), so this limit also applies to the left panel
            right.set_ylim(-0.2,0.5)
            right.axhline(y=baseline,linestyle=':',color='black')
            # Beige band between x = 0 and x = 0.2
            right.fill_betweenx(np.arange(-1.125,4.1,0.1), 0,0.2, color='beige', alpha=.8)
            if color=='indigo':
                right.set_xlabel('Time (s) to Go')

            sns.despine()

            try:
                a_series = pd.DataFrame(pd.Series(real-y_mean, index = times)).T
                # a_series = pd.DataFrame(pd.Series(real, index = times)).T
                a_series['subject'] = filename[:3]
                a_series['trial_type'] = variable
                a_series['session'] = filename
                a_series['fold'] = fold_no
                a_series['score'] = score

                df_cum_res = df_cum_res.append(a_series, ignore_index=True)
            except:
                print('Did not add to summary: ', variable)
                continue

            # NOTE(review): this increment sits inside the per-condition loop, so
            # fold_no advances once per tested condition rather than once per fold,
            # and the 'fold' value stored above differs between conditions of the
            # same fold -- confirm whether it should be in the outer loop.
            fold_no+=1

    plt.tight_layout()
    plt.show()

In [None]:
# Trial-type labels iterated by the summary figure below; this rebinds the list
# built in the per-session loop.
# NOTE(review): presumably meant for summaries computed with variables = ['all', 'all'] -- confirm.
variables_combined=['all_1', 'all_0']

In [None]:
def _session_summary(df_cum, variable, n_boot=1000):
    """Summarise one trial type of a cumulative results table across sessions.

    Returns ``(times, real, lower, upper)``:
      - times: the time-point columns of ``df_cum`` as floats,
      - real: mean across sessions of the per-session median (over folds),
      - lower / upper: 2.5 and 97.5 percentiles of ``n_boot`` bootstrap means of
        the per-session means, resampling sessions with replacement.
    """
    selected = df_cum.loc[df_cum['trial_type'] == variable]

    # numeric_only / axis are spelled out: the implicit forms (string columns
    # silently dropped, np.mean(frame) reducing column-wise) changed in pandas 2.
    real = (selected.groupby('session').median(numeric_only=True)
            .drop(columns=['score', 'fold'])
            .mean(axis=0)
            .to_numpy())
    times = np.array(selected.drop(columns=['session', 'fold', 'score', 'subject', 'trial_type'])
                     .columns.astype(float))

    # Per-session means do not depend on the time point: compute them once.
    per_session = selected.drop(columns='subject').groupby('session').mean(numeric_only=True)

    lower, upper = [], []
    for timepoint in times:
        # Column labels are strings when the table was reloaded from CSV and
        # floats when it was built in memory.
        try:
            values = per_session[str(timepoint)].to_numpy()
        except KeyError:
            values = per_session[timepoint].to_numpy()
        values = values[~np.isnan(values)]

        # Bootstrap: resample the sessions with replacement and keep the mean
        mean_surr = [np.mean(np.random.choice(values, size=len(values), replace=True))
                     for _ in range(n_boot)]
        lower.append(np.percentile(mean_surr, 2.5))
        upper.append(np.percentile(mean_surr, 97.5))

    return times, real, np.array(lower), np.array(upper)


fig, ([ax1,ax2])  = plt.subplots(1,2, figsize=(14, 4), sharey=True)
for color, variable,left,right in zip(colors,variables_combined,[ax1, ax1, ax1, ax1],[ax2, ax2, ax2, ax2]):
    print(variable)

    # -------------------- Alignment to the stimulus cue
    times, real, lower, upper = _session_summary(df_cum_sti, variable)

    left.set_xlabel('Time (s) to Cue')
    sns.despine()

    left.plot(times,real, color=color)
    left.plot(times, lower, color=color, linestyle = '',alpha=0.6)
    left.plot(times, upper, color=color, linestyle = '',alpha=0.6)
    left.fill_between(times, lower, upper, alpha=0.2, color=color)
    left.set_ylim(0.4,0.8)
    left.axhline(y=0.0,linestyle=':',color='black')
    left.fill_betweenx(np.arange(-1,3.15,0.1), 0,0.45, color='grey', alpha=.4)
    sns.despine()

    # -------------------- Alignment to the Go cue
    times, real, lower, upper = _session_summary(df_cum_res, variable)

    right.plot(times,real, color=color)
    right.plot(times, lower, color=color, linestyle = '',alpha=0.6)
    right.plot(times, upper, color=color, linestyle = '',alpha=0.6)
    right.fill_between(times, lower, upper, alpha=0.2, color=color)
    # The axes share y (sharey=True), so this limit replaces the one set on the left panel
    right.set_ylim(-0.1,0.2)
    right.axhline(y=0.0,linestyle=':',color='black')
    right.fill_betweenx(np.arange(-1.1,3.1,0.1), 0,0.2, color='beige', alpha=.8)
    right.set_xlabel('Time (s) to Go')
    right.set_xlim(-1,2)

    sns.despine()
plt.tight_layout()
# plt.savefig(save_path+'/decoder_'+decode+'_'+align+'_'+trials+'start'+str(start)+'_stop'+str(stop)+'_summary.svg', dpi=300, bbox_inches='tight') 
plt.show()

In [None]:
# Switch to the figures folder and reload the two cumulative summary tables
# (stimulus-aligned and response-aligned) that were saved there as CSV.
save_path = 'C:/Users/Tiffany/Google Drive/WORKING_MEMORY/PAPER/Figures/'
os.chdir(save_path)

file_name = 'all_all1 and 0 delay code'
# To save instead of load:
# df_cum_sti.to_csv(file_name+'_sti.csv')
# df_cum_res.to_csv(file_name+'_res.csv')
df_cum_sti, df_cum_res = (
    pd.read_csv(file_name + suffix, index_col=0)
    for suffix in ('_sti.csv', '_res.csv')
)

### Figure for all trials code but using single delays. 

In [None]:
def interval_extraction_trial(df, cluster_list=[], variable = 'vector_answer', align = 'Delay_OFF', start = 0, stop = 1, delay_only=False):
    """Build a per-trial spike-count table for one time window, plus the label to decode.

    Parameters
    ----------
    df : DataFrame
        One row per spike. The body reads the columns ``trial``, ``cluster_id``,
        ``fixed_times``, ``delay``, ``align`` and ``variable``. It is not modified.
    cluster_list : list
        Cluster ids that become the output columns, in this order; clusters
        without spikes in the window are filled with 0. (Never mutated here.)
    variable : str
        Column averaged per trial to obtain the label.
    align : str
        Event column; spike times are taken relative to it.
    start, stop : float
        Open interval ``(start, stop)`` relative to ``align``.
    delay_only : bool
        When False, trials whose ``delay`` is shorter than what the requested
        window needs are removed (see thresholds below; 1-s delays are dropped
        for stimulus-aligned windows ending after 1.5 s).

    Returns
    -------
    (DataFrame, Series)
        The table indexed by ``trial`` with one column per cluster and a ``y``
        column (label, with 0 recoded as -1), and that ``y`` column on its own.
        Trials without spikes in the window are kept with all-zero counts.
    """
    if not delay_only:
        excluded_delays = []
        if (align == 'Delay_OFF' and start < 0) or (align == 'Stimulus_ON' and stop > 0.5):
            excluded_delays = [0.1, 0.2]
        if (align == 'Delay_OFF' and start < -1) or (align == 'Stimulus_ON' and stop > 1.5):
            excluded_delays = [0.1, 0.2, 1]
        if excluded_delays:
            df = df.loc[~df.delay.isin(excluded_delays)]

    # assign() returns a new frame, so the caller's DataFrame is never written to
    # (the previous in-place column assignment leaked into it when nothing was filtered).
    aligned_col = 'a_'+align
    df = df.assign(**{aligned_col: df.fixed_times - df[align]})

    # Label per trial, computed before the window filter so trials without spikes
    # in the window are still represented. Only the label column is averaged:
    # averaging the whole frame fails on string columns with pandas >= 2.
    # groupby sorts by trial, so no explicit sort is needed.
    y = df.groupby('trial')[variable].mean()

    # Keep the spikes that fall inside the analysis window
    in_window = df.loc[(df[aligned_col] > start) & (df[aligned_col] < stop)]

    counts = in_window.groupby(['trial', 'cluster_id'])['fixed_times'].count().reset_index()
    df_final = counts.pivot_table(index=['trial'], columns='cluster_id', values='fixed_times', fill_value=0).rename_axis(None, axis=1)
    df_final = df_final.reindex(cluster_list, axis=1, fill_value=0)

    # Right merge keeps every trial that has a label; missing counts become 0
    result = pd.merge(df_final, y, how="right", on=["trial"]).fillna(0)
    result = result.rename(columns={variable: "y"})
    result['y'] = np.where(result['y'] == 0, -1, result['y'])

    return result, result['y']

In [None]:
def train(df, decode='vector_answer', align='Delay_OFF', start=-0.5, stop=0, cluster_list = [], ratio=0.65, test_index=[],  train_index=[], fakey=[], delay_only=False):
    '''
    Fit an L1-penalised logistic regression on the per-trial spike counts of one time window.

    Attributes
        - df: DataFrame with one row per spike, as expected by interval_extraction_trial.
        - decode: str. Column with the label to decode.
        - align, start, stop: alignment event and window (in seconds) used to count the training spikes.
        - cluster_list: list-like of cluster ids that become the feature columns.
        - ratio: not used inside this function.
        - test_index, train_index: row positions (after the trial index is reset) used for evaluation and fitting.
          When test_index is empty the model is fitted and evaluated on all trials.
        - fakey: list-like of replacement labels; when it has more than one element its last len(trials)
          values overwrite the real labels.
        - delay_only: forwarded to interval_extraction_trial.

    Return
        - model: the fitted LogisticRegression.
        - train_cols: columns of the training table (neuron columns plus 'y').
        - mean probability that the model assigns to the true class of the evaluation trials.
        - sc_fit: RobustScaler fitted on the 250 ms before Stimulus_ON. It is returned only; the scaling of the
          training data is currently disabled.
    '''
    # Scaler fitted on the pre-stimulus window. It is handed back to the caller but not applied here.
    baseline_counts, _ = interval_extraction_trial(df, variable=decode, align='Stimulus_ON', start=-0.25, stop=0,
                                                   cluster_list=cluster_list, delay_only=delay_only)
    sc_fit = RobustScaler().fit(baseline_counts.iloc[:, baseline_counts.columns != 'y'])

    # Spike counts of the window the decoder is actually trained on.
    counts, labels = interval_extraction_trial(df, variable=decode, align=align, start=start, stop=stop,
                                               cluster_list=cluster_list, delay_only=delay_only)

    # Session shuffles: replace the real labels by the tail of the surrogate label vector.
    if len(fakey) > 1:
        print('Using shuffled session')
        counts['y'] = fakey[len(fakey)-len(labels):]

    # From here on rows are addressed by position, so the trial index is discarded.
    counts = counts.reset_index().drop(columns='trial')
    train_cols = counts.columns
    is_feature = counts.columns != 'y'

    if len(test_index) >= 1:
        print('Using splits')
        fit_rows = counts.loc[train_index, :]
        eval_rows = counts.loc[test_index, :]
    else:
        # No split requested: fit and evaluate on the same trials.
        fit_rows = eval_rows = counts

    x_train, y_train = fit_rows.iloc[:, is_feature], fit_rows['y']
    x_test, y_test = eval_rows.iloc[:, is_feature], eval_rows['y']

    model = LogisticRegression(solver='liblinear', penalty = 'l1', C=0.99).fit(x_train, y_train)

    p_pred = model.predict_proba(x_test)
    f1score = f1_score(y_test, model.predict(x_test), average='weighted')

    # Labels -1/1 are mapped to probability columns 0/1 to pick the probability given to the true class.
    true_class = np.where(y_test == -1, 0, y_test).reshape(len(y_test), 1).astype(int)
    mean_score = np.mean(np.take_along_axis(p_pred, true_class, axis=1))

    print('score:', mean_score, 'f1_score ', f1score)

    return model, train_cols, mean_score, sc_fit

In [None]:
def test(df,sc_fit, epoch='Stimulus_ON',initrange=-0.4,endrange=1.5,r=0.2, model = None, train_cols=None, variable='ra_accuracy',
                      hit=1, nsurrogates = 100, decode='vector_answer', ratio=0, cluster_list = None, test_index=None, fakey=None, delay_only=False):
    '''
    Score a previously trained decoder (see train) on consecutive time windows and build a label-shuffle null distribution.

    Attributes
        - df: DataFrame with one row per spike, as expected by interval_extraction_trial.
        - sc_fit: fitted scaler returned by train. Accepted for interface compatibility; scaling is currently disabled.
        - epoch: str. Column with the event that the windows are aligned to.
        - initrange: float. Start of the first window, relative to epoch.
        - endrange: float. Upper limit for the window edges.
        - r: float. Width of each window, which is also the step between windows.
        - model: fitted classifier exposing score(X, y).
        - train_cols: columns of the training table (neurons plus 'y'). Each test table is reindexed to these columns,
          so neurons missing at test time are filled with 0, extra ones are dropped and the training order is kept.
          When None the test table is used as is.
        - variable: str and hit: both are only used to build the label variable_hit stored in the outputs.
        - nsurrogates: int. Number of label shuffles evaluated per window.
        - decode: str. Column with the label to decode.
        - ratio, fakey: not used inside this function.
        - cluster_list: list-like of cluster ids forwarded to interval_extraction_trial. None means empty.
        - test_index: row positions to evaluate on. When None or empty all trials are used.
        - delay_only: forwarded to interval_extraction_trial.

    Return
        - df_real: one-row DataFrame. One column per window centre holding the decoder score, plus 'trial_type'.
        - df_iter: DataFrame with one row per (window, shuffle) and columns iteration (1..nsurrogates), score,
          times, epoch and variable.
    '''
    cluster_list = [] if cluster_list is None else cluster_list
    use_split = test_index is not None and len(test_index) >= 1
    label = variable+'_'+str(hit)

    times = [] # Centre of each window
    real_score = [] # Score of the decoder on the real labels
    surrogate_rows = [] # One dict per shuffle; turned into a DataFrame once at the end

    window_starts = np.arange(initrange,endrange-r,r)
    window_stops = np.arange(initrange+r,endrange,r)
    for start, stop in zip(window_starts, window_stops):
        centre = (start+stop)/2
        times.append(centre)
        df_final, y = interval_extraction_trial(df,variable = decode, align = epoch, start = start, stop = stop, cluster_list=cluster_list, delay_only=delay_only)

        # Training and testing tables can contain different neurons. Reindexing to the training columns adds the
        # missing ones filled with 0, drops the extra ones and keeps the column order the model was fitted with.
        if train_cols is not None:
            df_final = df_final.reindex(columns=train_cols, fill_value=0)

        if use_split:
            print('Train splitting trials')
            # test_index holds row positions, so the trial index is replaced by a positional one.
            df_final = df_final.reset_index().drop(columns ='trial')
            held_out = df_final.loc[test_index,:]
        else:
            held_out = df_final

        x_test = held_out.iloc[:, held_out.columns != 'y']
        y_test = held_out['y']

        real_score.append(model.score(x_test, y_test))

        # Null distribution: score the same activity against shuffled labels, exactly nsurrogates times.
        for i in range(1, nsurrogates+1):
            y_perr = shuffle(y_test)
            surrogate_rows.append({'iteration': i, 'score': model.score(x_test, y_perr), 'times': centre,
                                   'epoch' : epoch, 'variable' : label})

    # Window centres become the columns of the single-row summary; 'trial_type' is kept as the last column.
    df_real = pd.DataFrame([real_score + [label]], columns=times + ['trial_type'])
    df_iter = pd.DataFrame(surrogate_rows, columns=['iteration', 'score', 'times', 'epoch', 'variable'])

    return df_real, df_iter

In [None]:
## Dataframe used for cumulative analysis
# For every session file in the summary folder this cell:
#   1. rebuilds the spike table (copying end-of-trial spikes into the following trial),
#   2. trains the decoder on the training trials with stratified k-fold splits,
#   3. tests each fold on every delay, window by window, aligned to Stimulus_ON, and plots the result,
#   4. appends the real scores to df_cum_sti and the shuffled scores to df_cum_shuffle.
df_cum_sti = pd.DataFrame()
df_cum_shuffle = pd.DataFrame()

# NOTE(review): hard-coded absolute path; the working directory of the whole notebook is changed.
os.chdir('C:/Users/Tiffany/Documents/Ephys/summary_complete')
for filename in os.listdir(os.getcwd()):
# for filename in list_of_sessions:
    # Every entry whose name does not end in 'pdf' is read as a session CSV.
    if filename[-3:] != 'pdf':
        df = pd.read_csv(filename, sep=',',index_col=0)
    else:
        continue
        
    print(filename, '/ Total session trials: ', len(df.trial.unique()), '/ Number of neurons: ', len(df.cluster_id.unique()))
    
    # df['WM_roll'] = compute_window_centered(df, 3,'WM')

####################################### ----------------- Add spikes from the end of the previous trial before the current stimulus
    # df = df.rename(columns={'past_choices_x' : 'past_choices', 'streak_x' : 'streak', 'past_rewards_x' : 'past_rewards'})
    # df = df.drop(columns=['past_choices_y','streak_y', 'past_rewards_y'])

    # Create a DataFrame with one row per trial (per-trial mean of every column)
    # NOTE(review): groupby().mean() over all columns raises on non-numeric columns in pandas >= 2.0.
    trials = df.groupby(['trial']).mean()
    try:
        trials = trials[['START','END','Delay_ON','Delay_OFF', 'Stimulus_ON', 'Response_ON', 'Lick_ON', 'Motor_OUT','new_trial',
               'vector_answer', 'reward_side', 'hit', 'delay','total_trials', 'T', 'previous_vector_answer', 'previous_reward_side','repeat_choice',
                'WM_roll', 'RL_roll', 'WM', 'RL', 'streak']]
    except:
        # Fallback for sessions without the 'streak' column.
        # NOTE(review): the bare except also hides any other failure of the selection above.
        trials = trials[['START','END','Delay_ON','Delay_OFF', 'Stimulus_ON', 'Response_ON', 'Lick_ON', 'Motor_OUT','new_trial',
               'vector_answer', 'reward_side', 'hit', 'delay','total_trials', 'T', 'previous_vector_answer', 'previous_reward_side','repeat_choice',
                'WM_roll', 'RL_roll', 'WM', 'RL']]
    trials = trials.reset_index()

    # Make an alignment to the END column
    df['a_END'] = df['fixed_times'] - df['END']

    # Create a new DataFrame with all spikes
    try:
        # Some sessions include the group column that indicates the type of cluster, others don't
        spikes = df[['trial','fixed_times','a_END','cluster_id', 'group']]
    except:
        spikes = df[['trial','fixed_times','a_END','cluster_id']]

    # Locate the spikes that happen later than 4 s before the end of their trial and copy them into the next trial (trial + 1)
    duplicate_spikes = spikes.loc[spikes.a_END >-4]
    duplicate_spikes['trial'] +=1 

    # Add the duplicates
    spikes = pd.concat([spikes, duplicate_spikes])

    # Merge trial data with spikes on the trial index
    df = pd.DataFrame()
    df = pd.merge(trials, spikes, on=["trial"])

    # Create the columns for start and end and change trial to new trial index ( without taking the misses into account)
    # df['trial_start'] = min(df.new_trial)
    # df['trial_end'] = max(df.new_trial)
    # df = df.drop(columns=['trial'])
    # df = df.rename(columns={'new_trial' : 'trial'})

    # This in case we don't do this and want to preserve the original trial indexes. 
    df['trial_start'] = min(df.trial)
    df['trial_end'] = max(df.trial)

    # Create the alignments that we will need for the analysis. 
    df['a_Stimulus_ON'] =  df['fixed_times'] - df['Stimulus_ON']
    df['a_Lick_ON'] =  df['fixed_times'] - df['Lick_ON']
    df['a_Delay_OFF'] =  df['fixed_times'] - df['Delay_OFF']
    df['a_Motor_OUT'] =  df['fixed_times'] - df['Motor_OUT']
    df['a_Response_ON'] =  df['fixed_times'] - df['Response_ON']
    df['START_adjusted'] =  df['START'] - 4.1
    
############################################################# -------------------------------------------------------------------------

    # substract == True plots and stores the real score minus the mean shuffled score; False keeps the raw score.
    substract = False
    df['delay'] = np.around(df.delay,2)
    
    # Variables used for decoder training
    decode = 'vector_answer'
    align='Delay_OFF'
    ratio = 0.6
    start = -0.25
    stop = 0
    type_trial ='all'
    hit = 1
    nsplits = 3
    
    #Variables for testing
    # colors=['darkgreen','crimson', 'indigo']
    # variables = ['WM_roll','WM_roll','RL_roll']
    # hits = [1,0,1]
    # ratios = [0.6,0.6,0.4]
    # variables_combined=[variables[0]+'_'+str(hits[0]),variables[1]+'_'+str(hits[1]),variables[2]+'_'+str(hits[2])]

    # Active configuration: all trials, split into errors (hit 0, crimson) and correct trials (hit 1, dark green).
    colors=['crimson','darkgreen']
    variables = ['all','all']
    hits = [0,1]
    ratios = ["all","all"]
    variables_combined=[variables[0]+'_'+str(hits[0]),variables[1]+'_'+str(hits[1])]

    # colors=['crimson','darkgreen','indigo','purple']
    # variables = ['WM_roll','WM_roll','RL_roll','RL_roll']
    # hits = [0,1,1,0]
    # ratios = [0.6,0.6,0.4,0.4]
    # variables_combined=[variables[0]+'_'+str(hits[0]),variables[1]+'_'+str(hits[1]),variables[2]+'_'+str(hits[2]),variables[3]+'_'+str(hits[3])]

    cluster_list = df.cluster_id.unique()

    skf = StratifiedKFold(n_splits=nsplits)
    # skf = KFold(n_splits=nsplits, shuffle=True)

    # Create a dataframe for training data
    if type_trial == 'all':
        df_train = df.loc[(df.hit==hit)]
    elif hit == 'all':
        df_train = df.loc[(df[type_trial]>=ratio)]
    else:
        df_train = df.loc[(df[type_trial]>=ratio)&(df.hit==hit)]

    # Trials with 0.1 s and 0.2 s delays are never used for training.
    df_train = df_train.loc[(df_train.delay!=0.2)&(df_train.delay!=0.1)]
    
    # This table is only used to generate the fold indices; train() extracts the same window again.
    df_final, y = interval_extraction_trial(df_train, variable = decode, align = align, start = start, stop = stop, cluster_list=cluster_list)
    df_final.reset_index(inplace=True)
    df_final = df_final.drop(columns ='trial')
    
    fold_no = 1
    if len(y) < nsplits:
        print('Skip session because not enough trials')
        continue
        
    for train_index, test_index in skf.split(df_final, y):
        
        print('Fold_no:', fold_no)
        model, train_cols, score, sc_fit = train(df_train, decode=decode, align=align, start=start,stop=stop, cluster_list = cluster_list, 
                                  test_index=test_index,  train_index=train_index)

        # Remove a fifth of the dataset so it can be compared to crossvalidated data. If we want to randomly reduce it, add reduce to train function
        # drop_list = np.array_split(df_train.trial.unique(), 5)[fold_no]
        # df_train = df_train[~df_train['trial'].isin(drop_list)]
        # index_train_trials = df_train.trial.unique()
        # print('Total of left: ', len(df_train.loc[df_train['vector_answer'] == 0].groupby('trial').mean()), '; Total of right: ', len(df_train.loc[df_train['vector_answer'] == 1].groupby('trial').mean()))

        for delay in df.delay.unique():
            try:
                df_delay = df.loc[np.around(df.delay,1)==delay]
                # .iloc[0] raises when no spike matches this delay; such delays are skipped.
                delay=np.around(df_delay.delay.iloc[0],1)
                print('Delay:', delay)
            except:
                continue

            # Length of the tested time axis for each delay.
            # NOTE(review): a delay that matches none of these branches keeps endrange and r from a previous iteration.
            if delay == 0.1 or delay == 0.2:
                endrange=3.5
                r=0.25
            elif delay == 1:
                endrange=4.5
                r=0.25
            elif delay == 3:
                endrange=6.5    
                r=0.25
            elif delay == 10:
                endrange=14.5
                r=0.25

            # One figure per delay; longer delays get a wider figure.
            # NOTE(review): no figure is created for delay 0.2, so its curves are drawn on whatever ax1 currently refers to.
            if delay == 0.1:
                fig, ax1 = plt.subplots(1,1, figsize=(10, 4), sharey=True)
            elif delay == 1:
                fig, ax1 = plt.subplots(1,1, figsize=(10, 4), sharey=True)
            elif int(delay) == 3:
                fig, ax1 = plt.subplots(1,1, figsize=(12, 4), sharey=True)
            elif delay == 10:
                fig, ax1 = plt.subplots(1,1, figsize=(14, 4), sharey=True)

            df_res = pd.DataFrame()
            df_sti = pd.DataFrame()
            df_iter = pd.DataFrame()

            # NOTE(review): this loop rebinds `hit` and `ratio`, the training settings defined above; they are reset at the top of each session.
            for color, variable,hit,ratio,left in zip(colors,variables,hits,ratios,[ax1,ax1,ax1,ax1]):

                # Create a dataframe for testing data
                if variable == 'all':
                    df_test = df_delay.loc[(df_delay.hit==hit)]
                elif hit == 'all':
                    df_test = df_delay.loc[(df_delay[variable]>=ratio)]
                else:
                    df_test = df_delay.loc[(df_delay[variable]>=ratio)&(df_delay.hit==hit)]
                    
                if fold_no == 1:
                    print(variable, 'Threshold:', ratio, 'Hit:', hit, 'Nº of trials:', len(df_test.trial.unique()))

    # -----------  Remove the trials that overlap with the training set.
                # assumes df_train.trial.unique() is in the same order as the rows given to skf.split — TODO confirm
                list_train_trials = df_train.trial.unique()[train_index]
                df_test = df_test[~df_test['trial'].isin(list_train_trials)] 
                
                if len(df_test.trial.unique())<5:
                    print('Not enough trials with this condition')
                    continue

                # delay_only receives the delay value itself (not False), which turns off the short-delay filtering in interval_extraction_trial.
                df_real,df_temp = test(df_test, sc_fit, decode= decode,epoch='Stimulus_ON',initrange=-2,endrange=endrange, r=r, model = model, delay_only=delay, 
                                                  variable=variable, hit=hit, nsurrogates = 100,train_cols = train_cols, cluster_list = cluster_list)

                # Both tables accumulate over the conditions tested so far for this delay.
                df_sti = pd.concat([df_real,df_sti])
                df_iter = pd.concat([df_iter,df_temp])

                variable = str(variable)+'_'+str(hit)

                # Alignment to the stimulus cue
                # NOTE(review): mean(axis=0) here also runs over the string column 'trial_type'; this relies on pandas silently dropping it.
                real = df_sti.loc[(df_sti['trial_type'] ==variable)].mean(axis=0).to_numpy()
                times = np.around(np.array(df_sti.columns)[:-1].astype(float),2)

                # Shuffle distribution per window. df_iter is not filtered by condition here, so it pools every condition tested so far.
                df_new= df_iter.loc[(df_iter.epoch=='Stimulus_ON')].groupby('times')['score']
                y_mean= df_new.mean().values
                # NOTE(review): the names are swapped with respect to the quantiles (lower holds the 97.5% one); both are only used as edges of the same band.
                lower =  df_new.quantile(q=0.975, interpolation='linear')-y_mean
                upper =  df_new.quantile(q=0.025, interpolation='linear')-y_mean
                x=times

                left.set_xlabel('Time to Cue')

                if substract == True:
                    left.plot(times,real-y_mean, color=color)
                    left.plot(x, lower+real-y_mean, color=color, linestyle = '',alpha=0.6)
                    left.plot(x, upper+real-y_mean, color=color, linestyle = '',alpha=0.6)
                    left.fill_between(x, lower+real-y_mean, upper+real-y_mean, alpha=0.2, color=color)
                    left.set_ylim(-0.5,0.6)
                    left.axhline(y=0.0,linestyle=':',color='black')
                    # Shaded bands: grey from 0 to 0.4 s and beige from delay+0.3 to delay+0.5 s.
                    left.fill_betweenx(np.arange(-1,1.15,0.1), 0,0.4, color='grey', alpha=.4)
                    left.fill_betweenx(np.arange(-1.1,1.1,0.1), delay+0.3,delay+0.5, color='beige', alpha=.8)
                    try:
                        # One summary row per (session, delay, fold, condition): score per window plus metadata.
                        a_series = pd.DataFrame(pd.Series(real-y_mean, index = times)).T
                        a_series['trial_type'] = variable
                        a_series['session'] = filename
                        a_series['delay'] = delay
                        a_series['score'] = score    
                        a_series['fold'] = fold_no 

                        # NOTE(review): DataFrame.append was removed in pandas 2.0; there this raises and the bare except below reports 'Did not add to summary'.
                        df_cum_sti = df_cum_sti.append(a_series, ignore_index=True)

                        df_cum_iter=pd.DataFrame()
                        df_cum_iter['times'] = df_iter.groupby('times').score.mean().reset_index()['times'].values
                        df_cum_iter['delay'] = delay
                        df_cum_iter['session'] = filename
                        df_cum_iter['fold'] = fold_no 
                        df_cum_iter['trial_type'] = variable 

                        # One column per shuffle iteration, expressed relative to the mean shuffled score of each window.
                        for iteration in df_iter.iteration.unique():
                            df_cum_iter[iteration]= df_iter.loc[(df_iter.iteration==iteration)].groupby('times').mean()['score'].values - df_iter.groupby('times').mean()['score'].values

                        df_cum_shuffle = pd.concat([df_cum_iter, df_cum_shuffle])
                    except:
                        print('Did not add to summary: ', variable)
                        continue
                        
                elif substract == False:
                    left.plot(times,real, color=color)
                    left.plot(x, lower+real, color=color, linestyle = '',alpha=0.6)
                    left.plot(x, upper+real, color=color, linestyle = '',alpha=0.6)
                    left.fill_between(x, lower+real, upper+real, alpha=0.2, color=color)
                    left.set_ylim(0,1.1)
                    # Dotted reference line at a score of 0.5.
                    left.axhline(y=0.5,linestyle=':',color='black')
                    # Shaded bands: grey from 0 to 0.4 s and beige from delay+0.3 to delay+0.5 s.
                    left.fill_betweenx(np.arange(0,1.15,0.1), 0,0.4, color='grey', alpha=.4)
                    left.fill_betweenx(np.arange(0,1.1,0.1), delay+0.3,delay+0.5, color='beige', alpha=.8)

                    try:
                        # One summary row per (session, delay, fold, condition): score per window plus metadata.
                        a_series = pd.DataFrame(pd.Series(real, index = times)).T
                        a_series['trial_type'] = variable
                        a_series['session'] = filename
                        a_series['delay'] = delay
                        a_series['score'] = score    
                        a_series['fold'] = fold_no 

                        # NOTE(review): DataFrame.append was removed in pandas 2.0; there this raises and the bare except below reports 'Did not add to summary'.
                        df_cum_sti = df_cum_sti.append(a_series, ignore_index=True)

                        df_cum_iter=pd.DataFrame()
                        df_cum_iter['times'] = df_iter.groupby('times').score.mean().reset_index()['times'].values
                        df_cum_iter['delay'] = delay
                        df_cum_iter['session'] = filename
                        df_cum_iter['fold'] = fold_no 
                        df_cum_iter['trial_type'] = variable 

                        # One column per shuffle iteration with the raw shuffled score of each window.
                        for iteration in df_iter.iteration.unique():
                            df_cum_iter[iteration]= df_iter.loc[(df_iter.iteration==iteration)].groupby('times').mean()['score'].values

                        df_cum_shuffle = pd.concat([df_cum_iter, df_cum_shuffle])
                    except:
                        print('Did not add to summary: ', variable)
                        continue

                sns.despine()
        # The figures of all delays are laid out and shown once per fold, after the delay loop has finished.
        plt.tight_layout()
        plt.show()
        fold_no+=1

In [None]:
# Summary figure, one per delay: decoder score over time averaged across sessions, with a bootstrap band,
# drawn for every condition in variables_combined. Reads df_cum_sti, colors and variables_combined from earlier cells.
delays = [1,3,10]
# NOTE(review): y_lower and y_upper are not used anywhere in this cell.
y_lower = 0
y_upper = 0
# Value of the dotted reference line; the y-limits and the shaded bands are placed relative to it.
baseline = 0.0
# delays = [1,3,10]

for delay in delays:
    fig, ax1 = plt.subplots(1,1, figsize=(10, 4), sharey=True)
#     if delay == 0.1 and hit ==1:
#         fig, ax1 = plt.subplots(1,1, figsize=(10, 4), sharey=True)
#     elif delay == 1 and hit ==1:
#         fig, ax1 = plt.subplots(1,1, figsize=(10, 4), sharey=True)
#     elif delay == 3 and hit ==1:
#         fig, ax1 = plt.subplots(1,1, figsize=(12, 4), sharey=True)
#     elif delay == 10 and hit ==1:
                
    for color, variable in zip(colors,variables_combined):
        print(variable)
        # Set to True to also draw one faint line per session.
        individual_sessions = False
        if individual_sessions == True:
            # Alignment to the stimulus cue - sessions separately
            real = df_cum_sti.loc[(df_cum_sti['trial_type'] == variable)&(df_cum_sti['delay'] == delay)].groupby('session').median().reset_index().drop(columns=['fold','session','delay','score'])
            try:
                times = df_cum_sti.loc[(df_cum_sti['trial_type'] == variable)&(df_cum_sti['delay'] == delay)]
                times = np.array(times.drop(columns=['fold','score','trial_type', 'delay','session'],axis = 1).columns.astype(float))
            except:
                times = np.array(df_cum_sti.columns[1:]).astype(float)
        
            # NOTE(review): `left` is not defined in this cell; it is whatever the previous cell left behind (ax1 is probably intended).
            left.set_xlabel('Time (s) to Cue')
        
            x=times
            for i in range(len(real)):
                ax1.plot(times,real.iloc[i], color=color,alpha=0.1)
                
        # Alignment to the stimulus cue: median over the rows of each session, then mean across sessions
        # NOTE(review): median() is applied while the string column 'trial_type' is still present; this relies on pandas dropping it silently.
        real = np.array(np.mean(df_cum_sti.loc[(df_cum_sti['trial_type'] == variable)&(df_cum_sti['delay'] == delay)].groupby('session').median().drop(columns=['fold','delay','score'])))
        
    #     times = np.array(df_cum_sti.loc[:, df_cum_sti.columns != 'trial_type' and df_cum_sti.columns != 'trial_type'].columns).astype(float)
        # The time axis is read from the column labels that remain once the metadata columns are dropped.
        times = df_cum_sti.loc[(df_cum_sti['trial_type'] == variable)&(df_cum_sti['delay'] == delay)]
        times = np.array(times.drop(columns=['score','trial_type', 'delay','fold','session'],axis = 1).columns.astype(float))
        sns.despine()

        df_lower = pd.DataFrame()
        df_upper = pd.DataFrame()

        for timepoint in times:
            mean_surr = []

            # recover the per-session mean values for that specific timepoint
            # The column label is tried as a string first and, if that fails, as a float.
            try:
                array = df_cum_sti.loc[(df_cum_sti.trial_type ==variable)&(df_cum_sti['delay'] == delay)].drop(columns='delay').groupby('session').mean()[str(timepoint)].to_numpy()
            except:
                array = df_cum_sti.loc[(df_cum_sti.trial_type ==variable)&(df_cum_sti['delay'] == delay)].drop(columns='delay').groupby('session').mean()[timepoint].to_numpy()

            # Bootstrap: resample the session values with replacement 1000 times
            for iteration in range(1000):
    
                x = np.random.choice(array, size=len(array), replace=True)
                # recover the mean of that new distribution
                mean_surr.append(np.mean(x))

            # 2.5th and 97.5th percentiles of the resampled means are the edges of the band.
            df_lower.at[0,timepoint] = np.percentile(mean_surr, 2.5)
            df_upper.at[0,timepoint] = np.percentile(mean_surr, 97.5)

        lower =  df_lower.iloc[0].values
        upper =  df_upper.iloc[0].values

#         df_new = pd.DataFrame()
#         for iteration in np.arange(1,100):
#             df_new[iteration]= df_cum_shuffle.loc[(df_cum_shuffle.trial_type==variable)].groupby('times').mean()[iteration]

#         y_mean= df_new.mean(axis=1).values
#         upper =  df_new.quantile(q=0.975, interpolation='linear',axis=1) - y_mean
#         lower =  df_new.quantile(q=0.025, interpolation='linear',axis=1) - y_mean
    
        x=times

        ax1.plot(times,real, color=color)
        ax1.plot(x, lower, color=color, linestyle = '',alpha=0.6)
        ax1.plot(x, upper, color=color, linestyle = '',alpha=0.6)
        ax1.fill_between(x, lower, upper, alpha=0.2, color=color)
        ax1.set_ylim(baseline-0.2,baseline+0.3)
        ax1.axhline(y=baseline,linestyle=':',color='black')
        # Shaded bands: grey from 0 to 0.4 s and beige from delay+0.4 to delay+0.6 s.
        ax1.fill_betweenx(np.arange(baseline-0.1,baseline+0.5,0.1), 0,0.4, color='grey', alpha=.4)
        ax1.fill_betweenx(np.arange(baseline-0.1,baseline+0.5,0.1), delay+0.4,delay+0.6, color='beige', alpha=.8)
        # NOTE(review): the label says 'to Go' although the comments above describe the data as aligned to the stimulus cue — confirm.
        if color=='crimson':
            ax1.set_xlabel('Time (s) to Go')

        sns.despine()
    plt.tight_layout()
    # plt.savefig(save_path+'/delay_'+str(delay)+'_'+align+'_'+trials+'start'+str(start)+'_stop'+str(stop)+'_summary.svg', dpi=300, bbox_inches='tight') 
    plt.show()