# Create general replay condition

__For all participants and all images: apply the Savitzky–Golay (`sav_gol`) filter, upsample the data,
get the velocity profile, and apply the contrast ramp.__

In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
from scipy import signal
from scipy import interpolate
from replay_condition import *
from merging_data import *
import glob

In [2]:
# Collect the paths of all raw per-subject files (raw_subj_<N>.dat).
dat_files = glob.glob('../separate_participant_data/Raw_Data/*.dat')

In [3]:
# Collect the paths of the outputs that already exist in Final_Data
# (the directory final_contrast() writes to).
done_files =  glob.glob('../separate_participant_data/Final_Data/*.dat')

In [4]:
# Display the raw-data paths that were found.
dat_files

['../separate_participant_data/Raw_Data/raw_subj_200.dat',
 '../separate_participant_data/Raw_Data/raw_subj_22.dat',
 '../separate_participant_data/Raw_Data/raw_subj_13.dat',
 '../separate_participant_data/Raw_Data/raw_subj_85.dat',
 '../separate_participant_data/Raw_Data/raw_subj_116.dat',
 '../separate_participant_data/Raw_Data/raw_subj_187.dat',
 '../separate_participant_data/Raw_Data/raw_subj_47.dat',
 '../separate_participant_data/Raw_Data/raw_subj_100.dat',
 '../separate_participant_data/Raw_Data/raw_subj_46.dat',
 '../separate_participant_data/Raw_Data/raw_subj_129.dat',
 '../separate_participant_data/Raw_Data/raw_subj_146.dat',
 '../separate_participant_data/Raw_Data/raw_subj_9.dat',
 '../separate_participant_data/Raw_Data/raw_subj_159.dat',
 '../separate_participant_data/Raw_Data/raw_subj_149.dat',
 '../separate_participant_data/Raw_Data/raw_subj_124.dat',
 '../separate_participant_data/Raw_Data/raw_subj_101.dat',
 '../separate_participant_data/Raw_Data/raw_subj_102.dat',
 '..

In [5]:
# Extract the subject number from every raw-data path, e.g.
# '.../raw_subj_200.dat' -> 200.
subs_list = []
for file in dat_files:
    # The number sits between the "j_" of "subj_" and the ".dat" extension.
    # Both offsets are searched in the current path (not in dat_files[0]),
    # and from the right, so a differing directory prefix cannot shift them.
    idx_subject_number = file.rfind("j_") + 2  # first character of the subject number
    end_idx = file.rfind(".dat")  # one past the last character of the subject number
    subject = int(file[idx_subject_number:end_idx])
    subs_list.append(subject)

In [6]:
# Extract the subject number from every already-written output path, e.g.
# '.../data_subj_94.dat' -> 94.
done_subs_list = []
for file in done_files:
    # The number sits between the "j_" of "subj_" and the ".dat" extension.
    # Both offsets are searched in the current path (not in done_files[0]),
    # and from the right, so a differing directory prefix cannot shift them.
    idx_subject_number = file.rfind("j_") + 2  # first character of the subject number
    end_idx = file.rfind(".dat")  # one past the last character of the subject number
    subject = int(file[idx_subject_number:end_idx])
    done_subs_list.append(subject)

In [7]:
# Display the subject numbers that already have an output file.
done_subs_list

[94,
 105,
 95,
 7,
 133,
 5,
 137,
 170,
 32,
 150,
 52,
 153,
 87,
 90,
 147,
 29,
 17,
 163,
 148,
 168,
 151,
 102,
 99,
 82,
 45,
 79,
 199,
 91,
 62,
 126,
 167,
 18,
 43,
 39,
 25,
 68,
 97,
 41,
 21,
 13,
 120,
 16,
 160,
 190,
 31,
 124,
 56,
 65,
 158,
 119,
 73,
 107,
 141,
 38,
 183,
 200,
 88,
 104,
 195,
 121,
 14,
 138,
 142,
 152,
 123,
 128,
 194,
 70,
 144,
 4,
 177,
 55,
 191,
 80,
 27,
 116,
 36,
 164,
 184,
 71,
 193,
 125,
 115,
 64,
 28,
 180,
 179,
 8,
 127,
 59,
 111,
 83,
 63,
 81,
 117,
 186,
 74,
 100,
 61,
 78,
 182,
 130,
 136,
 15,
 129,
 118,
 67,
 11,
 132,
 139,
 48,
 131,
 46,
 54,
 19,
 108,
 24,
 93,
 154,
 33,
 146,
 162,
 34,
 172,
 42,
 30,
 103,
 72,
 155,
 173,
 85,
 86,
 47,
 192,
 75,
 169,
 187,
 134,
 53,
 197,
 145,
 3,
 23,
 181,
 112,
 178,
 92,
 2,
 66,
 113,
 157,
 1,
 165,
 143,
 12,
 58,
 98,
 49,
 161,
 109,
 44,
 69,
 40,
 156,
 166,
 175,
 174,
 185,
 77,
 135,
 22,
 122,
 159,
 106,
 101,
 9,
 114,
 140,
 50,
 20,
 37,
 198,
 84,

In [8]:
# Subjects that have a raw file but no output file yet, in subs_list order.
# The done subjects are put in a set once so each membership test is O(1).
_done_lookup = set(done_subs_list)
not_yet_done = [element for element in subs_list if element not in _done_lookup]

In [9]:
# Display the subjects that still need to be processed.
not_yet_done

[]

# Create a function that:
1. takes a subject number and imports that subject's .dat file
2. reads all image numbers from it and loops over them
3. applies the `sav_gol` filter
4. does the upsampling
5. adds the valid contrasts
6. adds the invalid contrasts (note: the code runs this step before the valid-contrast step)


In [10]:
def final_contrast(sub):
    """
    Build the final replay-condition data for one subject and write it to disk.

    Reads ``../separate_participant_data/Raw_Data/raw_subj_<sub>.dat`` and, for
    every distinct ``imageno``, runs the per-image pipeline
    ``sav_gol`` -> ``upsampling`` -> ``add_contrast_invalid`` ->
    ``add_contrast_valid_apparent_case``. The per-image results are stacked,
    sorted by ``identifier`` and ``time``, sanity-checked and written to
    ``../separate_participant_data/Final_Data/data_subj_<sub>.dat``.

    A message is printed for every image whose processed data contains NaNs,
    and the count is printed when 20 or more saccade/invalid rows still have
    ``contrast_app == 1``.

    :param sub: int, subject number
    :return: pandas.DataFrame, the sorted data that was written to disk
    :raises AssertionError: if some row with ``contrast_app != 1`` is neither
        a saccade row nor an invalid row; nothing is written in that case
    """
    final_columns = ['identifier', 'time', 'x', 'y', 'x_apparent', 'y_apparent', 'pupil',
                     'imageno', 'filtertype', 'contrast_app', 'contrast_cont', 'invalid',
                     'is_saccade', 'filterregion', 'targetpresent', 'expectedlocation']

    # Read 'identifier' as a string column; all other dtypes are inferred.
    lst_str_cols = ['identifier']
    dict_dtypes = {x: 'str' for x in lst_str_cols}
    df = pd.read_csv("../separate_participant_data/Raw_Data/raw_subj_" + str(sub) + ".dat",
                     sep=",", dtype=dict_dtypes)

    # Same filter settings for every image, so build them once.
    filter_kwargs = {"window_length": 13, "polyorder": 2, "mode": "nearest"}

    # Collect the per-image results and concatenate once afterwards:
    # DataFrame.append copied the whole accumulated frame on every iteration
    # and no longer exists in pandas >= 2.0.
    per_image_frames = []
    for im in np.unique(df.imageno):
        df_im = df[df.imageno == im]
        df_filtered = sav_gol(df_im, **filter_kwargs)
        df_upsampled = upsampling(df_filtered)
        df_invalids = add_contrast_invalid(df_upsampled)
        df_apparent = add_contrast_valid_apparent_case(df_invalids)
        per_image_frames.append(df_apparent)

        # Check only this image's rows, so the message names the image that
        # actually contains NaNs instead of every image processed after it.
        if df_apparent.isnull().values.any():
            print("image with NaNs", im)
            print("subject number", sub)

    if per_image_frames:
        df_final = pd.concat(per_image_frames, sort=False)
        # Expected columns first (in the documented order), then any extra
        # columns the helpers produced, in order of appearance.
        extra_columns = [c for c in df_final.columns if c not in final_columns]
        df_final = df_final.reindex(columns=final_columns + extra_columns)
    else:
        # No images at all: keep an empty frame with the expected header.
        df_final = pd.DataFrame(columns=final_columns)

    df_final = df_final.sort_values(by=['identifier', 'time'])

    # All three index arrays below are positional row numbers in the sorted frame.
    contrast_not_one = np.where(df_final['contrast_app'] != 1)[0]
    saccade_rows = np.where(df_final['is_saccade'] == 1)[0]
    invalid_rows = np.where(df_final['invalid'] == 1)[0]
    condition = np.union1d(saccade_rows, invalid_rows)

    # Soft check: saccade/invalid rows whose contrast is still 1. Fewer than
    # 20 such rows are tolerated silently; otherwise the count is printed.
    # NOTE(review): the threshold of 20 comes from the original code - confirm.
    n_flagged_with_contrast_one = np.sum(np.isin(condition, contrast_not_one, invert=True))
    if n_flagged_with_contrast_one >= 20:
        print(n_flagged_with_contrast_one)

    # Hard check: every row with a contrast other than 1 must be a saccade or
    # invalid row. Raised explicitly so the check also runs under ``python -O``.
    n_unexplained = np.sum(np.isin(contrast_not_one, condition, invert=True))
    if n_unexplained != 0:
        raise AssertionError(
            str(n_unexplained) + " rows of subject " + str(sub)
            + " have contrast_app != 1 without being saccade or invalid rows")

    df_final.to_csv('../separate_participant_data/Final_Data/data_subj_' + str(sub) + '.dat',
                    index=False)

    return df_final

%%time
# Process every subject that has a raw data file, including subjects whose
# output already exists. The printed number is the position in subs_list,
# not the subject number.
for idx,sub in enumerate(subs_list):
    print(idx)
    final_contrast(sub)


# Run the pipeline for subject 1 only and keep the returned frame in `df`
# (this also rewrites that subject's output file).
df = final_contrast(1)
# add three more columns: x_apparent, y_apparent, contrast_apparent
# NOTE(review): presumably a leftover to-do - the output built by
# final_contrast() already lists x_apparent, y_apparent and contrast_app; confirm.

In [11]:
%%time
for idx,sub in enumerate(not_yet_done):
    print("sub",sub)
    final_contrast(sub)


sub 78
sub 79
sub 109
25
sub 173
sub 197
sub 152
sub 163
sub 83
sub 96
sub 11
sub 67
sub 144
sub 38
sub 167
sub 199
sub 98
sub 10
sub 123
sub 15
sub 69
sub 29
sub 122
sub 147
sub 169
sub 132
sub 153
sub 179
sub 25
sub 61
sub 81
sub 64
sub 19
sub 74
sub 115
sub 119
sub 158
sub 35
sub 154
sub 56
sub 43
sub 198
75
sub 41
sub 165
sub 106
31
sub 184
sub 195
sub 177
sub 117
sub 156
sub 161
sub 178
sub 175
24
sub 39
21
sub 45
sub 8
sub 2
sub 59
sub 143
sub 42
sub 160
sub 27
sub 172
sub 30
sub 138
sub 26
sub 3
sub 20
sub 62
sub 18
39
sub 134
sub 190
sub 90
sub 6
sub 193
sub 58
95
sub 17
sub 37
sub 105
31
sub 196
31
sub 180
sub 188
sub 191
sub 104
sub 66
sub 33
sub 88
sub 131
sub 194
sub 148
sub 141
sub 171
sub 70
sub 128
sub 92
sub 170
sub 155
sub 87
37
sub 166
sub 192
sub 120
sub 107
sub 4
90
sub 145
sub 130
sub 164
sub 89
sub 126
sub 23
sub 168
sub 65
sub 75
CPU times: user 1h 36min 32s, sys: 3min 45s, total: 1h 40min 17s
Wall time: 1h 40min 38s
