# Data preprocessing

Endogenous Rhythmic Attention project, 2019-2022

<b>Author</b>: Olof J. van der Werf
<br><b>Last updated</b>: 10-08-2022

[reference + DOI to publication]

### Purpose of this notebook

This notebook preprocesses the data before data analysis.

<ul>
<li> Convolve the trials with a Gaussian kernel to create a time series </li>
<li> Detrend the data </li>
<li> Permute the data for analysis </li>
</ul>

### Import necessary libraries

In [21]:
# import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

### Functions

In [43]:
# Gaussian-weighted smoothing of the single trials in the time domain
def convolution(data, time_points=None, kernel_sigma=None):
    """Turn single trials into a smoothed reaction-time time series.

    For every time point, the trial RTs are averaged with Gaussian weights;
    the weight of a trial depends on the distance between its cue-target
    interval and that time point.

    Parameters
    ----------
    data : pandas.DataFrame
        Trials with the columns 'cue-target interval' and 'rt'.
    time_points : array-like, optional
        Time points at which the series is evaluated. Defaults to the
        notebook-level `intervals`.
    kernel_sigma : float, optional
        Standard deviation of the Gaussian. Defaults to the notebook-level
        `sigma`.

    Returns
    -------
    numpy.ndarray
        One value per time point. A time point at which all weights are
        zero (for example when `data` has no rows) evaluates to NaN.
    """
    # fall back to the notebook configuration when nothing is passed in
    if time_points is None:
        time_points = intervals
    if kernel_sigma is None:
        kernel_sigma = sigma

    time_points = np.asarray(time_points, dtype=float)

    # explicit float conversion, so an object-dtype 'rt' column
    # (as produced by filling a column of strings) is handled as well
    trial_times = data['cue-target interval'].to_numpy(dtype=float)
    trial_rts = data['rt'].to_numpy(dtype=float)

    # distance of every trial (rows) to every time point (columns)
    distances = trial_times[:, np.newaxis] - time_points[np.newaxis, :]

    # gaussian function
    W = np.exp(-np.square(distances) / (2 * kernel_sigma**2))

    # element-wise multiplication of gaussian with values
    H = W * trial_rts[:, np.newaxis]

    # take sum over trials to get the weighted average per time point
    W_sum = W.sum(axis=0)
    H_sum = H.sum(axis=0)
    ts = H_sum / W_sum

    return ts

In [48]:
# remove the linear trend of the time series from the single-trial RTs
def detrend(data, ts, time_points=None):
    """Subtract the linear trend of a time series from the trial RTs.

    A first-order polynomial is fitted to `ts` over the time points and the
    fitted line, evaluated at each trial's cue-target interval, is
    subtracted from that trial's RT.

    Parameters
    ----------
    data : pandas.DataFrame
        Trials with the columns 'cue-target interval' and 'rt'.
    ts : array-like
        Time series with one value per time point.
    time_points : array-like, optional
        Time points belonging to `ts`. Defaults to the notebook-level
        `intervals`.

    Returns
    -------
    pandas.DataFrame
        Copy of `data` whose 'rt' column holds the detrended RTs as floats;
        `data` itself is not modified.
    """
    if time_points is None:
        time_points = intervals

    # linear fit of the time series
    coeff = np.polyfit(np.asarray(time_points, dtype=float),
                       np.asarray(ts, dtype=float), 1)

    trial_times = data['cue-target interval'].to_numpy(dtype=float)
    trial_rts = data['rt'].to_numpy(dtype=float)

    # evaluate the fitted line at each trial's own interval; this equals the
    # value of the trend on the time grid, without needing an exact float
    # match between the interval and a grid point
    trend_at_trials = np.polyval(coeff, trial_times)

    detrended = data.copy()
    # one vectorised assignment keeps the column numeric (float64)
    detrended['rt'] = trial_rts - trend_at_trials

    return detrended


### Set variables

In [49]:
# folder where the clean data is
clean_data_folder = '/Volumes/fpn_rdm$/DM0874_OW_EndoRhythSamp/09_Data_after_cleaning/'

# time bins: one bin per `time_resolution`, from start_time up to and including end_time
start_time = 0.491
end_time = 1.690
time_resolution = 0.001
# The quotient is generally not exactly representable in floating point, so
# truncating it with int() can land one bin short. Rounding first gives the
# intended count (steps + 1) without a hard-coded correction.
num_time_bins = int(round((end_time - start_time) / time_resolution)) + 1
intervals = np.round(np.linspace(start_time, end_time, num = num_time_bins),3)

# frequency bins: one bin per `freq_resolution`, from low_freq up to and including high_freq
low_freq = 2
high_freq = 20
freq_resolution = 0.1
num_freq_bins = int(round((high_freq - low_freq) / freq_resolution)) + 1
frequencies =  np.linspace(low_freq, high_freq, num = num_freq_bins)

# `resolution` used to be assigned twice (time, then frequency); the name is
# kept with its final value for any cell that still refers to it
resolution = freq_resolution

# sigma of the Gaussian for the convolution
sigma = 0.01

# number of permutations
nr_of_permutations = 1000

# seed of numpy's global random generator, which np.random.permutation draws
# from, so that the permutations are reproducible across runs
random_seed = 2022
np.random.seed(random_seed)

# relevant condition lists
conditions = ['60','80','100']
validities = ['valid','invalid']
visual_fields = ['left','right','both']
subjects = ['03','04','05','06','09','11','12','14','15','17','18','19','20','21','26','27','30','31','32','33','34','35','37','38','39','40']

## Import data

In [50]:
# Read the cleaned single-trial table. Every column is requested as string,
# except the cue-target interval and the RT, which are converted to float.
file = clean_data_folder +'trials/trials.csv'
trials = pd.read_csv(
    file,
    sep = ',',
    index_col = 0,
    dtype = 'str',
    converters = dict.fromkeys(['cue-target interval', 'rt'], float),
)

# quick look at the first rows
trials.head(10)

Unnamed: 0,subject,validity,visual field,condition,cue-target interval,rt
0,3,valid,left,60,1.2,649.998
1,3,valid,left,60,0.917,478.815
2,3,valid,left,60,1.45,526.564
3,3,valid,left,60,1.533,460.036
4,3,valid,left,60,1.05,482.38
5,3,valid,left,60,1.6,530.887
6,3,valid,left,60,1.333,310.527
7,3,valid,left,60,1.25,569.558
8,3,valid,left,60,0.7,526.806
9,3,valid,left,60,1.3,535.347


## Pre-process data

In [53]:
# Main loop
# Every subset (validity x visual field x condition x subject) is convolved
# into a time series, detrended, and collected; the detrended subsets are
# joined into `data` once, after the loop.
detrended_subsets = []

for validity in validities:

    for visual_field in visual_fields:

        for condition in conditions:

            # this combination is skipped; the check does not depend on the
            # subject, so it is made before entering the subject loop
            if validity == 'invalid' and condition == '100':
                continue

            for subject in subjects:

                subset = trials[(trials['subject'] == subject) &
                                (trials['validity'] == validity) &
                                (trials['visual field'] == visual_field) &
                                (trials['condition'] == condition)]

                # convolute
                ts = convolution(subset)

                # detrend
                subset = detrend(subset,ts)

                # add to data
                detrended_subsets.append(subset)

                # run permutations
                # NOTE(review): this dictionary is re-created for every subset
                # and is not saved anywhere, so after the loop only the
                # permutations of the last subset remain - confirm whether
                # they should be stored per subset.
                interval_permutations = {}
                for i in range(nr_of_permutations):
                    permuted = subset.copy()
                    permuted['cue-target interval'] = np.random.permutation(permuted['cue-target interval'])

                    # convolute
                    ts = convolution(permuted)

                    # detrend
                    interval_permutations[i] = detrend(permuted,ts)


# join all subsets at once (instead of growing the frame inside the loop)
# and reset the index of data
data = pd.concat(detrended_subsets) if detrended_subsets else pd.DataFrame()
data = data.reset_index(drop=True)

# save data
# TODO: saving is not implemented yet
            

In [54]:
# display the detrended trials collected in the main loop
data

Unnamed: 0,subject,validity,visual field,condition,cue-target interval,rt
0,03,valid,left,60,1.200,205.213648
1,03,valid,left,60,0.917,38.487637
2,03,valid,left,60,1.450,77.842379
3,03,valid,left,60,1.533,10.007205
4,03,valid,left,60,1.050,39.95801
...,...,...,...,...,...,...
40739,40,invalid,both,80,1.583,-145.750205
40740,40,invalid,both,80,1.567,-48.047096
40741,40,invalid,both,80,1.650,-108.335474
40742,40,invalid,both,80,0.633,88.505892


Unnamed: 0,time,rt,condition,subject
0,1.667,224.169577,60.0,3.0
1,0.700,160.521761,60.0,3.0
2,0.567,174.048930,60.0,3.0
3,1.550,272.652079,60.0,3.0
4,1.017,13.250922,60.0,3.0
...,...,...,...,...
5899,1.583,-145.750205,80.0,40.0
5900,1.567,-48.047096,80.0,40.0
5901,1.650,-108.335474,80.0,40.0
5902,0.633,88.505892,80.0,40.0


In [418]:
# stLSS analysis
# NOTE(review): this cell is an unfinished draft.
#   - `stLSS` is not defined in the visible part of the notebook.
#   - The loop variables (validity, visual_field, condition) are not yet
#     used to select a subset of `data`, so every pass works on all trials.
#   - Neither the power spectrum nor the permutation results are stored.
for validity in validities:
    for visual_field in visual_fields:
        for condition in conditions:

            powerspec = stLSS(data)

            # time bin permutations
            # TODO: `time_permutations` is created but never filled
            time_permutations = pd.DataFrame()

            for i in range(nr_of_permutations):
                perm = data.copy()
                # shuffle the column that convolution() and detrend() read
                perm['cue-target interval'] = np.random.permutation(perm['cue-target interval'])

                # convolute, detrend, stLSS
                # (done for every permutation, not only for the last one)
                ts_perm = convolution(perm)
                perm = detrend(perm, ts_perm)
                # TODO: run stLSS on the detrended permutation and keep the result

        # condition permutations
        # TODO: not implemented yet

     time          rt  condition
0   1.383  -84.942343         60
1   1.183   151.12452         60
2   1.367  -27.156234         60
3   0.750  -47.597592         60
4   0.550  -121.83073         60
..    ...         ...        ...
62  1.583 -145.750205         60
63  1.567  -48.047096         60
64  1.650 -108.335474         60
65  0.633   88.505892         60
66  0.533  292.807324         60

[67 rows x 3 columns]
     time          rt  condition
0   1.150  -84.942343         60
1   1.400   151.12452         60
2   0.800  -27.156234         60
3   1.567  -47.597592         60
4   0.617  -121.83073         60
..    ...         ...        ...
62  0.767 -145.750205         60
63  0.833  -48.047096         60
64  0.917 -108.335474         60
65  1.333   88.505892         60
66  0.983  292.807324         60

[67 rows x 3 columns]
     time          rt  condition
0   1.383  -84.942343         60
1   1.183   151.12452         60
2   1.367  -27.156234         60
3   0.750  -47.597592         