# Notebook 01: Preprocessing

In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
## Plot styling: seaborn 'white' style and 'notebook' context with fonts scaled by 1.25.
sns.set_style('white')
sns.set_context('notebook', font_scale=1.25)
## IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

## Section 1: Preprocess and collate data

In [2]:
from pandas import read_csv, concat

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
### Load and prepare data.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

## Locate files.
## Subject logs are the entries of 'raw' whose names start with 'LOGS';
## sorting makes the processing order deterministic.
files = sorted([f for f in os.listdir('raw') if f.startswith('LOGS')])
if not files:
    ## Fail here with a clear message instead of later inside concat.
    raise ValueError("No files starting with 'LOGS' were found in 'raw'.")

## Define loop-invariant lookups once, rather than on every iteration.
## Names assigned to the 13 columns of each log file. The final, unnamed
## column presumably comes from a trailing separator -- TODO confirm.
LOG_COLUMNS = ('Subject','Trial','Condition','Cue',
               'OutcomeCorrect','OutcomeIncorrect','OutcomeNoPress',
               'Outcome','Target','Choice','Accuracy','RT','')

## Columns (and their order) kept in the collated data.
OUTPUT_COLUMNS = ['Subject','Condition','Block','Trial','Exposure','Cue','Valence','Action',
                  'Target','Choice','Accuracy','RT','Outcome']

## Cue code -> factor level.
CUE_VALENCE = {1:'Gain', 2:'Loss', 3:'Gain', 4:'Loss'}
CUE_ACTION = {1:'Go', 2:'Go', 3:'No-Go', 4:'No-Go'}

## Number of (within-condition) trials per block.
TRIALS_PER_BLOCK = 20

def tally(arr):
    """Return the running count 1..n for a group of n rows."""
    return np.arange(arr.size) + 1

## Main loop.
data = []
for f in files:

    ## Load data (space-delimited text).
    df = read_csv(os.path.join('raw',f), sep=' ')

    ## Rename columns.
    df.columns = LOG_COLUMNS

    ## Update subject.
    ## The ID is the integer formed by all numeric characters in the filename.
    df['Subject'] = int(''.join([s for s in f if s.isnumeric()]))

    ## Update condition info.
    ## Condition code 1 is labelled 'Threat'; every other code is labelled 'Safe'.
    df['Condition'] = np.where(df.Condition==1, 'Threat', 'Safe')
    df['Valence'] = df.Cue.replace(CUE_VALENCE)
    df['Action'] = df.Cue.replace(CUE_ACTION)

    ## Update trial info.
    ## Trial is renumbered 1..n within each condition, Exposure counts the
    ## presentations of each cue within a condition, and Block is derived
    ## from the renumbered (within-condition) Trial.
    df['Trial'] = df.groupby('Condition').Trial.transform(tally)
    df['Exposure'] = df.groupby(['Condition','Cue']).Trial.transform(tally)
    df['Block'] = ((df['Trial'] - 1) // TRIALS_PER_BLOCK) + 1

    ## Restrict to columns of interest.
    df = df[OUTPUT_COLUMNS]

    ## Append.
    data.append(df)

## Concatenate data.
## ignore_index gives the collated frame one unique running index instead of
## repeating each subject's 0..n-1 labels.
data = concat(data, ignore_index=True)

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
### Load and prepare metadata.
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#

## Load metadata.
metadata = read_csv(os.path.join('raw','STAIData.csv'))

## Update diagnostic category.
## 'patient' is relabelled 'Anx'; 'HC' and any other label pass through unchanged.
metadata['Diagnosis'] = metadata.Diagnosis.replace({'HC':'HC','patient':'Anx'})

## Update subject info.
def to_numeric(label):
    """Return the integer formed by the numeric characters of `label`."""
    return int(''.join([s for s in label if s.isnumeric()]))

metadata['Subject'] = metadata.Subject.apply(to_numeric)

## Report subjects without metadata.
## The inner merge below drops every trial of such subjects, so make the
## loss visible instead of silent.
missing = sorted(int(s) for s in set(data.Subject) - set(metadata.Subject))
if missing:
    print('Warning: no metadata for subject(s) %s; their trials are dropped by the merge.' % missing)

## Merge with data.
## validate='many_to_one' raises if a subject appears more than once in the
## metadata, which would otherwise silently duplicate that subject's trials.
data = data.merge(metadata, on='Subject', how='inner', validate='many_to_one')

## Save.
data.to_csv('data.csv', index=False)