# Motion Coherence Experiment Analysis Flow

## Glossary of Variable Names:
* moz/MOz = Mert Özkan
* mc: motion coherence
* f: file
* dat: data
* l: line
* idx: index
* ptr: pointer
* trl: trial
* no: number (rank)
* dxn: direction
* coh: coherence
* ok: correct
* is: prefix such as isOK = is it correct?
* rt: reaction time
* t: time
* st: string
* i: integer
* stc: sentence
* w: word
* inv: inventory
* cond: condition
* sq: order/sequence
* sxn: session
* rej: rejected
* sub: subject/participant
* w: with
* wh: *prefix* which/what
* prev: previous
* pc: percent/percentage
* dr: directory
* arr: array
* kw: keyword
* q: quantity/number
* avg: average/mean
* sd: std/standard deviation
* rxn: reaction/response
* lvl: level
* spcl: special
* lst: list
* el: element

# Synchronising Psychophysical and EEG Trials

### Import Data from Log Files

In [1]:
def import_psyphys_mc(f_name,path):
    '''
    Feb 22, 2019
    Mert Ozkan
    Dartmouth College
    Motion Coherence
    
    Imports datasets from log files for the Motion Coherence experiment.
    Usage: trl_no, dxn, coh, isOK, key, rt = import_psyphys_mc(f_name,path)
    
    f_name: name of the log file
    path: directory that contains the log file
    
    The data section starts 2 lines after the line containing the data
    pointer (a run of '%' characters). Each data line is split on whitespace
    and read as: trial number, direction, coherence, correctness, key,
    reaction time. Keys logged as 'space' or '-' are returned as 0.
    Blank lines in the data section are skipped.
    
    Returns six numpy arrays (trl_no, dxn, coh, isOK, key, rt).
    Raises ValueError if the data pointer is not found in the file.
    '''
    
    import numpy as np
    
    dat_path = '/'.join([path,f_name])
    # Context manager guarantees the log file is closed, even on a parse error
    with open(dat_path,'r') as f:
        read_l = f.readlines()
    
    # Data log starts 2 lines after the data pointer
    dat_ptr = '%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%'
    dat_start_idx = None
    for idx, l in enumerate(read_l):
        if dat_ptr in l:
            dat_start_idx = idx + 2
            break
    if dat_start_idx is None:
        raise ValueError(
            'Data pointer not found in log file: {}'.format(dat_path))
    
    trl_no = []
    dxn = []
    coh = []
    isOK = []
    key = []
    rt = []
    for trl in read_l[dat_start_idx:]:
        trl_dat = trl.split()
        if not trl_dat:
            # Blank line (e.g. trailing newline at the end of the log)
            continue
        trl_no.append(int(trl_dat[0]))
        dxn.append(int(trl_dat[1]))
        coh.append(float(trl_dat[2]))
        isOK.append(int(trl_dat[3]))
        # Invalid key presses are logged as 'space' or '-'; encode them as 0
        if trl_dat[4] in ('space', '-'):
            key.append(0)
        else:
            key.append(int(trl_dat[4]))
        rt.append(float(trl_dat[5]))
    
    return (np.array(trl_no), np.array(dxn), np.array(coh),
            np.array(isOK), np.array(key), np.array(rt))

### Import Subject Data inventory

In [2]:
def import_subjectdatainfo_mc(path):
    '''
    Feb 22, 2019
    Mert Ozkan
    Dartmouth College
    Motion Coherence
    
    Import subject data inventory from data_inventory_mc.txt
    Usage: sub_no, sub_name, sxn_sq, bhv_ptr, eeg_ptr, trl_rej_1, trl_rej_2 = import_subjectdatainfo_mc(path)
    
    path: full path of the inventory file
    
    Every line containing '*' marks a subject entry; the entry itself is the
    line that follows the marker. Fields are separated by '; ' and read as:
    subject number, subject name, session order, behavioural pointer,
    eeg pointer, rejected trials 1, rejected trials 2. List fields
    (session order and both rejection lists) are separated by ', '.
    An empty list field is returned as an empty integer array.
    
    Returns seven lists with one element per subject; sxn_sq, trl_rej_1 and
    trl_rej_2 hold integer numpy arrays.
    '''
    
    import numpy as np
    
    def _int_lst(field):
        # Parse a ', '-separated field into an int array ('' -> empty array)
        field = field.strip()
        if field == '':
            return np.array([]).astype(int)
        return np.array(field.split(', ')).astype(int)
    
    # Context manager guarantees the inventory file is closed
    with open(path,'r') as f:
        read_l = f.readlines()
    
    sub_no = []
    sub_name = []
    sxn_sq = []
    bhv_ptr = []
    eeg_ptr = []
    trl_rej_1 = []
    trl_rej_2 = []
    for idx, l in enumerate(read_l):
        if '*' in l:
            # The subject entry is the line right after the marker line
            inv_st = read_l[idx+1].split('; ')
            sub_no.append(int(inv_st[0]))
            sub_name.append(inv_st[1])
            sxn_sq.append(_int_lst(inv_st[2]))
            bhv_ptr.append(inv_st[3])
            eeg_ptr.append(inv_st[4])
            # Both rejection lists may be empty; the last one also carries
            # the end-of-line character, which _int_lst strips
            trl_rej_1.append(_int_lst(inv_st[5]))
            trl_rej_2.append(_int_lst(inv_st[6]))
    return sub_no, sub_name, sxn_sq, bhv_ptr, eeg_ptr, trl_rej_1, trl_rej_2

### Find the file names in a directory that contain all of the given keywords

In [3]:
def findfilenameindirectory_mc(path, kw, numberoffiles=None, maxnumberoffiles=None, minnumberoffiles=None):
    '''
    Lists the entries of a directory whose names contain every keyword.
    Usage: f_names = findfilenameindirectory_mc(path, kw, numberoffiles=1)
    
    path: directory to search (not recursive)
    kw: list of keywords; an entry is kept only if each keyword is a
        substring of its name. An empty list keeps every entry.
    numberoffiles: expected exact number of matches (optional)
    maxnumberoffiles: expected maximum number of matches (optional)
    minnumberoffiles: expected minimum number of matches (optional)
    
    If any given expectation is not met, a warning is printed; the matches
    are returned regardless.
    
    Returns a list of matching names, in the order given by os.listdir.
    '''
    import os
    
    # Narrow the directory listing down one keyword at a time
    subset_contents = os.listdir(path)
    for whKW in kw:
        subset_contents = [whContent for whContent in subset_contents
                           if whKW in whContent]
    
    qFound = len(subset_contents)
    isMismatch = (
        (numberoffiles is not None and numberoffiles != qFound) or
        (maxnumberoffiles is not None and maxnumberoffiles < qFound) or
        (minnumberoffiles is not None and minnumberoffiles > qFound))
    if isMismatch:
        print('''
        
        File Quantity Mismatch! in findfilenameindirectory_mc()
        Expected number of files: {}
        Expected maximum number of files: {}
        Expected minimum number of files: {}
        Number of files found: {}
        Keywords: {}
        Filenames: {}
        Path = {}
        
        '''.format(numberoffiles,
                   maxnumberoffiles,
                   minnumberoffiles,
                   qFound,
                   kw,
                   subset_contents,
                   path)
             )
    return subset_contents

### Combine the 3 behavioural data files in the order the experiment was conducted.
*the experiment included 3 successive sessions for 3 conditions but the eeg recording was continuous*

In [4]:
def combineconditions_mc(whSub, sq, path):
    '''
    Combines the behavioural log files of one subject in session order.
    Usage: trl_no, cond, dxn, coh, isOK, key, rt = combineconditions_mc(whSub, sq, path)
    
    whSub: keyword identifying the subject's log files in the directory
    sq: array of 0-based condition indices in the order the sessions were
        run (0: translational, 1: rotational, 2: radial)
    path: directory that contains the log files
    
    For each condition in sq, the file whose name contains both whSub and
    the condition's '<name>.log' suffix is imported. Trial numbers of each
    session are offset by the last trial number of the previous session, so
    that they continue across sessions.
    
    Returns seven 1-D numpy arrays: trial number, condition index,
    direction, coherence, correctness, key, reaction time.
    Raises FileNotFoundError if no log file matches a condition.
    '''
    import numpy as np
    
    conds_id = np.array(['translational.log', 'rotational.log', 'radial.log']) # The index corresponding to the condition number
    conds_sq = conds_id[sq]
    
    prev_sxn_trl_no = 0
    # Accumulators start as empty arrays and grow by one session per loop
    prev_trl_no = np.array([])
    prev_dxn = np.array([])
    prev_coh = np.array([])
    prev_isOK = np.array([])
    prev_key = np.array([])
    prev_rt = np.array([])
    prev_cond_arr = np.array([])
    
    for cond in conds_sq:
        f_name = findfilenameindirectory_mc(path, [whSub, cond],numberoffiles=1)
        if len(f_name) > 1:
            print('''
            No unique file for '{}' and {}
            <combineconditions_mc>
            '''.format(whSub,cond))
        if len(f_name) == 0:
            # Fail with an explicit message instead of an IndexError below
            raise FileNotFoundError(
                "No log file for '{}' and {} in {}".format(whSub, cond, path))
        trl_no, dxn, coh, isOK, key, rt = import_psyphys_mc(f_name[0],path)
        
        # Condition index (position in conds_id), repeated for every trial
        cond_idx = np.where(conds_id == cond)[0][0]
        cond_arr = np.full(len(trl_no), cond_idx)
        
        # Continue the trial numbering from the previous session
        trl_no = trl_no + prev_sxn_trl_no
        if len(trl_no) > 0:
            prev_sxn_trl_no = trl_no[-1]
        
        prev_trl_no = np.concatenate((prev_trl_no,trl_no), axis=None)
        prev_cond_arr = np.concatenate((prev_cond_arr,cond_arr), axis=None)
        prev_dxn = np.concatenate((prev_dxn,dxn), axis=None)
        prev_coh = np.concatenate((prev_coh,coh), axis=None)
        prev_isOK = np.concatenate((prev_isOK,isOK), axis=None)
        prev_key = np.concatenate((prev_key,key), axis=None)
        prev_rt = np.concatenate((prev_rt,rt), axis=None)
        
    return prev_trl_no, prev_cond_arr, prev_dxn, prev_coh, prev_isOK, prev_key, prev_rt
# import numpy as np
# whSub = 'ENC'
# sq = np.array([2, 1, 0])
# path = '/Users/mertozkan/Documents/Psyc161FinalProject/MotionCoherence/data'
# prev_trl_no, prev_cond_arr, prev_dxn, prev_coh, prev_isOK, prev_key, prev_rt = combineconditions_mc(whSub, sq, path)
# print(prev_cond_arr)

## Synchronise Data Files

### Synchronise behavioural data to eeg data
1. Import subject data information
2. Combine 3 condition files of each subject according to the session order
3. Mark invalid key press:
    * log files registered these as __key='space'__ or __'-'__, outputted here as __key=0__
4. Discard rejected trials at Artefact Rejection 1 & 2
5. Mark trials that are not valid according to Reaction Time criteria:
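    * a trial is invalid if RT < AVG - 2SD or RT > AVG + 2SD (AVG and SD computed over the remaining trials with a valid key press), or if RT <= 0.1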
6. Write the new data file in .csv format and save it in the data folder:
    * fname: **{*sub_name*}_subdatbhv.csv**
7. Register following variables in a .csv file:
    * *info_sxnstats.csv*
    * sub_no, sub_name, bhv_ptr, eeg_ptr, qTrlRej, qTrl, qTrlVld, qTrlVldKey, qTrlVldRT

In [5]:
import numpy as np
import os

# Synchronise the behavioural data of every subject in the inventory with the
# eeg trials: combine the session logs, drop rejected trials, flag valid
# trials, write one csv per subject and one summary row per subject.
sub_no, sub_name, sxn_sq, bhv_ptr, eeg_ptr, trl_rej_1, trl_rej_2 = import_subjectdatainfo_mc('/Users/mertozkan/Documents/Psyc161FinalProject/MotionCoherence/MC/data_inventory_mc.txt')
dat_path = '/Users/mertozkan/Documents/Psyc161FinalProject/MotionCoherence/data'

# All output files below are written relative to dat_path; the previous
# working directory is restored at the end of the cell.
prev_dr = os.getcwd()
os.chdir(dat_path)

# NOTE(review): f_info is closed only at the end of the cell; an exception in
# the loop leaves it open and the working directory changed.
# NOTE(review): the header text below says "_subdatabhv.csv", while the files
# written further down are named "_subdatbhv.csv".
f_info = open('info_sxnstats.csv','w')
f_info.write('''# Session Information
# Subject data is registered in "{sub_name}_subdatabhv.csv" files
# sub_no, sub_name, bhv_ptr, eeg_ptr, qTrlRej, qTrl, qTrlVld, qTrlVldKey, qTrlVldRT
''')
for n in range(len(sub_no)):
    # sxn_sq[n]-1 is passed as the condition index sequence
    # (presumably the inventory stores it 1-based -- confirm)
    trl_no, cond, dxn, coh, isOK, key, rt = combineconditions_mc(bhv_ptr[n], sxn_sq[n]-1, dat_path)
    
    # Expected number of trials left after both rejection steps
    check_script = len(trl_no)-(len(trl_rej_1[n])+len(trl_rej_2[n]))
    qTrl = len(trl_no)
    qTrlRej = len(trl_rej_1[n])+len(trl_rej_2[n])
    
    # Mark validity 
    
    # key == 0 encodes an invalid key press
    vld_key = key != 0

    # Rows: 0 trl_no, 1 cond, 2 dxn, 3 coh, 4 key, 5 isOK, 6 rt, 7 vld_key;
    # columns are trials.
    dat = np.matrix([trl_no, cond, dxn, coh, key, isOK, rt, vld_key])
    # Remove rejected trials (columns). The second deletion indexes into the
    # matrix that is already reduced by the first one.
    # presumably the -1 converts 1-based trial numbers to 0-based columns -- confirm
    dat = np.delete(dat,trl_rej_1[n]-1,1)
    dat = np.delete(dat,trl_rej_2[n]-1,1)
    
    # Reaction time statistics use only the remaining trials with a valid key
    vld_key_post_rej = dat[7].astype(bool)
    rt_post_rej = dat[6]
    rt_avg = np.mean(rt_post_rej[vld_key_post_rej])
    rt_sd = np.std(rt_post_rej[vld_key_post_rej])
    
    # Implement reaction time criteria
    # Valid if within avg +/- 2 sd (inclusive) and greater than .1
    vld_rt = np.logical_and((rt_post_rej <= (rt_avg + 2*rt_sd)),(rt_post_rej >= (rt_avg - 2*rt_sd)))
    vld_rt = np.logical_and((rt_post_rej > .1), vld_rt)
    
    # Separate trials with valid reaction keys from trials with valid reaction times
    # Trials with an invalid key are set to True here, so vld_rt is False only
    # for trials that have a valid key but fail the reaction time criteria.
    vld_rt = np.logical_or(
        np.logical_not(vld_key_post_rej),vld_rt
    )

    qTrlVldKey = np.sum(vld_key_post_rej)
    qTrlVldRT = np.sum(vld_rt)
    qTrlVld = np.sum(np.logical_and(vld_key_post_rej,vld_rt))
    
    # Append vld_rt as row 8
    dat  = np.concatenate(
        (dat,vld_rt), axis=0
    )
    # Sanity check: remaining columns must equal total minus rejected trials
    if check_script != dat.shape[1]:
        print('The number of trials do not match! Subject Number: {}'.format(n+1))
        
    f_name = '{}_subdatbhv.csv'.format(sub_name[n])
    # One trial per row, one variable per column in the output file
    dat = np.transpose(dat)
    
    # NOTE(review): the column list in the header labels the second column
    # "sxn_sq", while the data written there is `cond`.
    np.savetxt(f_name,
                 dat, delimiter = ', ',
                 header = '''
                 Behavioural Data Log:
                 
                 All trials match to the eeg trials.
                 Valid trials are marked.
                 
                 Subject Initials: sub_name = {}
                 Subject Number: sub_no = {}
                 
                 trl_no, sxn_sq, dxn, coh, key, isOK, rt, isVld_key, isVld_rt
                 '''.format(sub_name[n], sub_no[n]
                           ), fmt='%1.5e')
    # One summary row per subject, matching the column list in the file header
    f_info.write('''{}, {}, {}, {}, {}, {}, {}, {}, {}\n'''.format(
        sub_no[n], sub_name[n], bhv_ptr[n], eeg_ptr[n], qTrlRej, qTrl, qTrlVld, qTrlVldKey, qTrlVldRT)
                )
f_info.close()
os.chdir(prev_dr)



## Psychophysical Curve Fitting

### Data Log for Bayesian Curve Fitting
1. __Read each '{}_subdatbhv.csv' file__
2. __Separate each condition 3x6 (Motion Type x Coherence Level)__
3. __Compute number of correct responses for each condition__
4. __Write in a .csv file in the following format:__ 
    * sub_no, cond_no, coh_lvl, qOK, qTrl
    * *datbhv_pfit.csv*
    
5. __Register the index number of valid and correct trials in another .csv file:__
    * sub_no, trl_vld, trl_ok
    * *info_trials.csv*

In [13]:
def importfromcsv(path, whType='str'):
    '''
    Imports data from .csv files.
    The format of the file should follow the rules below:
        1. Lines starting with '#' are ignored. A description of the file should be given here.
        2. If data contains a list:
            a. Elements should be separated with space.
            b. The function will return the list as a string without characters '[' or ']'.
    
    path: path of the .csv file; fields are separated by ', '
    whType: type of the returned array: 'str', 'float', 'int' or 'bool'.
        If any field contains a list ('[' or ']'), the output is returned
        as 'str' regardless and a message is printed once.
    
    Blank lines are skipped.
    Returns a 2-D numpy array: one row per data line, one column per field.
    '''
    import numpy as np
    
    if whType not in ['str', 'float', 'int','bool']:
        print('''whType should be equal to one of the following: 'str', 'float', 'int','bool''')

    # Context manager guarantees the file is closed
    with open(path,'r') as f:
        f_l = f.readlines()
    
    dat = []
    for l in f_l:
        if l.startswith('#') or not l.strip():
            continue
        trl = l.replace('\n','').split(', ')
        for idx, el in enumerate(trl):
            # Only an actual list in the data forces the string output
            if '[' in el or ']' in el:
                if whType != 'str':
                    whType = 'str'
                    print('The file contains a list. Therefore, the output will be returned as type string!')
                trl[idx] = el.replace('[','').replace(']','')
        dat.append(trl)
    dat = np.array(dat).astype(whType)
    return dat
# path = '/Users/mertozkan/Documents/Psyc161FinalProject/MotionCoherence/data/ENC_subdatbhv.csv'
# importfromcsv(path, whType='str')

array([['2.00000e+00', '2.00000e+00', '1.00000e+00', ..., '8.71096e-01',
        '1.00000e+00', '0.00000e+00'],
       ['3.00000e+00', '2.00000e+00', '1.00000e+00', ..., '7.01238e-01',
        '1.00000e+00', '0.00000e+00'],
       ['4.00000e+00', '2.00000e+00', '-1.00000e+00', ..., '8.61504e-01',
        '1.00000e+00', '0.00000e+00'],
       ...,
       ['7.35000e+02', '0.00000e+00', '0.00000e+00', ..., '2.80019e-01',
        '1.00000e+00', '1.00000e+00'],
       ['7.36000e+02', '0.00000e+00', '1.80000e+02', ..., '5.64291e-01',
        '1.00000e+00', '1.00000e+00'],
       ['7.37000e+02', '0.00000e+00', '0.00000e+00', ..., '2.90537e-01',
        '1.00000e+00', '1.00000e+00']], dtype='<U12')

In [11]:
def array2columnvectors(arr):
    '''
    Splits a 2-D array into its columns.
    Usage: col_0, col_1, ... = array2columnvectors(arr)
    
    Returns a tuple with one 1-D array per column of arr, in column order.
    '''
    qCol = arr.shape[1]
    return tuple(arr[:, whCol] for whCol in range(qCol))

In [12]:
import os

# Build the data log for psychophysical curve fitting:
#   datbhv_pfit.csv  - number of correct and total valid trials for every
#                      subject x motion type x coherence level
#   info_trials.csv  - indices of valid and of correct trials per subject
import sys

import numpy as np

dat_path = '/Users/mertozkan/Documents/Psyc161FinalProject/MotionCoherence/data'
prev_dr = os.getcwd()
# Input and output files are addressed relative to dat_path
os.chdir(dat_path)

info = importfromcsv('info_sxnstats.csv')

# Context managers close both output files even if a subject file fails
with open('datbhv_pfit.csv','w') as f_dat, open('info_trials.csv','w') as f_info:
    f_dat.write('''# Data for Psychophysical Curve Fitting
# sub_no, mot, coh, qOK, qTrl
''')
    f_info.write('''# Information for Correct and Valid Trial Indices per Each Subject
# to be used while matching eeg trials
# sub_no, trl_vld, trl_ok
''')

    for sub in info:
        sub_no = sub[0]
        sub_ptr = sub[1]
        dat = importfromcsv('{}_subdatbhv.csv'.format(sub_ptr), whType='float')
        trl_no, mot, dxn, coh, key, isOK, rt, isVld_key, isVld_rt = array2columnvectors(dat)
        # A trial is valid only if both the key and the reaction time are valid
        isVld = np.logical_and(isVld_key, isVld_rt)

        # Row indices (0-based, within the subject file) of correct / valid trials
        trl_ok = np.where(isOK)[0]
        trl_vld = np.where(isVld)[0]
        # Format each index array on a single line: the default string form of
        # an array wraps long arrays over several lines (and summarises very
        # long ones), which would break the one-row-per-subject file layout.
        trl_vld_st = np.array2string(
            trl_vld, max_line_width=sys.maxsize, threshold=sys.maxsize)
        trl_ok_st = np.array2string(
            trl_ok, max_line_width=sys.maxsize, threshold=sys.maxsize)
        f_info.write('{}, {}, {}\n'.format(sub_no, trl_vld_st, trl_ok_st))

        for whMot in np.unique(mot): # 0 1 2: Tr Ro Ra (index into conds_id of combineconditions_mc)
            for whCoh in np.unique(coh):
                # Valid trials of the current motion type x coherence level
                curr_cond = np.logical_and(
                    np.logical_and(
                        np.equal(mot,whMot), np.equal(coh,whCoh)
                    ), isVld
                )

                qTrl = np.sum(curr_cond)
                qOK = np.sum(isOK[curr_cond])

                f_dat.write('{}, {}, {}, {}, {}\n'.format(sub_no, whMot, whCoh, qOK, qTrl))

### Curve Fitting