In [5]:
'''EEG cleaning script
saves segmented and cleaned files
**requires neuroscan recording and subject beh files (sessions 1 and 2)**
neuroscan -> scan_response.ipynb -> subjectMLrun.ipynb'''
import curryreader as cr
import os
from hdf5storage import loadmat, savemat
import numpy as np
import sys
from scipy.stats import zscore
import pandas as pd
import ast

def getFeats(filename):
    """Collect per-trial behavioural information from a block CSV file.

    Every cell of the ``'Experiment Data'`` column is parsed with
    ``ast.literal_eval``; only parsed entries that contain ``'Trial'`` are used.

    Parameters
    ----------
    filename : str
        Path of the CSV file to read.

    Returns
    -------
    stimfeat : np.ndarray (dtype object)
        For each trial, one ``'<name>: <value>'`` string per entry of
        ``'All Features'`` followed by the string ``'Response Cue'``,
        concatenated over trials in file order.
    answerarray : np.ndarray
        ``'Correct Answer'`` of each trial.
    mlans : np.ndarray (dtype int)
        ``'Model Response'`` of each trial.
    refind : np.ndarray (dtype int)
        ``'Reference Index'`` of each trial.
    resp : np.ndarray
        ``'Response'`` of each trial.
    """
    table = pd.read_csv(filename)

    feature_labels = []
    correct_answers = []
    model_answers = []
    reference_indices = []
    responses = []

    for raw_entry in table['Experiment Data']:
        entry = ast.literal_eval(raw_entry)
        if 'Trial' not in entry:
            # non-trial rows carry nothing we need
            continue

        correct_answers.append(entry['Correct Answer'])
        model_answers.append(entry['Model Response'])

        trial_features = entry['All Features']
        for feature_name in trial_features:
            feature_labels.append(feature_name + ': ' + str(trial_features[feature_name]))
        # every trial ends with the response cue, after its features
        feature_labels.append('Response Cue')

        reference_indices.append(entry['Reference Index'])
        responses.append(entry['Response'])

    resp = np.array(responses)
    stimfeat = np.array(feature_labels, dtype='object')
    answerarray = np.array(correct_answers)
    mlans = np.array(model_answers, dtype='int')
    refind = np.array(reference_indices, dtype='int')
    return stimfeat, answerarray, mlans, refind, resp

def eventdata(photocell, events, stimpertrial=10, responsewindow=1000):
    """Extract stimulus onsets from a photocell trace and match responses to trials.

    Parameters
    ----------
    photocell : array-like, 1-D
        Photocell signal. Samples whose z-score exceeds 1 count as "on".
    events : np.ndarray, 2-D
        Event table; column 0 is compared against the photocell sample indices
        and column 1 holds the event code. Codes 2 and 32 are the responses.
    stimpertrial : int, optional
        Number of consecutive stimulus onsets that make up one trial (default 10).
    responsewindow : int, optional
        A response counts for a trial only if it occurs less than this many
        samples after the trial's last stimulus onset (default 1000).

    Returns
    -------
    stimonsets : np.ndarray
        Sample index at which each run of above-threshold samples begins.
    trialonsets : np.ndarray
        Every ``stimpertrial``-th onset starting with the first.
    trialoffsets : np.ndarray
        Every ``stimpertrial``-th onset starting with onset ``stimpertrial - 1``.
        When the number of onsets is not a multiple of ``stimpertrial`` this is
        one element shorter than ``trialonsets``.
    responsetime : np.ndarray
        Time (events column 0) of the first response of each trial, 0 if none.
    responsechoice : np.ndarray
        Code (events column 1) of that response, -1 if none.
    """
    above = np.where(zscore(photocell) > 1)[0]
    # A new stimulus starts wherever two above-threshold samples are not adjacent.
    breaks = np.where(np.diff(above) > 1)[0]
    stimonsets = np.zeros(len(breaks) + 1)
    stimonsets[0] = above[0]
    stimonsets[1:] = above[breaks + 1]

    trialonsets = stimonsets[np.arange(0, len(stimonsets), stimpertrial)]
    trialoffsets = stimonsets[np.arange(stimpertrial - 1, len(stimonsets), stimpertrial)]

    isresponse = (events[:, 1] == 2) | (events[:, 1] == 32)
    eresponsetime = events[isresponse, 0]
    eresponsechoice = events[isresponse, 1]

    ntrials = len(trialonsets)
    responsetime = np.zeros(ntrials)
    responsechoice = np.full(ntrials, -1)

    for j in range(ntrials - 1):
        # Response must follow the last stimulus, fall inside the window and
        # precede the first stimulus of the next trial.
        hits = np.flatnonzero(
            (eresponsetime > trialoffsets[j])
            & (eresponsetime < trialoffsets[j] + responsewindow)
            & (eresponsetime < trialonsets[j + 1])
        )
        # Test the number of hits, not their truthiness: any([0]) is False and
        # would silently drop a trial whose only response is the first one.
        if hits.size > 0:
            # Historically the first response is taken. Open question: should
            # trials with more than one response be ignored instead?
            responsetime[j] = eresponsetime[hits[0]]
            responsechoice[j] = eresponsechoice[hits[0]]

    # The last trial has no following trial onset to bound the window.
    lasthits = np.flatnonzero(
        (eresponsetime > trialoffsets[-1])
        & (eresponsetime < trialoffsets[-1] + responsewindow)
    )
    if lasthits.size > 0:
        responsetime[-1] = eresponsetime[lasthits[0]]
        responsechoice[-1] = eresponsechoice[lasthits[0]]
    return stimonsets, trialonsets, trialoffsets, responsetime, responsechoice

def segmentdata(stimonsets, trialonsets, prestim, poststim, stimpertrial=10):
    """Build per-stimulus trial/position labels and the epoch time axis.

    Parameters
    ----------
    stimonsets : sequence
        One entry per stimulus; only its length is used.
    trialonsets : sequence
        One entry per trial; only its length is used.
    prestim, poststim : int
        The time axis runs from ``-prestim`` up to (not including) ``poststim``.
    stimpertrial : int, optional
        Number of stimuli in one trial (default 10).

    Returns
    -------
    triallabel : np.ndarray (float)
        Index of the trial each stimulus belongs to. Stimuli lying beyond the
        last trial in ``trialonsets`` keep the label 0.
    poslabel : np.ndarray (float)
        Position of each stimulus within its trial (0 .. stimpertrial - 1).
    time : np.ndarray
        ``np.arange(-prestim, poststim)``.
    """
    stimindex = np.arange(len(stimonsets))

    triallabel = (stimindex // stimpertrial).astype(float)
    # Stimuli not covered by any trial stay at the zero default.
    triallabel[triallabel >= len(trialonsets)] = 0

    poslabel = (stimindex % stimpertrial).astype(float)
    time = np.arange(-prestim, poststim)
    return triallabel, poslabel, time

def getevents(fname):
    """Read one recording with curryreader and return its events and photocell data.

    Parameters
    ----------
    fname : str
        Path of the recording passed to ``cr.read``.

    Returns
    -------
    events
        The ``'events'`` entry of the loaded recording.
    photocells
        Columns 132 and 133 of the ``'data'`` entry
        (presumably the two photocell channels - confirm against the montage).
    """
    recording = cr.read(fname, plotdata=0, verbosity=1)
    photocell_columns = slice(132, 134)
    return recording['events'], recording['data'][:, photocell_columns]

## Check where it doesn't match pattern
def realignStim(stimonsets, stimpertrial, trialsperfile, trialgap=850):
    """Re-align stimulus onsets into fixed-length trials, blanking malformed ones.

    The onsets are cut into trials wherever two consecutive onsets are more than
    ``trialgap`` apart. A trial with exactly ``stimpertrial`` onsets is kept;
    any other trial is replaced by ``stimpertrial`` zeros so that later trials
    stay aligned.

    Parameters
    ----------
    stimonsets : np.ndarray, 1-D
        Stimulus onset times in increasing order.
    stimpertrial : int
        Expected number of onsets per trial.
    trialsperfile : int
        Expected number of trials; the result is truncated to
        ``stimpertrial * trialsperfile`` entries.
    trialgap : number, optional
        Minimum onset-to-onset distance that marks a trial boundary (default 850).

    Returns
    -------
    np.ndarray (float)
        Flattened onsets, ``stimpertrial`` per trial, at most
        ``stimpertrial * trialsperfile`` long.
    """
    # A gap after onset k means onset k + 1 opens the next trial.
    boundaries = np.flatnonzero(np.diff(stimonsets) > trialgap) + 1
    trials = [
        chunk if len(chunk) == stimpertrial else np.zeros(stimpertrial)
        for chunk in np.split(stimonsets, boundaries)
    ]
    # The original implementation always produced one extra all-zero trial at
    # the end (cut off below whenever enough trials were found); keep that so
    # the output length is unchanged for existing callers.
    trials.append(np.zeros(stimpertrial))
    return np.concatenate(trials)[:stimpertrial * trialsperfile]
def fixeventdata(photocell, events, stimpertrial, trialsperfile, responsewindow=1000):
    """Variant of ``eventdata`` that re-aligns the onsets with ``realignStim`` first.

    Parameters
    ----------
    photocell : array-like, 1-D
        Photocell signal. Samples whose z-score exceeds 1 count as "on".
    events : np.ndarray, 2-D
        Event table; column 0 is compared against the photocell sample indices
        and column 1 holds the event code. Codes 2 and 32 are the responses.
    stimpertrial : int
        Number of stimulus onsets per trial.
    trialsperfile : int
        Expected number of trials, forwarded to ``realignStim``.
    responsewindow : int, optional
        A response counts for a trial only if it occurs less than this many
        samples after the trial's last stimulus onset (default 1000).

    Returns
    -------
    stimonsets : np.ndarray
        Onsets after ``realignStim``.
    trialonsets, trialoffsets : np.ndarray
        First and last onset of every trial.
    responsetime : np.ndarray
        Time (events column 0) of the first response of each trial, 0 if none.
    responsechoice : np.ndarray
        Code (events column 1) of that response, -1 if none.
    """
    above = np.where(zscore(photocell) > 1)[0]
    # A new stimulus starts wherever two above-threshold samples are not adjacent.
    breaks = np.where(np.diff(above) > 1)[0]
    stimonsets = np.zeros(len(breaks) + 1)
    stimonsets[0] = above[0]
    stimonsets[1:] = above[breaks + 1]
    stimonsets = realignStim(stimonsets, stimpertrial, trialsperfile)

    # Use the stimpertrial argument rather than a literal 10 so the trial
    # boundaries agree with what realignStim was asked to produce.
    trialonsets = stimonsets[np.arange(0, len(stimonsets), stimpertrial)]
    trialoffsets = stimonsets[np.arange(stimpertrial - 1, len(stimonsets), stimpertrial)]

    isresponse = (events[:, 1] == 2) | (events[:, 1] == 32)
    eresponsetime = events[isresponse, 0]
    eresponsechoice = events[isresponse, 1]

    ntrials = len(trialonsets)
    responsetime = np.zeros(ntrials)
    responsechoice = np.full(ntrials, -1)

    for j in range(ntrials - 1):
        # Response must follow the last stimulus, fall inside the window and
        # precede the first stimulus of the next trial.
        hits = np.flatnonzero(
            (eresponsetime > trialoffsets[j])
            & (eresponsetime < trialoffsets[j] + responsewindow)
            & (eresponsetime < trialonsets[j + 1])
        )
        # Test the number of hits, not their truthiness: any([0]) is False and
        # would silently drop a trial whose only response is the first one.
        if hits.size > 0:
            # Historically the first response is taken. Open question: should
            # trials with more than one response be ignored instead?
            responsetime[j] = eresponsetime[hits[0]]
            responsechoice[j] = eresponsechoice[hits[0]]

    # The last trial has no following trial onset to bound the window.
    lasthits = np.flatnonzero(
        (eresponsetime > trialoffsets[-1])
        & (eresponsetime < trialoffsets[-1] + responsewindow)
    )
    if lasthits.size > 0:
        responsetime[-1] = eresponsetime[lasthits[0]]
        responsechoice[-1] = eresponsechoice[lasthits[0]]
    return stimonsets, trialonsets, trialoffsets, responsetime, responsechoice

In [6]:
# I suggest Fp1,Fpz, and Fp2

# ---- configuration -------------------------------------------------------
prestim = 200    # epoch time axis starts at -prestim (units presumably samples - confirm)
poststim = 800   # epoch time axis stops before +poststim
sub = '105'
ses = '1'

# ---- locate the recordings and the behavioural file ----------------------
eegpath = f'../data/Subject{sub}/S{sub}SS{ses}/'
files = os.listdir(eegpath)
filenames = sorted(f for f in files if f.endswith('.cdt'))
print(filenames)
nfiles = len(filenames)

behpath = f'../data/Subject{sub}/{sub}Session{ses}/'
# os.listdir order is arbitrary, so sort before taking the first match.
behfiles = sorted(
    f for f in os.listdir(behpath)
    if sub in f and 'csv' in f and 'ses'+ses in f and '#' not in f and 'block2' in f
)
if not behfiles:
    raise FileNotFoundError(
        f'no block2 behavioural csv for subject {sub}, session {ses} in {behpath}')
behfile = behfiles[0]
featarray, answerarray, mlresponse, refindarray, resparray = getFeats(behpath+behfile)

# ---- expected layout of the session --------------------------------------
trialsperfile = 65 if ses == '1' else 80
stimpertrial = 10
stimperfile = trialsperfile*stimpertrial
ntrials = nfiles*trialsperfile
nstim = ntrials*stimpertrial

# Per-stimulus labels (length nstim) and per-trial responses (length ntrials).
stimblocklabel = np.repeat(np.arange(nfiles, dtype=float), stimperfile)
stimtriallabel = np.zeros(nstim)
stimposlabel = np.zeros(nstim)
stimresponsechoice = np.full(ntrials, -1)
stimresponsetime = np.zeros(ntrials)
# Defined up front so the save cell still finds it when there are no recordings;
# segmentdata returns the same axis for every file.
time = np.arange(-prestim, poststim)

for f in range(nfiles):
    events, photocells = getevents(eegpath+filenames[f])

    # Drop everything before the last 800001 event (presumably the
    # experiment-start marker) so stray photocell flashes before it are ignored.
    # Recordings without that event are used untrimmed.
    startrows = np.where(events == 800001)[0]
    if startrows.size > 0:
        startrow = startrows[-1]
        expstart = events[startrow][0]
        events = events[startrow:]
        events[:, [0, 2, 3]] = events[:, [0, 2, 3]] - expstart
        photocells = photocells[int(expstart):]

    stimonsets, trialonsets, trialoffsets, responsetime, responsechoice = eventdata(photocells[:, 0], events)

    nfound = stimonsets.shape[0]
    if nfound != stimperfile:
        if nfound < stimperfile and nfound % stimpertrial != 0:
            # Some onsets are missing inside a trial: realign trial by trial.
            print('Problem Recording', f)
            stimonsets, trialonsets, trialoffsets, responsetime, responsechoice = fixeventdata(
                photocells[:, 0], events, stimpertrial, trialsperfile)
        elif nfound > stimperfile:
            # Writing these would spill into the next file's slots.
            raise ValueError(
                f'{filenames[f]}: found {nfound} stimulus onsets, expected at most {stimperfile}')
        else:
            # Whole trials are missing at the end: fill only what was recorded.
            print('Finished Early in', f)

    triallabel, poslabel, time = segmentdata(stimonsets, trialonsets, prestim, poststim)

    # Each file owns a fixed slot of stimperfile stimuli / trialsperfile trials;
    # a short file fills the start of its slot and leaves the rest at the defaults.
    stimstart = f*stimperfile
    trialstart = f*trialsperfile
    stimtriallabel[stimstart:stimstart+len(triallabel)] = triallabel + trialstart  # this might not be ideal.
    stimposlabel[stimstart:stimstart+len(poslabel)] = poslabel
    stimresponsechoice[trialstart:trialstart+len(responsechoice)] = responsechoice
    # Response latency relative to the last stimulus onset of the trial.
    stimresponsetime[trialstart:trialstart+len(responsetime)] = responsetime - trialoffsets

# Recode the response event codes: 32 -> 0, 2 -> 1 (-1 stays "no response").
stimresponsechoice[stimresponsechoice == 32] = 0
stimresponsechoice[stimresponsechoice == 2] = 1
#outputs are stimdata, stimblocklabel,stimtriallabel,stimposlabel, stimresponsechoice, stimresponsetime,time, 1,50,sr, labels,chanloc

['S105SS1B1.cdt', 'S105SS1B2.cdt', 'S105SS1B3.cdt']
Problem Recording 2


In [7]:
# Collect the per-stimulus / per-trial arrays and write them to one file.
outpath = 'scanned/'
outname = f'{sub}SS{ses}_scanned.hdf'
outdict = {
    'block': stimblocklabel,
    'trial': stimtriallabel,
    'position': stimposlabel,
    'choice': stimresponsechoice,
    'responsetime': stimresponsetime,
    'time': time,
    'answer': answerarray,
    'ordered_features': featarray,
    'model_response': mlresponse,
    'index order': refindarray,
}
# Create the output folder up front instead of reacting to an OSError from the
# save: an OSError can have other causes, and retrying os.mkdir on an existing
# folder would replace the real error with FileExistsError.
os.makedirs(outpath, exist_ok=True)
savemat(outpath+outname, outdict, store_python_metadata=True)