Analysis of pupil data from the confidenced project
------------------------------------------------

Goal: Design a linear model that explains pupil data from the confidence project. 

Strategy: Use one example subject to design a model and then compare model predictions to event related averages

In [1]:
import cPickle
from conf_analysis import pupil, patsy_transforms as pt
from pylab import *
import seaborn as sns
import pandas as pd
%load_ext autoreload
%autoreload 2
%matplotlib inline
sns.set_style('ticks')

In [2]:
import gzip
# Load data from all subs
def get_data(sub):
    data = cPickle.load(gzip.open('../temp_data/' + sub+'.pickle.gzip'))
    events, messages = data['events'], data['messages']
    events = events.reset_index()
    del events['level_0']
    del events['level_1']
    events.rename(columns={'level_2':'sample_time'}, inplace=True)
    events['sample_time'] = events.sample_time.astype(int)
    events['subject'] = sub
    events.set_index(['session', 'block', 'subject', 'sample_time'], inplace=True)    
    messages.feedback[messages.feedback==0] = -1
    messages['subject'] = sub
    messages = messages.reset_index()
    del messages['level_0']
    del messages['level_1']
    del messages['level_2']
    messages.set_index(['session', 'block', 'subject', 'trial'], inplace=True)    
    #def zscore(ev):
    #    ev['pa'] = (ev.pa-ev.pa.mean())/ev.pa.std()
    #    return ev
    #events = events.groupby(level='session').apply(zscore)
    return events, messages

In [3]:
events, messages = [], []
for sub in ['S%02i'%i for i in [1,2,3,4,5,6,7,9,10,11,12,13,14]]:
    e, m = get_data(sub)
    events.append(e)
    messages.append(m)
events = pd.concat(events)
messages = pd.concat(messages)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [None]:
messages.sort_index(inplace=True)
events.sort_index(inplace=True)
print messages.index.names, messages.index.lexsort_depth
print events.index.names, events.index.lexsort_depth
messages = messages.loc[~isnan(messages.decision_time)]

In [None]:
whos

In [4]:
import cPickle, gzip
#file = gzip.open('../temp_data/allsubs.df.gzip', 'w')
cPickle.dump((events, messages), open('../temp_data/allsubs.df', 'w'), protocol=2)

SystemError: error return without exception set

In [None]:
e = events.reset_index()

In [None]:
from pyedfread import edfread
from conf_analysis import datadef
corrupt = dict((k, []) for k in datadef.sb2fname.keys())
for s in range(1, 15):
    for session, vals in datadef.sb2fname['S%02i'%s].iteritems():
        for block, (edf, mat) in vals.iteritems():
            try:
                edfread.read_preamble(edf)
            except:
                corrupt['S%02i'%s].append(edf)

In [None]:
for sub in range(1, 15):
    try:
        nsession = len(events.xs('S%02i'%sub, level='subject').groupby(level=['session', 'block']))
        bar(sub-0.5, nsession, 1)
        lc = len(corrupt['S%02i'%sub])
        bar(sub-0.5, lc, bottom=nsession, width=1, color='r')
    except Exception as e :
        print e
for k in [5, 10, 15, 20, 25]:
    plot([0.5, 15.5], [k,k], 'k--')

What is missing:
---------------
    - S1: 25 files available: 7,8,10,16,17.12 2015. Complete but corrupt.
    - S2: Complete
    - S3: 26 files available. Corruption!
    - S4: One session is missing in datadef.py. Now corrected. Should be 25 after next conversion run 
    - S5: 25 files in datadef, corrupt!
    - S6: 25 files in datadef, corrupt!
    - S7: Block ordering in datadef not correct, now corrected. Wait for next conversion run.
    - S9: Has 25 files in datadef. 1st block was missing in datadef on cluster.
    - S10: One measurement had to be aborted because of license issues, one file was not saved.
    - S11: Corrupt files. 
    - S12: Wait for next conversion
    - S13 + S14: Wait for next conversion 

The data
--------

The next plot shows the raw pupil data across all available blocks for S2. The shown data has been band pass filtered in a range of [0.01 and 10] Hz.

In [None]:
figure(figsize=(18,12))
for i, (snum, sdata) in enumerate(events.groupby(level='subject')):
    subplot(12, 1, i+1)
    plot(sdata.pa.values)
    #ylim(-5, 5)
    
sns.despine()
#xlim([295000, 300000])
#ylim([-2.5, 2.5])

In [None]:
events = events.groupby(level='session').apply(pupil.cleanup)


In [None]:
 287231/5./100

In [None]:
figure(figsize=(18,12*5))
for i, (snum, sdata) in enumerate(events.groupby(level=['block', 'session'])):
    subplot(15*5, 1, i+1)
    t = arange(len(sdata))
    plot(t, sdata.pa.values)
    plot(t, sdata.pac.values, 'r')
    #ylim(-5, 5)
    yticks([])
    xticks([])
    xlim([0, 287231/5.])
    title(snum)
sns.despine()
tight_layout()
#ylim([-2.5, 2.5])

In [None]:
def zscore(ev):
    ev['pac'] = (ev.pac - ev.pac.mean())/ev.pac.std()
    return ev
events = events.groupby(level='session').apply(zscore)

In [None]:
figure(figsize=(18,3))
t = arange(len(events))/100.
plot(t, events.pac.values)
sns.despine()

#ylim([-2.5, 2.5])

The Model
---------

I use a simple GLM and convolve regressors with JW's standard IRF. I first compute the convolution kernels and then add some event fields that are useful for the model.

In [None]:
import pupil
yI, ydtI, ydnI = pupil.IRF_pupil()
IRFS = [yI/yI.std(), ydnI/ydnI.std(), ydtI/ydtI.std()]
yIe, ydtIe, ydnIe = pupil.IRF_pupil(tmax=0.25, n=1)#tmax=.25, n=8)
IRFSe = [yIe, ydtIe, ydnIe]

In [None]:
low, pa , above = pupil.filter_pupil(events.pa, 100, highcut=10, lowcut=0.1)
figure(figsize=(20, 6))
plot(pa)
low, pa, above = pupil.filter_pupil(events.pa, 100, highcut=10, lowcut=1/(6.))
plot(pa)
xlim([50000, 70000])

In [None]:
events['pafilt'] = pa

# Full GLM

In [None]:
events['decend'] = events.decision>0

m, yh, y, X, res = pupil.eval_model('''
pafilt ~   
     pt.Z(left_gx) +pt.Z(left_gy) +
     pt.MF(pt.Z(left_gx), IRFS) + 
     pt.MF(pt.Z(left_gy), IRFS) + 
     pt.MF(blink, IRFS) +
     pt.MF(pt.Z(pt.dt(left_gx)), IRFS) + 
     pt.MF(pt.Z(pt.dt(left_gy)), IRFS) +
     
     pt.MF(ref, func=IRFS) +  
     pt.MF(contrast, func=IRFS) +
     pt.MF(pt.Z(pt.dt(contrast)), func=IRFS) +
     
     pt.MF(decramp21, IRFS) +
     pt.MF(decramp22, IRFS) +
     pt.MF(decramp23, IRFS) +
     pt.MF(decramp24, IRFS) +

     pt.MF(dec_start, IRFS) +
     pt.MF(decend, IRFS) +     
     
     pt.MF(feedback_offset_pos, IRFS) +
     pt.MF(feedback_offset_neg, IRFS) 
''', events.reset_index())
print events.shape
print yh.shape
events['yhat'] = yh
events['residuals'] = y-yh


In [None]:
import glm_viz
figure(figsize=(20, 6))
glm_viz.timecourse(events, y, yh, [2000, 2100])

In [None]:
messages['contrast_onset_time'] = [x.contrast_time[0] for _,x in messages.iterrows()]
messages['reference_onset_time'] = [x.decision_time-1900 for _, x in messages.iterrows()]

In [None]:
figure(figsize=(20,8))
glm_viz.condition_averages(events, messages)

In [None]:
t = linspace(0, 1, 100)
plot(1/t, t)

In [None]:
xvals = events.pafilt-events.yhat
print xvals.shape

In [None]:
xcorr = correlate(xvals[::2], xvals[::2], 'same')

In [None]:
print xcorr.shape
plot((arange(len(xcorr))-len(xcorr)/2.)/50., xcorr)
xlim([-200, 200])