In [None]:
%matplotlib notebook
import mne
import numpy as np
import os
import matplotlib.pyplot as plt
import pandas as pd
from datetime import datetime
from sklearn import linear_model
# athenacli -e prod -w3 -n0 -p KET -o . EEGTEST

### Load the montage of sensor channel locations and set up the files to process.

In [None]:
# Standard 10-20 sensor layout; it is attached to the recording once the EDF is read.
montage = mne.channels.read_montage('standard_1020')

# Recording (EDF) and client log (CSV) to process. Inputs used for earlier sessions:
#   edf_file = 'EEG_TEST_0001_raw.edf'
#   log_file = 'log3.csv'
#   edf_file = os.path.expanduser('~/data/eeg/20190701/aditya_TEST_raw.edf')
#   log_file = os.path.expanduser('~/data/eeg/20190701/adityaTest.csv')
edf_file, log_file = (
    os.path.expanduser(path)
    for path in (
        '~/data/eeg/pilot/7_3_Charles/human_exam_number_raw.edf',
        '~/data/eeg/pilot/7_3_Charles/charles_test_july3.csv',
    )
)

In [None]:
# Read the recording: 'Trigger' carries the event codes, X1 is treated as EOG,
# and the CM/X2/X3 channels are marked as miscellaneous.
raw = mne.io.read_raw_edf(
    edf_file,
    stim_channel='Trigger',
    eog=['EEG X1-Pz'],
    misc=['EEG CM-Pz', 'EEG X2-Pz', 'EEG X3-Pz'],
)
# Strip the 'EEG ' prefix and the '-Pz' suffix from every channel name so the
# names line up with the ones used by the standard montage.
raw.rename_channels({
    name: name.replace('EEG ', '').replace('-Pz', '')
    for name in raw.ch_names
})
raw.set_montage(montage)
# Length of one sample in milliseconds, derived from the file's sampling rate.
eeg_sample_interval_ms = 1/raw.info['sfreq'] * 1000
print(raw.info)
#raw.plot_sensors()

### Find the events

In [None]:
# Locate the trigger events in the recording (the stim channel was set to
# 'Trigger' when the file was read). The result is later wrapped in a DataFrame
# with the columns time_idx / prev_diff / bytecode.
events = mne.find_events(raw)

In [None]:
# Load the client-side log and normalise it in a single chain:
#   * both timestamp columns are scaled by 1000 and rounded to integers
#     (presumably seconds -> milliseconds; confirm against the logger),
#   * the expected trigger bytecode is derived from the client timestamp,
#   * rows are ordered by trigger timestamp with a fresh 0..n-1 index.
logdf = (
    pd.read_csv(
        log_file,
        header=None,
        names=['client_ts', 'trigger_ts', 'rtdelay', 'msg', 'uid'],
    )
    .assign(
        client_ts=lambda d: (d.client_ts * 1000).round().astype(int),
        trigger_ts=lambda d: (d.trigger_ts * 1000).round().astype(int),
    )
    .assign(bytecode=lambda d: d.client_ts % 255 + 1)
    .sort_values('trigger_ts')
    .reset_index(drop=True)
)
# Disabled experiments kept for reference:
#phone_start = 722
#logdf = logdf.iloc[722:, :]
#logdf.to_csv(os.path.dirname(log_file) + '/_log.csv')
logdf.head()

In [None]:
# One row per detected trigger: sample index, previous stim value, and event code.
eventdf = pd.DataFrame(events, columns=['time_idx','prev_diff','bytecode'])
# NOTE: the eeg timestamp is local time, not UTC! Be sure to use the correct adjustment here.
# The 7-hour shift is hard-coded for this recording; revisit it for other time zones / DST.
event_start_ts = int(raw.info['meas_date'][0]) + 7*60*60
# Convert sample indices to absolute millisecond timestamps using the sampling
# rate stored in the recording rather than a hard-coded 300 Hz.
eventdf['eeg_ts'] = ((eventdf.time_idx / raw.info['sfreq'] + event_start_ts) * 1000).round().astype(int)
# os.path.join keeps the output next to the log file even when log_file has no
# directory component (plain concatenation would then yield '/events.csv').
eventdf.to_csv(os.path.join(os.path.dirname(log_file), 'events.csv'))
np.sort(pd.unique(eventdf.bytecode))
# bits for 2 and 32 are always 0

### FIX FOR BAD BITS
For the data collected on 2019-07-03, the bits representing 2 and 32 are always zero. So zero-out those bits in the log bytecode before finding matches.

In [None]:
# Clear the 2 and 32 bits of every logged bytecode (mask 0b11011101) so the log
# codes are comparable with the recorded trigger codes, where those bits never fire.
logdf['bytecode'] = logdf['bytecode'] & 0b11011101
# Show the distinct codes and message types that remain.
print(np.sort(logdf['bytecode'].unique()))
print(logdf['msg'].unique())

### Find the best-matching bytecode for each event

In [None]:
# For every recorded trigger event, look for log rows that (a) fall within
# +/- `window` ms of the event's timestamp, (b) carry the same bytecode, and
# (c) whose neighbouring bytecodes (3 before .. 3 after) agree with the
# neighbouring events.
window = 1000 # +/-, in milliseconds
match_inds = []
for event_idx in eventdf.index:
    event_ts = eventdf.eeg_ts[event_idx]
    event_code = eventdf.bytecode[event_idx]
    # Log rows close enough in time to be candidates for this event.
    tmp = logdf.loc[np.abs(event_ts - logdf.trigger_ts) < window, :]
    matches = tmp.index[tmp.bytecode == event_code]
    for match in matches:
        # See if the surrounding bytecodes match. If so, add this to the list
        keep = True
        try:
            for idx in range(-3,4):
                if tmp.bytecode[match + idx] != eventdf.bytecode[event_idx + idx]:
                    keep = False
                    break  # one mismatch is enough to reject this candidate
        except (KeyError, IndexError):
            # A neighbour is missing from the windowed log slice or from the
            # event table (e.g. at either end of the recording). The candidate
            # keeps the verdict reached so far, i.e. it is accepted when no
            # mismatch was seen before the lookup failed.
            # NOTE(review): confirm that accepting partially-checked candidates
            # is intended.
            pass
        if keep:
            match_inds.append((logdf.msg[match], match, event_idx, logdf.trigger_ts[match], logdf.client_ts[match],
                                eventdf.eeg_ts[event_idx], eventdf.time_idx[event_idx], logdf.rtdelay[match]))

print('Found %d matching timepoints.' % len(match_inds))
cols = ['msg','logdf_idx','eventdf_idx','trigger_ts','client_ts','eeg_ts','eeg_samp','rt_delay']
matchdf = pd.DataFrame(data=match_inds, columns=cols)
matchdf = matchdf.sort_values('client_ts').reset_index(drop=True)
# Non-positive round-trip delays are treated as missing. Cast to float first so
# that NaN can be stored even when the column was read as integers.
matchdf['rt_delay'] = matchdf['rt_delay'].astype(float)
matchdf.loc[matchdf.rt_delay <= 0, 'rt_delay'] = np.nan
matchdf.head()

### Find the optimal fuzzy alignment between the log file and event bytecode sequence
Piecewise-linear should be used to minimize the error accumulation across long recording runs.
http://www.xavierdupre.fr/app/mlinsights/helpsphinx/notebooks/piecewise_linear_regression.html

In [None]:
# Scratch preview of the per-row network-delay estimate that fit_timestamps()
# computes later in the notebook.
# `msgs` is defined here so the cell also runs on a fresh kernel; it is the same
# selection that is used for the fit further down.
msgs = ['imageFlip']
tmp = matchdf.loc[matchdf.msg.isin(msgs), :].copy(deep=True)
tmp = tmp.reset_index()
# Half of the following row's round-trip delay is used as this row's one-way delay.
tmp['netdelay'] = tmp.rt_delay.shift(-1) / 2
# Assign the filled column back instead of using inplace=True on an attribute
# access, which is chained assignment and is not guaranteed to modify `tmp`.
tmp['netdelay'] = tmp['netdelay'].fillna(tmp['netdelay'].mean())
#tmp.head()
#plt.scatter(tmp.client_ts, tmp.trigger_ts, color='g')
#plt.scatter(tmp.client_ts, tmp.trigger_ts-tmp.netdelay, color='b')

In [None]:
def fit_timestamps(matchdf, msgs, mad_scale=100, correct_delay=True, plot=False,
                   sfreq=300.0, random_state=0):
    """Fit a robust linear map from client timestamps to EEG timestamps.

    The matched (client, EEG) timestamp pairs whose message type is in ``msgs``
    are fitted with a RANSAC linear regressor. The fitted model is then applied
    to *every* row of the module-level ``logdf`` with a message type in ``msgs``
    to predict the EEG sample index at which each logged event occurred.

    Besides its arguments, this function reads the module-level ``logdf``
    (client log) and ``event_start_ts`` (recording start, in seconds).

    Parameters
    ----------
    matchdf : pandas.DataFrame
        Matched rows with at least the columns ``msg``, ``client_ts``,
        ``eeg_ts`` and ``rt_delay``.
    msgs : list of str
        Message types to use for the fit and for the prediction.
    mad_scale : float
        The RANSAC residual threshold is the mean absolute deviation of the
        target divided by this value; larger values reject more points.
    correct_delay : bool
        If True, subtract the estimated one-way network delay from the EEG
        timestamps before fitting.
    plot : bool
        If True, plot the data and the fitted line and print fit diagnostics.
    sfreq : float
        EEG sampling rate in Hz used to turn milliseconds into sample indices.
        Defaults to 300, the value that was previously hard-coded.
    random_state : int or None
        Seed for the RANSAC sampler so repeated calls give the same fit.
        Pass None for a non-deterministic fit.

    Returns
    -------
    predicted_eeg_samp : numpy.ndarray of int
        Predicted EEG sample index for each selected ``logdf`` row.
    client_to_eeg : sklearn.linear_model.RANSACRegressor
        The fitted model (operating on offset-subtracted timestamps).
    offset : scalar
        The value subtracted from both axes before fitting.

    Raises
    ------
    ValueError
        If no row of ``matchdf`` has a message type in ``msgs``.
    """
    selected = matchdf.loc[matchdf.msg.isin(msgs), :].copy(deep=True).reset_index()
    if selected.empty:
        raise ValueError('No matched rows with msg in %r; cannot fit timestamps.' % (list(msgs),))

    # the round-trip delay stored in row n is actually for row n-1
    selected['netdelay'] = selected.rt_delay.shift(-1) / 2
    # Missing delays are filled with the median (robust to a few extreme
    # round trips). Assign back rather than fillna(inplace=True) on an
    # attribute access, which is chained assignment.
    delay_median = selected.netdelay.median()
    delay_std = selected.netdelay.std()
    selected['netdelay'] = selected['netdelay'].fillna(delay_median)

    # The eeg timestamps are shifted back in time, as if there were no network delay. This
    # leaves the clock bias as only difference (on average) between the eeg and client timestamps.
    x = selected.client_ts.values
    if correct_delay:
        # Use plain arrays on both branches so y is always an ndarray.
        y = selected.eeg_ts.values - selected.netdelay.values
    else:
        y = selected.eeg_ts.values
    # NOTE (open question carried over from the original author):
    # Offset includes clock bias (same for all measurements) and the timing error for this one
    # measurement. The intercept in the fitted model *should* take care of this, but may
    # require more thought to be sure.
    offset = y[0]
    x = x - offset
    y = y - offset

    X = np.atleast_2d(x).T

    # Robust linear fit
    thresh = np.abs(y - y.mean()).mean() / mad_scale
    client_to_eeg = linear_model.RANSACRegressor(residual_threshold=thresh,
                                                 random_state=random_state)
    client_to_eeg.fit(X, y)

    # Predict for every selected log row, then convert from absolute ms to ms
    # since the recording start, and finally to sample indices.
    x_keep = logdf.client_ts[logdf.msg.isin(msgs)].values - offset
    predicted_eeg_ts = client_to_eeg.predict(np.atleast_2d(x_keep).T) + offset - event_start_ts*1000
    predicted_eeg_samp = (predicted_eeg_ts / (1000/sfreq)).round().astype(int)

    outmask = ~client_to_eeg.inlier_mask_
    if plot:
        y_hat = client_to_eeg.predict(X)
        plt.plot(x, y, 'ro', x, y_hat, 'k-')
        print('x0=%d, y0=%d, y_hat0=%d, offset=%d' % (x[0],y[0],int(round(y_hat[0])),offset))
        print(client_to_eeg.estimator_.coef_[0], client_to_eeg.estimator_.intercept_)
        # Positions (within the selected rows) of the pairs RANSAC rejected.
        print(','.join(str(v) for v in np.flatnonzero(outmask)))
    print('using %s (options are: %s)' % (','.join(msgs), ', '.join(map(str, pd.unique(matchdf.msg)))))
    print('Rejecting %d out of %d sample pairs.' % (outmask.sum(), len(outmask)))
    # The reported statistic is the median, so label it as such.
    print('Median network delay: %0.2f ms (%0.2f stdev)' % (delay_median, delay_std))
    return predicted_eeg_samp,client_to_eeg,offset


In [None]:
# Outlier-rejection strictness passed to the fit.
mad_scale = 10
# Message types that anchor the client -> EEG clock fit.
msgs = ['imageFlip']
#msgs = ['good click', 'bad click', 'early click']
# Fit with the network-delay correction and show the diagnostic plot.
predicted_eeg_samp, client_to_eeg, offset = fit_timestamps(
    matchdf,
    msgs,
    mad_scale=mad_scale,
    correct_delay=True,
    plot=True,
)


### Synthesize a corrected event sequence
This new sequence takes into account the random delay from one event to the next and the average network delay. Because event trigger packets can arrive out-of-order, they needed to be re-sorted above to apply the proper delays. But now that everything is corrected, we can re-sort them based on the event timestamp so mne doesn't complain about a non-chronological event sequence.

In [None]:
# Optional constant shift of the synthesized events: a bias in ms (currently 0)
# converted to samples at 0.3 samples/ms.
bias = int(round(0 / (1/.3)))
# Epoching parameters shared by both evoked responses below.
reject = dict(eeg=150e-6) # 180e-6, eog=150e-6)
event_id, tmin, tmax = {'visual': 1}, -0.10, 0.5

# Low-pass at 45 Hz and apply an empty reference (i.e. no re-referencing).
# NOTE(review): Raw.filter appears to operate in place, so `raw` itself is
# low-passed after this cell -- confirm that later cells expect that.
raw_no_ref,_ = mne.set_eeg_reference(raw.load_data().filter(l_freq=None, h_freq=45), [])
#raw_no_ref, _ = mne.set_eeg_reference(raw.load_data(), [])

# --- Evoked response with the network-delay correction ---
predicted_eeg_samp,client_to_eeg,offset = fit_timestamps(matchdf, msgs, mad_scale=mad_scale, correct_delay=True)
# Sort the predicted samples so the event array handed to mne is chronological.
syn_event_df = pd.DataFrame([(i+bias,0,1) for i in np.sort(predicted_eeg_samp)], columns=['ts','diff','code'])
epochs_params = dict(events=syn_event_df.values, event_id=event_id, tmin=tmin, tmax=tmax, reject=reject)
evoked_no_ref = mne.Epochs(raw_no_ref, **epochs_params).average()

# --- Evoked response without the correction ---
# Use the same mad_scale as above so the two results differ only in the delay
# correction, not in how aggressively outliers are rejected.
predicted_eeg_samp,client_to_eeg,offset = fit_timestamps(matchdf, msgs, mad_scale=mad_scale, correct_delay=False)
syn_event_df = pd.DataFrame([(i+bias,0,1) for i in np.sort(predicted_eeg_samp)], columns=['ts','diff','code'])
epochs_params = dict(events=syn_event_df.values, event_id=event_id, tmin=tmin, tmax=tmax, reject=reject)
evoked_no_ref_uncorrected = mne.Epochs(raw_no_ref, **epochs_params).average()

del raw_no_ref  # save memory

In [None]:
# Plot the evoked response that was computed with the network-delay correction
# (time axis in ms, one colour per sensor position).
p = evoked_no_ref.plot(time_unit='ms', spatial_colors=True)
#p = evoked_no_ref_uncorrected.plot(time_unit='ms', spatial_colors=True)
# Disabled topomap; note that `title` is not defined anywhere in this notebook.
#t = evoked_no_ref.plot_topomap(times=[0.075,0.1,0.125,.15,.175], size=1.0, title=title, time_unit='s')

In [None]:
# Plot the evoked response that was computed WITHOUT the network-delay
# correction, for comparison with the corrected one.
#p = evoked_no_ref.plot(time_unit='ms', spatial_colors=True)
p = evoked_no_ref_uncorrected.plot(time_unit='ms', spatial_colors=True)
# Disabled topomap; note that `title` is not defined anywhere in this notebook.
#t = evoked_no_ref.plot_topomap(times=[0.075,0.1,0.125,.15,.175], size=1.0, title=title, time_unit='s')

### Sample code for doing frequency analysis

In [None]:
# Data from the two occipital channels, restricted to the span covered by the
# predicted event samples. `predicted_eeg_samp` is the array returned by
# fit_timestamps(); min()/max() are used because it is not guaranteed to be in
# chronological order, and the start is clamped at 0 so a negative prediction
# cannot wrap around to the end of the recording.
occ = raw.get_data(['O1','O2'])[:, max(int(predicted_eeg_samp.min()), 0):int(predicted_eeg_samp.max())]
occ.shape

In [None]:
# Real-input FFT along the time axis of each channel.
ft = np.fft.rfft(occ)
# Sample spacing in seconds.
T = eeg_sample_interval_ms / 1000
# Frequency (Hz) of every rfft bin. rfftfreq always matches the length of `ft`
# (n//2 + 1) and the true bin spacing 1/(n*T), including when n is odd.
xf = np.fft.rfftfreq(occ.shape[1], d=T)

In [None]:
# Amplitude spectrum of the second selected channel (row 1 of `ft`) for FFT bins
# 100-999. plt.subplots registers the figure with pyplot, so the requested size
# is applied to the figure that is actually drawn on.
fig, ax = plt.subplots(figsize=(12,6))
ax.plot(xf[100:1000], np.abs(ft[1,100:1000]))
ax.set_xlabel('Frequency (Hz)')
ax.set_ylabel('|FFT|');

In [None]:
# Peek at the first rows of the client log table.
logdf.head()

In [None]:
# Plot the client timestamps row by row.
# NOTE(review): this cell referred to `df`, which is never defined in the
# notebook; `logdf` (the log table with a client_ts column) is assumed here.
plt.plot(logdf.client_ts);

In [None]:
# NOTE(review): this cell referred to `df`, which is never defined in the
# notebook; `logdf` is assumed to be the intended table.
logdf.head()

In [None]:
# Shape of the full data array returned for the recording.
raw.get_data().shape