In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from __future__ import division, print_function, unicode_literals

import sys
import sqlite3
import zlib
from pathlib import Path
import numpy as np

import msgpack
import pandas as pd

# Reading mwk2

In [3]:
try:
    buffer
except NameError:
    # Python 3
    buffer = bytes

In [4]:
class MWK2Reader(object):

    _compressed_text_type_code = 1
    _compressed_msgpack_stream_type_code = 2

    def __init__(self, filename):
        self._conn = sqlite3.connect(filename)
        self._unpacker = msgpack.Unpacker(raw=False, strict_map_key=False)

    def close(self):
        self._conn.close()

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        self.close()

    @staticmethod
    def _decompress(data):
        return zlib.decompress(data, -15)

    def __iter__(self):
        for code, time, data in self._conn.execute('SELECT * FROM events'):
            if not isinstance(data, buffer):
                yield (code, time, data)
            else:
                try:
                    obj = msgpack.unpackb(data, raw=False)
                except msgpack.ExtraData:
                    # Multiple values, so not valid compressed data
                    pass
                else:
                    if isinstance(obj, msgpack.ExtType):
                        if obj.code == self._compressed_text_type_code:
                            yield (code,
                                   time,
                                   self._decompress(obj.data).decode('utf-8'))
                            continue
                        elif (obj.code ==
                              self._compressed_msgpack_stream_type_code):
                            data = self._decompress(obj.data)
                self._unpacker.feed(data)
                try:
                    while True:
                        yield (code, time, self._unpacker.unpack())
                except msgpack.OutOfData:
                    pass


### Dumping contents

In [25]:
filename = Path('/braintree/data2/active/users/ssazaidi/projects/stimulation/monkeys/oleo/mworksraw/datamaco-stim_corners-20210813-134320.mwk2')
assert filename.exists()

In [6]:
with MWK2Reader(filename) as event_file:
    code_to_name, name_to_code = {}, {}
    for code, time, data in event_file:
        if code == 0 and not code_to_name:
            code_to_name = dict((c, data[c]['tagname']) for c in data)
            name_to_code = dict((data[c]['tagname'], c) for c in data)
code_to_name

{4: '#allowAltFailover',
 41: 'eye_in_window',
 5: '#state_system_mode',
 46: 'reward_line',
 6: '#announceMessage',
 35: 'fixation_point_size_max',
 7: '#stimDisplayUpdate',
 48: 'mouse_x',
 8: '#experimentLoadProgress',
 45: 'stim_start_line',
 9: '#loadedExperiment',
 50: 'mouse_button_pressed',
 10: '#announceStimulus',
 55: 'key_c_pressed',
 11: '#announceSound',
 52: 'key_p_pressed',
 12: '#announceCalibrator',
 49: 'mouse_y',
 13: '#requestCalibrator',
 54: 'key_spacebar_pressed',
 14: '#announceCurrentState',
 43: 'experiment_state_line',
 15: '#announceTrial',
 56: 'stimulus_size',
 16: '#announceBlock',
 53: 'key_r_pressed',
 17: '#announceAssertion',
 58: 'stimulus_pos_y',
 18: '#serverName',
 63: 'stim_off_time',
 19: '#mainScreenInfo',
 60: 'ignore_time',
 20: '#warnOnSkippedRefresh',
 57: 'stimulus_pos_x',
 21: '#stopOnError',
 62: 'stim_on_time',
 22: '#realtimeComponents',
 51: 'key_x_pressed',
 23: 'eye_h_raw',
 64: 'stimulus_set_repetitions',
 24: 'eye_v_raw',
 61: 's

In [7]:
# codes = [
#             name_to_code['eye_h'],
#             name_to_code['eye_v'],
#             name_to_code['stim_key'],
#             name_to_code['RSVP_test_stim_index'],
#             name_to_code['stim_id'],
#             # Other meta data
#             name_to_code['stim_on_time'],
#             name_to_code['stim_off_time'],
#             name_to_code['stim_on_delay'],
#         ]

# data_dict = {
#     'code': [],
#     'name': [],
#     'time': [],
#     'data': [],
# }
# with MWK2Reader(filename) as event_file:
#     for code, time, data in event_file:
#         if code in codes:
#             data_dict['code'].append(code)
#             data_dict['name'].append(code_to_name[code])
#             data_dict['time'].append(time)
#             data_dict['data'].append(data)

In [21]:
name_to_code['stim_id']

80

In [22]:
codes = [
            name_to_code['eye_h'],
            name_to_code['eye_v'],
            name_to_code['trial_start_line'],
            name_to_code['correct_fixation'],
            name_to_code['stimulus_presented'],
            name_to_code['stim_id'],
            name_to_code['stim_current'],
            # Other meta data
            name_to_code['stim_on_time'],
            name_to_code['stim_off_time'],
            name_to_code['stim_on_delay'],
            name_to_code['stimulus_size'],
            name_to_code['fixation_window_size'],
            name_to_code['fixation_point_size_min'],
        ]

data_dict = {
    'code': [],
    'name': [],
    'time': [],
    'data': [],
}
with MWK2Reader(filename) as event_file:
    for code, time, data in event_file:
        if code in codes:
            data_dict['code'].append(code)
            data_dict['name'].append(code_to_name[code])
            data_dict['time'].append(time)
            data_dict['data'].append(data)

In [27]:
np.unique(data_dict['name'])

array(['correct_fixation', 'eye_h', 'eye_v', 'fixation_point_size_min',
       'fixation_window_size', 'stim_current', 'stim_id', 'stim_off_time',
       'stim_on_delay', 'stim_on_time', 'stimulus_presented',
       'stimulus_size', 'trial_start_line'], dtype='<U23')

In [28]:
data_df = pd.DataFrame(data_dict)
data_df.head()

Unnamed: 0,code,name,time,data
0,28,eye_h,93209720534,0.0
1,28,eye_h,93209721656,0.0
2,28,eye_h,93210439165,2.722309
3,28,eye_h,93210439204,2.722398
4,28,eye_h,93210439246,2.722501


In [29]:
data_df.loc[data_df['name']=='stim_id']['data'].unique()

array(['', 'a-031', 'd-016', 'd-018', 'd-005', 'd-000', 'd-008', 'a-016',
       'd-010', 'a-000', 'a-028', 'a-022', 'd-025', 'b-000', 'b-031',
       'a-024'], dtype=object)

In [30]:
data_df['name'].unique()

array(['eye_h', 'eye_v', 'fixation_window_size',
       'fixation_point_size_min', 'trial_start_line', 'stimulus_size',
       'stim_on_delay', 'stim_on_time', 'stim_off_time',
       'correct_fixation', 'stimulus_presented', 'stim_id',
       'stim_current'], dtype=object)

In [31]:
data_df = data_df.sort_values(by='time', ignore_index=True)
data_df.head()

Unnamed: 0,code,name,time,data
0,28,eye_h,93209720534,0.0
1,29,eye_v,93209720535,0.0
2,30,fixation_window_size,93209720536,2.0
3,34,fixation_point_size_min,93209720539,0.2
4,44,trial_start_line,93209720565,0.0


In [32]:
data_df['first_in_trial'] = False  # Store whether the stimulus is first in a trial
trial_start_df = data_df[(data_df.name == 'trial_start_line') | ((data_df.name == 'stimulus_presented') & (data_df.data != -1))]
first_in_trial_times = [trial_start_df.time.values[i] for i in range(1, len(trial_start_df))
                        if ((trial_start_df.name.values[i - 1] == 'trial_start_line') and
                            (trial_start_df.name.values[i] == 'stimulus_presented'))]
data_df['first_in_trial'] = data_df['time'].apply(lambda x: True if x in first_in_trial_times else False)
data_df

Unnamed: 0,code,name,time,data,first_in_trial
0,28,eye_h,93209720534,0,False
1,29,eye_v,93209720535,0,False
2,30,fixation_window_size,93209720536,2,False
3,34,fixation_point_size_min,93209720539,0.2,False
4,44,trial_start_line,93209720565,0,False
...,...,...,...,...,...
12859593,63,stim_off_time,103831943374,100000,False
12859594,68,correct_fixation,103831943379,-1,False
12859595,69,stimulus_presented,103831943380,-1,False
12859596,80,stim_id,103831943468,d-000,False


In [33]:
stimulus_presented_df = data_df[data_df.name == 'stimulus_presented'].reset_index(drop=True)
correct_fixation_df = data_df[data_df.name == 'correct_fixation'].reset_index(drop=True)
correct_fixation_df = correct_fixation_df[stimulus_presented_df.data != -1].reset_index(drop=True)
stimulus_presented_df = stimulus_presented_df[stimulus_presented_df.data != -1].reset_index(drop=True)
correct_fixation_df['first_in_trial'] = stimulus_presented_df['first_in_trial']

In [34]:
print(stimulus_presented_df.shape, correct_fixation_df.shape)

(1806, 5) (1806, 5)


In [19]:
stimulus_presented_df

Unnamed: 0,code,name,time,data,first_in_trial
0,69,stimulus_presented,93357674072,1.0,True
1,69,stimulus_presented,93358008064,2.0,False
2,69,stimulus_presented,93358341230,3.0,False
3,69,stimulus_presented,93358675073,4.0,False
4,69,stimulus_presented,93359009477,5.0,False
...,...,...,...,...,...
1801,69,stimulus_presented,103434891928,35.0,False
1802,69,stimulus_presented,103435212335,36.0,False
1803,69,stimulus_presented,103435546301,37.0,False
1804,69,stimulus_presented,103435878946,38.0,False


In [20]:
output = {
        'stim_on_time_ms': data_df[data_df.name == 'stim_on_time']['data'].values[-1] / 1000.,
        'stim_off_time_ms': data_df[data_df.name == 'stim_off_time']['data'].values[-1] / 1000.,
        'stim_on_delay_ms': data_df[data_df.name == 'stim_on_delay']['data'].values[-1] / 1000.,
        'stimulus_size_degrees': data_df[data_df.name == 'stimulus_size']['data'].values[-1],
        'fixation_window_size_degrees': data_df[data_df.name == 'fixation_window_size']['data'].values[-1],
        'fixation_point_size_degrees': data_df[data_df.name == 'fixation_point_size_min']['data'].values[-1],
    }

In [287]:
eyeh, eyev = [], []
for t in stimulus_presented_df.time.values:
    eyeh.append(data_df[(data_df.name == 'eye_h') & (data_df.time >= t) & (data_df.time <= (t + output['stim_on_time_ms'] * 1000.))].data.values.tolist())
    eyev.append(data_df[(data_df.name == 'eye_v') & (data_df.time >= t) & (data_df.time <= (t + output['stim_on_time_ms'] * 1000.))].data.values.tolist())
print(len(eyeh), len(eyev))

393 393


In [288]:
threshold = output['stim_on_time_ms'] // 2

for i in range(len(eyeh)):
    if correct_fixation_df.iloc[i]['data'] == -1: # Skip if marked incorrect
        continue
    
    if len(eyeh[i]) < threshold or len(eyev[i]) < threshold:
        correct_fixation_df.at[i, 'data'] = -1
    elif np.any([np.abs(_) > 2 for _ in eyeh[i]]) or np.any([np.abs(_) > 2 for _ in eyev[i]]):
        correct_fixation_df.at[i, 'data'] = -1

In [289]:
stimulus_presented_df[314:340]

Unnamed: 0,code,name,time,data,first_in_trial
314,67,stimulus_presented,191731782615,24.0,True
315,67,stimulus_presented,191732015671,25.0,False
316,67,stimulus_presented,191732249123,23.0,False
317,67,stimulus_presented,191732482517,4.0,False
318,67,stimulus_presented,191732715726,15.0,False
319,67,stimulus_presented,191772716711,18.0,True
320,67,stimulus_presented,191772950378,20.0,False
321,67,stimulus_presented,191782851559,14.0,True
322,67,stimulus_presented,191783085201,16.0,False
323,67,stimulus_presented,191783318249,1.0,False


In [290]:
correct_fixation_df[314:340]

Unnamed: 0,code,name,time,data,first_in_trial
314,66,correct_fixation,191731997581,1.0,True
315,66,correct_fixation,191732230733,-1.0,False
316,66,correct_fixation,191732464126,-1.0,False
317,66,correct_fixation,191732697547,-1.0,False
318,66,correct_fixation,191732830858,-1.0,False
319,66,correct_fixation,191772932129,-1.0,True
320,66,correct_fixation,191772980225,-1.0,False
321,66,correct_fixation,191783066707,-1.0,True
322,66,correct_fixation,191783299530,-1.0,False
323,66,correct_fixation,191783533328,-1.0,False


In [35]:
def get_trial_indices(events, df = False):
    if df:
        times = np.array([row.time for i, row in events.iterrows()])
    else:
        times = np.array([i.time for i in events])
#     print(len(times))
    diff_times = np.diff(times)
#     print(len(diff_times))

    trials = []

    mini_trial = [0]

    for i, t in enumerate(diff_times):
        if t < 1e7:
            mini_trial.append(i+1)
        else:
            trials.append(mini_trial)
            mini_trial = [i+1]
    trials.append(mini_trial)
    print(i)
    return trials

current_events = data_df.loc[data_df['name'] == 'stim_current']
id_events = data_df.loc[data_df['name'] == 'stim_id']
current_times = np.array([row.time for i, row in current_events.iterrows()])
id_times = np.array([row.time for i, row in id_events.iterrows()])
current_trials = get_trial_indices(current_events, df=True)
id_trials = get_trial_indices(id_events, df=True)

correct_id = []
correct_current = []
for i in range(len(id_trials)):
#     print((current_times[current_trials[i][0]] - id_times[id_trials[i][0]]), (current_times[current_trials[i+1][0]] - id_times[id_trials[i][0]]))
#     print((current_times[current_trials[i][-1]] - id_times[id_trials[i][-1]]), (current_times[current_trials[i+1][-1]] - id_times[id_trials[i][-1]]))

    
    current_trial = current_trials[i]
    id_trial = id_trials[i]
    if '' in np.array(id_events.iloc[id_trial].data):
        continue

        #     print([current_events.data.iloc[j] for j in current_trial], [id_events.data.iloc[j] for j in id_trial])

    for idx, j in enumerate(id_trial):
        try:
            correct_current.append(current_trial[idx])
            correct_id.append(j)
        except:
            print(idx)
        

1856
1813
8
8


In [45]:
stim_id_df = id_events.iloc[correct_id]
stim_current_df = current_events.iloc[correct_current]


In [38]:
len(stimulus_presented_df)

1806