## Split Stimulus Data
This workflow splits the original pickled CellRespZ time series into separate data series corresponding to the stimuli applied.

In [1]:
import os
import pickle
import numpy as np

In [2]:
# Directory containing the per-subject raw pickles.
data_dir = '../data/data_raw_pickled/'

# The subject identifiers are stored as a pickled object in the metadata folder.
subject_array_path = '../data/data_meta_pickled/subject_array.pickle'
with open(subject_array_path, mode='rb') as input_file:
    subject_array = pickle.load(input_file)

# Bare expression so the notebook displays the loaded identifiers.
subject_array

['subject_1',
 'subject_10',
 'subject_12',
 'subject_13',
 'subject_14',
 'subject_15',
 'subject_16',
 'subject_17',
 'subject_2',
 'subject_3',
 'subject_4',
 'subject_5',
 'subject_6',
 'subject_7']

In [3]:
def load_subject_data(subject, base_dir=None):
    """Load one subject's pickled time series and summarise its stimulus labels.

    Reads ``<subject>_CellRespZ.pickle`` and ``<subject>_mat_dict.pickle``
    from the data directory.

    Parameters
    ----------
    subject : str
        Subject identifier used as the file-name prefix, e.g. ``'subject_1'``.
    base_dir : str or path-like, optional
        Directory that holds the two pickles. When omitted, the
        notebook-level ``data_dir`` is used, so existing one-argument
        calls behave as before.

    Returns
    -------
    timeseries : object
        The object stored in the CellRespZ pickle (presumably a 2-D
        NumPy array -- TODO confirm against the data source).
    stim_shape : tuple of (ndarray, ndarray)
        Result of ``np.unique(mat['stim_full'], return_counts=True)``:
        the distinct stimulus values and how often each one occurs.

    Raises
    ------
    FileNotFoundError
        If either pickle is missing from the directory.
    KeyError
        If the mat dictionary has no ``'stim_full'`` entry.
    """
    if base_dir is None:
        base_dir = data_dir

    # os.path.join works whether or not the directory ends with a separator;
    # plain string concatenation silently produced a wrong path without one.
    timeseries_path = os.path.join(base_dir, subject + '_CellRespZ.pickle')
    mat_path = os.path.join(base_dir, subject + '_mat_dict.pickle')

    # NOTE: pickle.load can execute arbitrary code -- only use trusted files.
    with open(timeseries_path, "rb") as input_file:
        timeseries = pickle.load(input_file)
    with open(mat_path, "rb") as input_file:
        mat = pickle.load(input_file)

    # Computed after the file is closed; it only needs the in-memory dict.
    stim_shape = np.unique(mat['stim_full'], return_counts=True)
    return timeseries, stim_shape

### Data Exploration:

In [4]:
# For every subject, show the shape of its time series and the
# (values, counts) summary of its stimulus labels.
for subject in subject_array:
    timeseries, stim_shape = load_subject_data(subject)
    for label, value in ((' timeseries: ', timeseries.shape),
                         (' stimulus full ', stim_shape)):
        print(subject, label, value)

subject_1  timeseries:  (2880, 83205)
subject_1  stimulus full  (array([0, 1, 2, 3], dtype=uint8), array([720, 720, 720, 720], dtype=int64))
subject_10  timeseries:  (4426, 83396)
subject_10  stimulus full  (array([ 0,  1,  2,  3,  4,  9, 10, 11, 12, 13, 14, 15, 16, 21, 22, 23],
      dtype=uint8), array([204, 300, 300, 780, 720, 630, 220, 220, 220, 202,  40,  40,  50,
       150, 150, 200], dtype=int64))
subject_12  timeseries:  (6140, 58105)
subject_12  stimulus full  (array([ 0,  1,  2,  3,  4,  9, 10, 11, 12, 14, 15], dtype=uint8), array([ 500,  800,  800, 2750,  640,  150,  150,  150,  150,   25,   25],
      dtype=int64))
subject_13  timeseries:  (4840, 73202)
subject_13  stimulus full  (array([ 0,  1,  2,  3,  4,  9, 10, 11, 12, 14, 15], dtype=uint8), array([ 270,  440,  440, 2090, 1200,   50,   50,   50,   50,  100,  100],
      dtype=int64))
subject_14  timeseries:  (3890, 72412)
subject_14  stimulus full  (array([ 0,  1,  2,  3,  4,  9, 10, 11, 12, 14, 15], dtype=uint8), arra