# Raw Data Pickling Function
This notebook converts the raw data downloaded from [Chen et al.](https://janelia.figshare.com/articles/Whole-brain_light-sheet_imaging_data/7272617) into Pandas, NumPy, and plain Python objects, and pickles the results.

For the notebook to run, place all of the raw data in [../data/data_raw_from_paper](../data/data_raw_from_paper), with each subject's `data_full.mat` and `TimeSeries.h5` under `<subject>/<subject>/`.

In [1]:
import pandas as pd
import numpy as np
import scipy.io as scio
import h5py
from util_functions import list_subjects, pickle_file, starting_run, finished_run

### Notebook Parameters

In [2]:
# Path prefix prepended to every per-subject pickle written by the save functions.
output_data_dir = 'data_raw_pickled/'

# Index of pickled recordings; starts empty and gains one row per subject
# when that subject's 'CellRespZ' dataset is saved.
data_raw_pickled_df = pd.DataFrame(
    columns=[
        'subject',
        'stimulus',
        'rel_path',
        'timepoints_count',
        'timepoints',
    ],
)

### Notebook Functions

In [3]:
def save_mat_data(path, subject):
    """Load a subject's MATLAB file and pickle its leading fields as a dict.

    The fields of ``mat['data'][0][0]`` are read by position and stored
    under descriptive keys. The resulting dict is written to
    ``output_data_dir + subject + '_mat_dict.pickle'`` via ``pickle_file``.

    Args:
        path: Location of the ``.mat`` file to load.
        subject: Subject identifier used as the prefix of the output file name.
    """
    # Key names in positional order: entry i labels field i of the record.
    # Not exported: indices 18-23, which the disabled entries of this mapping
    # labelled behavior_full_motor, behavior_avr_motor, eye_full_motorseed,
    # eye_avr_motorseed, eye_full and eye_avr.
    field_names = (
        "periods",
        "timelists_names",
        "stimulus_key_raw",
        "cell_XYZ",
        "anat_stack",
        "frame_per_sec",
        "behavior_raw",
        "num_cells",
        "cell_XYZ_norm",
        "IX_inval_anat",
        "anat_yx",
        "anat_yz",
        "anat_xz",
        "timelists",
        "stim_full",
        "stim_avr",
        "behavior_full",
        "behavior_avr",
    )
    record = scio.loadmat(path)['data'][0][0]
    mat_dict = {name: record[position] for position, name in enumerate(field_names)}
    pickle_file(output_data_dir + subject + '_mat_dict.pickle', mat_dict)

In [4]:
def save_h5_data(path, subject):
    """Pickle every top-level entry of an HDF5 file as a NumPy array.

    Each key ``k`` of the file is converted with ``np.array`` and written to
    ``output_data_dir + subject + '_' + k + '.pickle'`` via ``pickle_file``.
    When the key is ``'CellRespZ'``, a row for the subject is also recorded
    in the module-level ``data_raw_pickled_df``.

    Args:
        path: Location of the ``.h5`` file to read.
        subject: Subject identifier; used as the output file-name prefix and
            as the row label in ``data_raw_pickled_df``.
    """
    # The context manager guarantees the HDF5 handle is closed, even if
    # reading or pickling one of the entries raises.
    with h5py.File(path, 'r') as f:
        for key in f.keys():
            save_path = output_data_dir + subject + '_' + key + '.pickle'
            # Read the entry from disk once and reuse it for both the pickle
            # and the shape recorded below.
            values = np.array(f[key])
            pickle_file(save_path, values)
            if key == 'CellRespZ':
                # Row values: subject, None, pickle path, the array's full
                # shape tuple, None. The two None slots are not filled here.
                data_raw_pickled_df.loc[subject] = [subject, None, save_path, values.shape, None]

In [5]:
# Pickle both raw files of every subject, then persist the index of what was written.
for subject in list_subjects():
    # Raw files sit in a folder named after the subject, nested inside
    # another folder of the same name.
    subject_input_path = '../data/data_raw_from_paper/' + '/'.join((subject, subject))
    starting_run('save ' + subject)
    save_mat_data(subject_input_path + '/data_full.mat', subject)
    save_h5_data(subject_input_path + '/TimeSeries.h5', subject)

finished_run('saving raw data')

# The index is written only after all subjects have been processed.
pickle_file('data_meta_pickled/data_raw_pickled_df.pickle', data_raw_pickled_df)

Starting save subject_1 16:11:43.568480
Starting save subject_10 16:11:52.914547
Starting save subject_12 16:12:07.243676
Starting save subject_13 16:12:20.625531
Starting save subject_14 16:12:33.534627
Starting save subject_15 16:12:44.479386
Starting save subject_16 16:12:53.924326
Starting save subject_17 16:12:59.593224
Starting save subject_18 16:13:14.778975
Starting save subject_2 16:13:33.269163
Starting save subject_3 16:13:43.491465
Starting save subject_4 16:13:51.267467
Starting save subject_5 16:14:00.912230
Starting save subject_6 16:14:11.511444
Starting save subject_7 16:14:23.712471
Finished saving raw data 16:14:30.591768
