# Fragility Data Analysis
This notebook prototypes the data-handling functionality for the fragility algorithm (EpiMAP).

To create the compressed JSON data files, set the parameters below (the data directory, whether the data is interictal, and the list of patient names) and then run the notebook from top to bottom.

These json data files can then be stored optimally on a database, such as MongoDB.

In [53]:
import pandas as pd
import numpy as np
import json
from IPython.display import display
import os

import scipy, scipy.io
try:
    import cPickle as pickle
except:
    import pickle

import gzip
import cStringIO
import bz2,json,contextlib

import matplotlib
from matplotlib import *
from matplotlib import pyplot as plt
import itertools
from mpl_toolkits.axes_grid1 import make_axes_locatable
# pretty charting
import seaborn as sns
sns.set_palette('muted')
sns.set_style('darkgrid')
from natsort import natsorted, ns

%matplotlib inline

## sklearn imports
import sklearn.linear_model
import sklearn.cross_validation
import sklearn.tree
import sklearn.ensemble
import sklearn.preprocessing
import sklearn.feature_selection
import sklearn.pipeline
import sklearn.grid_search

In [54]:
def loadmat(filename):
    '''
    Load a MATLAB .mat file and return its contents as a python dictionary.

    Use this in place of calling scipy.io.loadmat directly. The file is read
    with struct_as_record=False and squeeze_me=True, and the result is handed
    to _check_keys so that top-level entries which are still mat-objects come
    back as nested dictionaries.
    '''
    raw_contents = scipy.io.loadmat(filename,
                                    squeeze_me=True,
                                    struct_as_record=False)
    converted = _check_keys(raw_contents)
    return converted

def _check_keys(dict):
    '''
    checks if entries in dictionary are mat-objects. If yes
    todict is called to change them to nested dictionaries
    '''
    for key in dict:
        if isinstance(dict[key], scipy.io.matlab.mio5_params.mat_struct):
            dict[key] = _todict(dict[key])
    return dict        

def _todict(matobj):
    '''
    A recursive function which constructs from matobjects nested dictionaries
    '''
    dict = {}
    for strg in matobj._fieldnames:
        elem = matobj.__dict__[strg]
        if isinstance(elem, scipy.io.matlab.mio5_params.mat_struct):
            dict[strg] = _todict(elem)
        elif isinstance(elem,np.ndarray):
            dict[strg] = _tolist(elem)
        else:
            dict[strg] = elem
    return dict

def _tolist(ndarray):
    '''
    A recursive function which constructs lists from cellarrays 
    (which are loaded as numpy ndarrays), recursing into the elements
    if they contain matobjects.
    '''
    elem_list = []            
    for sub_elem in ndarray:
        if isinstance(sub_elem, scipy.io.matlab.mio5_params.mat_struct):
            elem_list.append(_todict(sub_elem))
        elif isinstance(sub_elem,np.ndarray):
            elem_list.append(_tolist(sub_elem))
        else:
            elem_list.append(sub_elem)
    return elem_list

def convertMatToJSON(matData, fileName):
    '''
    Write the contents of a loaded .mat dictionary to a bz2-compressed
    json file.

    Top-level numpy arrays are stored as their pickle (protocol 0) text;
    every other value is handed to json unchanged, so it must already be
    json-serializable.

    matData:  dictionary of loaded data, e.g. the result of loadmat
    fileName: path of the .json.bz2 file to create (overwritten if present)

    Raises whatever json.dumps raises (TypeError / ValueError) when a value
    cannot be serialized; in that case no file is written.

    NOTE: reading the arrays back requires pickle.loads on the stored
    strings, so only unpickle files that come from a trusted source.
    '''
    # Local container: the result must not depend on, or leak into, a
    # module-level `jsonData` left over from an earlier run.
    jsonData = {}
    for key, value in matData.items():
        if isinstance(value, np.ndarray):
            pickled = pickle.dumps(value, protocol=0)  # protocol 0 is the text protocol
            if not isinstance(pickled, str):
                # python 3 returns bytes; latin-1 maps every byte to one
                # character, so the pickle can be restored losslessly
                pickled = pickled.decode('latin-1')
            jsonData[key] = pickled
        else:
            jsonData[key] = value

    # Serialize before opening the file so that a failure here does not
    # leave a truncated archive behind.
    payload = json.dumps(jsonData)
    if not isinstance(payload, bytes):
        payload = payload.encode('utf-8')

    with contextlib.closing(bz2.BZ2File(fileName, 'wb')) as f:
        f.write(payload)

In [59]:
## Take .mat EEG files and convert them to .json data

patients = [
#             'pt1sz2', 'pt1sz3', 'pt1sz4', \
#             'pt2sz1', 'pt2sz3', 
            'pt2sz4', \
            'pt3sz2', 'pt3sz4', 'pt6sz3', 'pt6sz4', 'pt6sz5', \
            'pt7sz19', 'pt7sz21', 'pt7sz22', \
            'pt8sz1', 'pt8sz2', 'pt8sz3', \
            'JH101sz1', 'JH101sz2', 'JH101sz3', 'JH101sz4', \
            'JH102sz1', 'JH102sz2', 'JH102sz3', 'JH102sz4', 'JH102sz5', 'JH102sz6', \
            'JH103sz1', 'JH103sz2', 'JH103sz3', \
            'JH104sz1', 'JH104sz2', 'JH104sz3', \
            'JH105sz1', 'JH105sz2', 'JH105sz3', 'JH105sz4', 'JH105sz5',\
            'JH106sz1', 'JH106sz2', 'JH106sz3', 'JH106sz4', 'JH106sz5', 'JH106sz6', \
            'JH107sz1', 'JH107sz2', 'JH107sz3', 'JH107sz4', 'JH107sz5', 'JH107sz6', 'JH107sz7', 'JH107sz8', 'JH107sz9', \
            'JH108sz1', 'JH108sz2', 'JH108sz3', 'JH108sz4', 'JH108sz5', 'JH108sz6', 'JH108sz7'
           ]
EXTERNAL = 1

# initialize directory with data
dataDir = '../fragility_dataanalysis/data/'
iiDataDir = '../fragility_dataanalysis/data/interictal_data/'
newDataDir = '../fragility_dataanalysis/data/json/' # new directory to save data

if EXTERNAL:
    dataDir = '/Volumes/NIL_PASS/data/'
    iiDataDir = '/Volumes/NIL_PASS/data/interictal_data/'
    newDataDir = '/Volumes/NIL_PASS/data/json/'
    
# check if new dataDir exists, if not create it
if not os.path.exists(newDataDir):
    os.makedirs(newDataDir)
    
print 'EXTERNAL IS: ', EXTERNAL

EXTERNAL IS:  1


In [60]:
# get mat files in a patient's directory
patient = patients[0]

for patient in patients:
    print "Starting patient ", patient
    
    patientDir = dataDir + patient + '/'
    matFiles = []
    for file in os.listdir(patientDir):
        if file.endswith('.mat'):
            matFiles.append(file)

    if len(matFiles) > 1:
        print "There is too many .mat files in this directory!"
        print "Check patient ", patient

    matFile = patientDir + matFiles[0]
    data = loadmat(matFile)

    fileName = newDataDir + patient + 'raw.json.bz2'
    try:
        convertMatToJSON(data, fileName)
    except:
        print "Problem with ", patient


Starting patient  pt2sz4
Problem with  pt2sz4
Starting patient  pt3sz2
Starting patient  pt3sz4
Starting patient  pt6sz3
Starting patient  pt6sz4
Starting patient  pt6sz5
Starting patient  pt7sz19
Starting patient  pt7sz21
Starting patient  pt7sz22
Starting patient  pt8sz1
Starting patient  pt8sz2
Starting patient  pt8sz3
Starting patient  JH101sz1
Starting patient  JH101sz2
Starting patient  JH101sz3
Starting patient  JH101sz4
Starting patient  JH102sz1
Starting patient  JH102sz2
Starting patient  JH102sz3
Starting patient  JH102sz4
Starting patient  JH102sz5
Starting patient  JH102sz6
Starting patient  JH103sz1
Starting patient  JH103sz2
Starting patient  JH103sz3
Starting patient  JH104sz1
Starting patient  JH104sz2
Starting patient  JH104sz3
Starting patient  JH105sz1
Starting patient  JH105sz2
Starting patient  JH105sz3
Starting patient  JH105sz4
Starting patient  JH105sz5
Starting patient  JH106sz1
Starting patient  JH106sz2
Starting patient  JH106sz3
Starting patient  JH106sz4
S