In [1]:
import gc
import os
import random
import re
import sys
from os import environ
from os import walk

import gcsfs
import h5py
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras.metrics
from google.cloud import storage
from imblearn.over_sampling import (SMOTE, BorderlineSMOTE, SVMSMOTE, SMOTENC, KMeansSMOTE)
from imblearn.over_sampling import ADASYN
from imblearn.under_sampling import NearMiss 
from imblearn.under_sampling import RandomUnderSampler
from pyts.image import GramianAngularField
from pyts.image import RecurrencePlot
from scipy.io import loadmat
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Conv3D, MaxPool3D, Flatten, Dense
from tensorflow.keras.layers import Dropout, Input, BatchNormalization
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adadelta

# from plotly.offline import iplot, init_notebook_mode
# import plotly.graph_objs as go
# from matplotlib.pyplot import cm
# import keras
# import glob
# from google.colab import drive

In [None]:
input_layer = Input((108, 108, 16,1))

## convolutional layers
conv_layer1 = Conv3D(filters=64, kernel_size=(3, 3, 3), strides=(2, 2, 2),padding='same')(input_layer)
leaky_layer1 = tf.keras.layers.LeakyReLU()(conv_layer1)

# conv_layer1 = (conv_layer1)(leaky_layer1)

## add max pooling to obtain the most imformatic features
pooling_layer1 = MaxPool3D(pool_size=(2, 2, 2), strides =(2,2,2))(leaky_layer1)


conv_layer2 = Conv3D(filters=128, kernel_size=(3, 3, 3), strides=(1, 1, 1),padding='same')(pooling_layer1)
leaky_layer2 = tf.keras.layers.LeakyReLU()(conv_layer2)

## add max pooling to obtain the most imformatic features
pooling_layer2 = MaxPool3D(pool_size=(2, 2, 2), strides = (1,2,2))(leaky_layer2)

conv_layer3 = Conv3D(filters=256, kernel_size=(3,3,3), strides=(1,1,1),padding='same')(pooling_layer2)
leaky_layer3 = tf.keras.layers.LeakyReLU()(conv_layer3)

conv_layer4 = Conv3D(filters=256, kernel_size=(3,3,3), strides=(1,1,1),padding='same')(leaky_layer3)
leaky_layer4 = tf.keras.layers.LeakyReLU()(conv_layer4)

pooling_layer3 = MaxPool3D(pool_size=(2, 2, 2), strides = (1,2,2))(leaky_layer4)

# flatten before passing to fully connected layer 
pooling_layer3 = BatchNormalization()(pooling_layer3)

flatten_layer=Flatten()(pooling_layer3)


dense_layer1 = Dense(units=128)(flatten_layer)
dense_layer1 = Dropout(0.5)(dense_layer1)

dense_layer2 = Dense(units=64)(dense_layer1)
dense_layer2 = Dropout(0.5)(dense_layer2)

output_layer = Dense(units=2, activation='softmax')(dense_layer2)
model = Model(inputs=input_layer, outputs=output_layer)

model.compile(loss=categorical_crossentropy, optimizer=tf.keras.optimizers.Adam(learning_rate=0.01, beta_1=0.9, beta_2=0.999), metrics=['accuracy'])



In [2]:
# Credentials should be set on terminal prior to running on gcp
# os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = r"G:\Seizure_Data\credentials\seizure-prediction123-593d1ad578b8.json"
print(os.environ.get('GOOGLE_APPLICATION_CREDENTIALS'))
client=storage.Client()
os.environ["GCP_BUCKET_NAME"] = "marwansrikarharris"
bucket_name = os.environ.get("GCP_BUCKET_NAME")
bucket = client.get_bucket(bucket_name)
files = bucket.list_blobs(prefix="Data/")
fileList = [file.name for file in files if '.' in file.name]
fs=gcsfs.GCSFileSystem(project='seizure-prediction123')

G:\Seizure_Data\credentials\seizure-prediction123-593d1ad578b8.json


Assemble data

In [None]:
# Assemble list of file names for each dog
Dog_1_files = []
Dog_2_files = []
Dog_3_files = []
Dog_4_files = []
Dog_5_files = []

for idx, filename in enumerate(fileList):
    Dog_1_search = re.search(r"Dog_1", filename)
    Dog_2_search = re.search(r"Dog_2", filename)
    Dog_3_search = re.search(r"Dog_3", filename)
    Dog_4_search = re.search(r"Dog_4", filename)
    Dog_5_search = re.search(r"Dog_5", filename)

    if Dog_1_search:
        Dog_1_files.append(filename)
    elif Dog_2_search:
        Dog_2_files.append(filename)
    elif Dog_3_search:
        Dog_3_files.append(filename)
    elif Dog_3_search:
        Dog_3_files.append(filename)
    elif Dog_4_search:
        Dog_4_files.append(filename)
    elif Dog_5_search:
        Dog_5_files.append(filename)

# list containing the indices of interictal and preictal data end        
master_indices =np.zeros((5 ,2))
master_indices[0][0 ] =479
master_indices[0][1 ] =503
master_indices[1][0 ] =499
master_indices[1][1 ] =541
master_indices[2][0 ] =1439
master_indices[2][1 ] =1511
master_indices[3][0 ] =803
master_indices[3][1 ] =900
master_indices[4][0 ] =449
master_indices[4][1 ] =479

master_indices[0][0]=803
master_indices[0][1]=900

all_dogs =[]
all_dogs.append(Dog_1_files)
all_dogs.append(Dog_2_files)
all_dogs.append(Dog_3_files)
all_dogs.append(Dog_4_files)
all_dogs.append(Dog_5_files)

all_dogs =np.array(all_dogs)

In [3]:
# get mariables from .mat structs
def get_variables(x,path,searched_string):
    searched_string=searched_string.group(0)
    searched_string=re.sub(r"_[0]+(?=[1-9])","_",searched_string)
    time_series_data=x[searched_string][0][0][0]
    data_length_sec=x[searched_string][0][0][1]
    sampling_frequency=data_length_sec=x[searched_string][0][0][2]
    channels=data_length_sec=x[searched_string][0][0][3]
    assemble_matrix(time_series_data,path)
    

In [30]:
#represent time series data as a Gramiam plot
def convert_to_Gramiam(channel_data):
    transformer=RecurrencePlot()
    X_new = transformer.transform(channel_data)
    input_size=3996
    output_size=111
    bin_size = input_size // output_size
    X_new = X_new.reshape((1,output_size, bin_size, 
                                   output_size, bin_size,)).max(4).max(2)

# Change datatype to lower memory usage
    X_new=X_new.astype(np.float16)
    return X_new

In [23]:
# Split data into 10 second long segments
def assemble_matrix(time_series_data,path):
    time_series_section=time_series_data[:,3:239763]
    equal_partitions=np.hsplit(time_series_section,60)
    image_array=get_image_matrix(equal_partitions)
    assemble_y(path)

In [24]:
# convert obtain an image for each image
def get_image_matrix(equal_partitions):]
    for idx, segment in enumerate(equal_partitions):
        image_matrix=[]
        for index, single_channel in enumerate(segment,start=0):
            single_channel=np.reshape(single_channel,(1,len(single_channel)))
            image_matrix.append(convert_to_Gramiam(single_channel))
        master_array.append(create_3D_image_array(image_matrix))
    

In [25]:
# compile array of gramiam images for each channel
def create_3D_image_array(image_matrix):
    image_1=image_matrix[0][0]
    image_2=image_matrix[1][0]
    image_3=image_matrix[2][0]
    image_4=image_matrix[3][0]
    image_5=image_matrix[4][0]
    image_6=image_matrix[5][0]
    image_7=image_matrix[6][0]
    image_8=image_matrix[7][0]
    image_9=image_matrix[8][0]
    image_10=image_matrix[9][0]
    image_11=image_matrix[10][0]
    image_12=image_matrix[11][0]
    image_13=image_matrix[12][0]
    image_14=image_matrix[13][0]
    image_15=image_matrix[14][0]
    image_16=image_matrix[15][0]

#     image_array_3D=np.dstack([image_1,image_2,image_3,image_4,image_5,image_6,image_7,image_8,image_9,
#                               image_10,image_11,image_12,image_13,image_14,image_15,image_16])

    image_array_3D=np.dstack([image_1,image_2,image_3,image_4,image_5,image_6,image_7,image_8,image_9,
                              image_10,image_11,image_12,image_13,image_14,image_15])

    return image_array_3D

In [26]:
def add_to_master(list_of_3D_arrays):
    sample_3D_array=list_of_3D_arrays
    master_array.append(list_of_3D_arrays)

In [27]:
# Create y labels
def assemble_y(path):
    match=re.search('preictal',path)
    if match:
        for i in range(60):
            master_Y.append([0,1])
    else:
        for i in range(60):
            master_Y.append([1,0])

In [28]:
def list_to_array():
    all_X=np.array(master_array)
    all_Y=np.array(master_Y)
    return all_X,all_Y

In [None]:
# For each training iteration, obtain two preictal files
def get_preictal_data(dog_files,interictal_end,preictal_end,curr_iteration):
    print("GETTING PREICTAL DATA")
    values_to_pass=curr_iteration*2    
    sequence_start=interictal_end+1+values_to_pass
    sequence_end=sequence_start+1
    
    
    file_names=[]
    file_names.append(dog_files[int(sequence_start)])
    file_names.append(dog_files[int(sequence_end)])
    
    for file in file_names:
        file_dir=bucket_name+r"/"+(file)
        with fs.open(file_dir,'rb') as f:
            matching_string=r"(?<=[0-9]_)[\S]+(?=.mat)"
            searched_string=re.search(matching_string,file)
            data_file=file
            print("Filename: {} " .format(data_file))
            x=loadmat(f)
            get_variables(x,data_file,searched_string)

In [None]:
# Hashmap to return random indices of interical data in 2-D matrix 
def get_indices(runs, end_idx):
    runs = int(runs)
    dict = {}
    cur_id = 0
    interictal_index = np.zeros([runs,8])
    for x in range(runs):
        for y in range(8):
            exists = True
            temp = 0
            while(exists):
                temp = random.randint(1, end_idx)
                if not temp in dict.values():
                    exists = False
            interictal_index[x,y] = temp
            dict[cur_id] = temp
            cur_id += 1
    return interictal_index

In [None]:
# Main function iterate through file names and train model
def run():
# class imbalance techinques
    sm = SMOTE(random_state=42, sampling_strategy=1)
    rus = RandomUnderSampler(random_state=42, sampling_strategy=0.5)
    
    for idx, dog_files in enumerate(all_dogs):
        
# For each iteration, get 8 random interical files. 
        inter_pre_indices = master_indices[idx]
        interictal_end = inter_pre_indices[0]
        run_number = (inter_pre_indices[1] - interictal_end) / 2
        dog_indices = get_indices(run_number, interictal_end)

        for row_num, row in enumerate(dog_indices):
            master_array = []
            master_Y = []
            for i, index in enumerate(row):
                file_name = dog_files[int(index)]
                file_dir = bucket_name + r"/" + (file_name)
                with fs.open(file_dir, 'rb') as f:
                    matching_string = r"(?<=[0-9]_)[\S]+(?=.mat)"
                    searched_string = re.search(matching_string, file_name)
                    data_file = file_name
                    print("Filename: {} ".format(data_file))
                    print("ClassName: {} ".format(searched_string.group(0)))
                    x = loadmat(f)
                    get_variables(x, data_file, searched_string, master_array, master_Y)
            get_preictal_data(dog_files, interictal_end, inter_pre_indices[1], row_num, master_array, master_Y)
            print("ONE ITERATION COMPLETE")
            all_X, all_Y = list_to_array(master_array, master_Y)
            all_X = all_X.reshape(len(all_X), 174960)
            
# Implement imbalance algorithms
            all_X, all_Y = rus.fit_resample(all_X, all_Y)
            new_y = []
            for k, val in enumerate(all_Y):
                if val[0] == 1:
                    new_y.append([0, 1])
                else:
                    new_y.append([1, 0])
            new_y = np.array(new_y)
            all_Y = new_y

            new_y = None
            new_y = []
            all_X, all_Y = sm.fit_resample(all_X, all_Y)
            for k, val in enumerate(all_Y):
                if val[0] == 1:
                    new_y.append([0, 1])
                else:
                    new_y.append([1, 0])

            new_y = np.array(new_y)
            
# Train model on batch
            all_X = all_X.reshape(len(all_X), 108, 108, 15, 1)
            metrics=model.train_on_batch(x=all_X, y=new_y)
            print(metrics)
            
#Reset variables to optimize memory usage
            all_X = None
            all_Y = None
            x_train = None
            y_train = None
            master_array = None
            master_Y = None
            master_array = []
            master_Y = []
            new_y = None
            gc.collect()



In [None]:
master_array=[]
master_Y=[]
run()

In [None]:
# If running on GCP, model automatically saves to bucket 
# model.save(r'C:\Users\harri\Desktop\CorTech\TeamAlpha\Saved_Model\savedModel3.h5')