# Load Data

In [1]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import os
import pickle


In [2]:
def load_filtred_files(path): 
    fichiers=[] 
    for dirname, _, filenames in os.walk(path): 
        for filename in filenames: 
            fichiers.append(os.path.join(dirname, filename)) 
    files=[]
    for f in fichiers:
        if f.endswith(".npy"):
            files.append(f)
    
    return files

In [3]:
path1=r"C:\Users\21655\Desktop\Semestre2\Projet DS\data\edf\epilepsy\01_tcp_ar"
path2=r"C:\Users\21655\Desktop\Semestre2\Projet DS\data\edf\no_epilepsy\01_tcp_ar"
epilepsy_filtred_files=load_filtred_files(path1)
no_epilepsy_filtred_files=load_filtred_files(path2)

In [4]:
def get_id(path,patient):
    if patient == 'epilepsy':
        id_p=path[75:len(path)-39]
    else:
        id_p=path[78:len(path)-39]
    return id_p

In [5]:
filtred_files = []

for i in epilepsy_filtred_files:
    filtred_files.append([i,'epilepsy',get_id(i,'epilepsy')])
        
for i in no_epilepsy_filtred_files:
    filtred_files.append([i,'no_epilepsy',get_id(i,'no_epilepsy')])

# Prepare the data

In [6]:
filtred_files[0][0]

'C:\\Users\\21655\\Desktop\\Semestre2\\Projet DS\\data\\edf\\epilepsy\\01_tcp_ar\\003\\00000355\\s003_2013_01_04\\00000355_s003_t000.npy'

In [7]:
rd=np.load(filtred_files[0][0])

In [8]:
rd.shape

(21, 331250)

In [9]:
rd

array([[ 0.00000000e+00,  0.00000000e+00,  1.89243836e-06, ...,
         3.49999977e-09,  3.49999979e-09,  3.49999981e-09],
       [ 0.00000000e+00,  0.00000000e+00,  1.26682850e-06, ...,
         3.49999975e-09,  3.49999978e-09,  3.49999980e-09],
       [ 0.00000000e+00,  0.00000000e+00, -4.57413314e-07, ...,
         3.50000007e-09,  3.50000006e-09,  3.50000006e-09],
       ...,
       [ 0.00000000e+00,  0.00000000e+00,  7.63288856e-07, ...,
         3.49999987e-09,  3.49999988e-09,  3.49999990e-09],
       [ 0.00000000e+00,  0.00000000e+00, -2.89566765e-07, ...,
         3.50000001e-09,  3.50000001e-09,  3.50000001e-09],
       [ 0.00000000e+00,  0.00000000e+00, -1.17457584e-06, ...,
         3.50000000e-09,  3.50000000e-09,  3.50000000e-09]])

In [10]:
import math
win_size = 5000
dt_length = math.floor (rd.shape[1] / win_size)

In [11]:
data=[]
for i in range(dt_length):
    data.append (rd[:, i*win_size : i*win_size+win_size])

In [12]:
len(data)

66

In [13]:
data[0][0].shape[0]

5000

In [14]:
data[0]

array([[ 0.00000000e+00,  0.00000000e+00,  1.89243836e-06, ...,
        -3.23800121e-05, -3.19950522e-05, -3.14502242e-05],
       [ 0.00000000e+00,  0.00000000e+00,  1.26682850e-06, ...,
        -3.01031307e-05, -2.96254246e-05, -2.87529846e-05],
       [ 0.00000000e+00,  0.00000000e+00, -4.57413314e-07, ...,
        -9.04695686e-06, -9.05743780e-06, -9.15842331e-06],
       ...,
       [ 0.00000000e+00,  0.00000000e+00,  7.63288856e-07, ...,
        -1.41663934e-05, -1.39701062e-05, -1.37171539e-05],
       [ 0.00000000e+00,  0.00000000e+00, -2.89566765e-07, ...,
        -4.77776491e-06, -4.60481396e-06, -4.57122832e-06],
       [ 0.00000000e+00,  0.00000000e+00, -1.17457584e-06, ...,
        -3.24158146e-06, -3.16121375e-06, -3.21095303e-06]])

In [15]:
data[0].shape

(21, 5000)

In [16]:
data[0].reshape(-1,1)

array([[ 0.00000000e+00],
       [ 0.00000000e+00],
       [ 1.89243836e-06],
       ...,
       [-3.24158146e-06],
       [-3.16121375e-06],
       [-3.21095303e-06]])

In [17]:
X = []
for j in range(dt_length):
    X.append (data[j].reshape(-1,1))

In [18]:
X[0].shape

(105000, 1)

In [19]:
len(X)

66

In [20]:
#Window size * # of electrode channels gives us number of features - input to the NN

win_size = 5000 # 20s can be changed


def prepX (fname,tag):
    
    dt_length = 0
    rm=np.load(fname)
    
    if tag=='no_epilepsy':
        
        rd=rm
        dt_length = math.floor (rd.shape[1] / win_size)
        Y = [0] * dt_length
        
    else:

        rd=rm
        dt_length = math.floor (rd.shape[1] / win_size)
        Y = [1] * dt_length
    

    data=[]
    for i in range(dt_length):
        data.append (rd[:, i*win_size : i*win_size+win_size])
       
        
    X = []
    for j in range(dt_length):
        X.append (data[j].reshape(-1,1))

        
    return X, Y

In [21]:
from random import shuffle
import pickle

In [22]:
filtred_files[0][0]

'C:\\Users\\21655\\Desktop\\Semestre2\\Projet DS\\data\\edf\\epilepsy\\01_tcp_ar\\003\\00000355\\s003_2013_01_04\\00000355_s003_t000.npy'

In [None]:
#This part takes a while
from random import shuffle
import pickle

fns  = []

fns=filtred_files
shuffle(fns)

for i in range(len(fns)):
    print(fns[i])
    X_f, Y_f = prepX(fns[i][0],fns[i][1])

    
    dbfile = open("C:\\Users\\21655\\Desktop\\Projet\\Processed_Data\\" + fns[i][0][-22:-4], 'wb')
      
    # pickle the preproced data
    pickle.dump([X_f, Y_f], dbfile)                   
    dbfile.close()

## Get the preprocessed data and feed into the model


In [23]:
prep_files = [fn for fn in os.listdir("C:\\Users\\21655\\Desktop\\Projet\\Processed_Data\\")]
#prep_files

In [24]:
shuffle(prep_files)

In [25]:
#prep_files

In [26]:
#Splitting into train/test sets
from sklearn.model_selection import train_test_split

Prep_train,Prep_test = train_test_split(prep_files, test_size=0.2,random_state=42)


In [27]:
len(Prep_train)

1001

In [28]:
len(Prep_test)

251

# LSTM 1

In [30]:
def batch_generator(prep_files,batch_size,steps):
    
    idx=1
    while True: 
        yield load_data(prep_files,idx-1,batch_size)## Yields data
        if idx<steps:
            idx+=1
        else:
            idx=1

In [31]:
def load_data(prep_files,idx,batch_size):
    
    X_list = []
    Y_list = []

    for f in prep_files[idx*batch_size:idx*batch_size+batch_size]:

        with open("C:\\Users\\21655\\Desktop\\Projet\\Processed_Data\\" + f, 'rb') as f:
            X_f, Y_f = pickle.load(f)
            X_list.extend(X_f)
            Y_list.extend(Y_f)
            f.close()

    X = np.asarray(X_list)
    X = X.reshape(X.shape[0], X.shape[1])

    Y = np.asarray(Y_list)
    Y = Y.reshape(Y.shape[0],1)

    #Normalizing the feature values (very important for NNs)
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    
    X = np.reshape(X, (X.shape[0],1,X.shape[1]))

    return X,Y

In [32]:
import sklearn.linear_model

import sklearn.metrics
from sklearn.preprocessing import StandardScaler

In [33]:
## importing libraries
import tensorflow as tf
from tensorflow.keras import Sequential

from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import LSTM
tf.keras.backend.clear_session()

# some model parameters
output_dim = 2
input_dim = 105000
batch_size = 10 
nb_epoch = 20
steps_per_epoch=np.ceil(1001/batch_size)
validation_steps=np.ceil(251/batch_size)

### Generator objects for train and validation
my_training_batch_generator = batch_generator(Prep_train, 10,steps_per_epoch)
my_validation_batch_generator = batch_generator(Prep_test, 10,validation_steps)

model = Sequential()
model.add(LSTM(4, stateful=True, return_sequences=False,input_shape=(1,105000), batch_size=1))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam',  metrics = ['accuracy'])

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (1, 4)                    1680080   
_________________________________________________________________
dense (Dense)                (1, 1)                    5         
Total params: 1,680,085
Trainable params: 1,680,085
Non-trainable params: 0
_________________________________________________________________


In [31]:
model.fit_generator(my_training_batch_generator,
epochs=20,steps_per_epoch=steps_per_epoch,
 verbose=1, validation_data=my_validation_batch_generator, 
validation_steps=validation_steps)



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x2af930e8d08>

In [32]:
model.save('model_lsttm')



INFO:tensorflow:Assets written to: model_lsttm\assets


INFO:tensorflow:Assets written to: model_lsttm\assets


In [34]:
loaded_model = tf.keras.models.load_model(r"C:\Users\21655\Desktop\Projet\model_lsttm")

In [None]:
loss,acc=loaded_model.evaluate_generator(my_validation_batch_generator)
print("accuracy: {:5.2f}% ".format(100*acc))
print("loss: {:5.2f}% ".format(100*loss))

# LSTM 1-1

In [29]:
def batch_generator(prep_files,batch_size,steps):
    
    idx=1
    while True: 
        yield load_data(prep_files,idx-1,batch_size)## Yields data
        if idx<steps:
            idx+=1
        else:
            idx=1

In [39]:
def load_data(prep_files,idx,batch_size):
    
    X_list = []
    Y_list = []

    for f in prep_files[idx*batch_size:idx*batch_size+batch_size]:

        with open("C:\\Users\\21655\\Desktop\\Projet\\Processed_Data\\" + f, 'rb') as f:
            X_f, Y_f = pickle.load(f)
            X_list.extend(X_f)
            Y_list.extend(Y_f)
            f.close()

    X = np.asarray(X_list)
    X = X.reshape(X.shape[0], X.shape[1])

    Y = np.asarray(Y_list)
    Y = Y.reshape(Y.shape[0],1)

    #Normalizing the feature values (very important for NNs)
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    
    X = np.reshape(X, (X.shape[0],1,X.shape[1]))
    
    #Y = to_categorical(Y)

    return X,Y

In [31]:
import sklearn.linear_model

import sklearn.metrics
from sklearn.preprocessing import StandardScaler
from keras.utils import to_categorical

In [40]:
## importing libraries
import tensorflow as tf
from tensorflow.keras import Sequential

from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import LSTM
tf.keras.backend.clear_session()

# some model parameters
output_dim = 2
input_dim = 105000
batch_size = 10 
nb_epoch = 20
steps_per_epoch=np.ceil(1001/batch_size)
validation_steps=np.ceil(251/batch_size)

### Generator objects for train and validation
my_training_batch_generator = batch_generator(Prep_train, 10,steps_per_epoch)
my_validation_batch_generator = batch_generator(Prep_test, 10,validation_steps)

model = Sequential()
model.add(LSTM(4,activation='relu', return_sequences=False,input_shape=(1,105000)))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam',  metrics = ['accuracy'])

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 4)                 1680080   
_________________________________________________________________
dense (Dense)                (None, 1)                 5         
Total params: 1,680,085
Trainable params: 1,680,085
Non-trainable params: 0
_________________________________________________________________


In [41]:
model.fit(my_training_batch_generator,
epochs=20,steps_per_epoch=steps_per_epoch,
 verbose=1, validation_data=my_validation_batch_generator, 
validation_steps=validation_steps)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x16b00d77d88>

In [42]:
model.save('model_LSTM_1')

INFO:tensorflow:Assets written to: model_LSTM_1\assets


# LSTM 2

In [25]:
def batch_generator(prep_files,batch_size,steps):
    
    idx=1
    while True: 
        yield load_data(prep_files,idx-1,batch_size)## Yields data
        if idx<steps:
            idx+=1
        else:
            idx=1

In [26]:
def load_data(prep_files,idx,batch_size):
    
    X_list = []
    Y_list = []

    for f in prep_files[idx*batch_size:idx*batch_size+batch_size]:

        with open("C:\\Users\\21655\\Desktop\\Projet\\Processed_Data\\" + f, 'rb') as f:
            X_f, Y_f = pickle.load(f)
            X_list.extend(X_f)
            Y_list.extend(Y_f)
            f.close()

    X = np.asarray(X_list)
    X = X.reshape(X.shape[0], X.shape[1])

    Y = np.asarray(Y_list)
    Y = Y.reshape(Y.shape[0],1)

    #Normalizing the feature values (very important for NNs)
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    
    X = np.reshape(X, (X.shape[0],1,X.shape[1]))

    return X,Y

In [27]:
import sklearn.linear_model

import sklearn.metrics
from sklearn.preprocessing import StandardScaler

In [30]:
## importing libraries
import tensorflow as tf
from tensorflow.keras import Sequential

from tensorflow.keras.layers import Dense, Dropout, Bidirectional
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import LSTM
tf.keras.backend.clear_session()

# some model parameters
output_dim = 2
input_dim = 105000
batch_size = 10 
nb_epoch = 15
steps_per_epoch=np.ceil(1001/batch_size)
validation_steps=np.ceil(251/batch_size)
### Generator objects for train and validation
my_training_batch_generator = batch_generator(Prep_train, 10,steps_per_epoch)
my_validation_batch_generator = batch_generator(Prep_test, 10,validation_steps)


model_1 = Sequential()
model_1.add(Bidirectional(LSTM(4, stateful=True, return_sequences=False),input_shape=(1,105000), batch_size=1))
model_1.add(Dense(1, activation='sigmoid'))
model_1.compile(loss='mse', optimizer='adam', metrics=['accuracy'])

model_1.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional (Bidirectional (1, 8)                    3360160   
_________________________________________________________________
dense (Dense)                (1, 1)                    9         
Total params: 3,360,169
Trainable params: 3,360,169
Non-trainable params: 0
_________________________________________________________________


In [31]:
model_1.fit_generator(my_training_batch_generator,
epochs=nb_epoch,steps_per_epoch=steps_per_epoch,
 verbose=1, validation_data=my_validation_batch_generator, 
validation_steps=validation_steps)



Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<tensorflow.python.keras.callbacks.History at 0x156be842948>

In [32]:
model_1.save('model_LSTM_2')



INFO:tensorflow:Assets written to: model_LSTM_2\assets


INFO:tensorflow:Assets written to: model_LSTM_2\assets


# LSTM 3

In [31]:
def batch_generator(prep_files,batch_size,steps):
    
    idx=1
    while True: 
        yield load_data(prep_files,idx-1,batch_size)## Yields data
        if idx<steps:
            idx+=1
        else:
            idx=1

In [32]:
def load_data(prep_files,idx,batch_size):
    
    X_list = []
    Y_list = []

    for f in prep_files[idx*batch_size:idx*batch_size+batch_size]:

        with open("C:\\Users\\21655\\Desktop\\Projet\\Processed_Data\\" + f, 'rb') as f:
            X_f, Y_f = pickle.load(f)
            X_list.extend(X_f)
            Y_list.extend(Y_f)
            f.close()

    X = np.asarray(X_list)
    X = X.reshape(X.shape[0], X.shape[1])

    Y = np.asarray(Y_list)
    Y = Y.reshape(Y.shape[0],1)

    #Normalizing the feature values (very important for NNs)
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    
    X = np.reshape(X, (X.shape[0],1,X.shape[1]))

    return X,Y

In [33]:
import sklearn.linear_model

import sklearn.metrics
from sklearn.preprocessing import StandardScaler

In [41]:
## importing libraries
import tensorflow as tf
from tensorflow.keras import Sequential

from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import LSTM
tf.keras.backend.clear_session()

# some model parameters
output_dim = 2
input_dim = 105000
batch_size = 10 
nb_epoch = 100
steps_per_epoch=np.ceil(1001/batch_size)
validation_steps=np.ceil(251/batch_size)
### Generator objects for train and validation
my_training_batch_generator = batch_generator(Prep_train, 10,steps_per_epoch)
my_validation_batch_generator = batch_generator(Prep_test, 10,validation_steps)


model = Sequential()
model.add(LSTM(64, return_sequences=True,input_shape=(1,105000),activation='relu'))
model.add(Dropout(0.2))
model.add(LSTM(32,activation="relu"))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam',  metrics = ['accuracy'])

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 1, 64)             26896640  
_________________________________________________________________
dropout (Dropout)            (None, 1, 64)             0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 32)                12416     
_________________________________________________________________
dropout_1 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense (Dense)                (None, 1)                 33        
Total params: 26,909,089
Trainable params: 26,909,089
Non-trainable params: 0
_________________________________________________________________


In [42]:
model.fit_generator(my_training_batch_generator,
epochs=20,steps_per_epoch=steps_per_epoch,
 verbose=1, validation_data=my_validation_batch_generator, 
validation_steps=validation_steps)



Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x1b621a3de08>

In [43]:
model.save('model_LSTM_3')

INFO:tensorflow:Assets written to: model_LSTM_3\assets


# GRU

In [43]:
def batch_generator(prep_files,batch_size,steps):
    
    idx=1
    while True: 
        yield load_data(prep_files,idx-1,batch_size)## Yields data
        if idx<steps:
            idx+=1
        else:
            idx=1

In [44]:
def load_data(prep_files,idx,batch_size):
    
    X_list = []
    Y_list = []

    for f in prep_files[idx*batch_size:idx*batch_size+batch_size]:

        with open("C:\\Users\\21655\\Desktop\\Projet\\Processed_Data\\" + f, 'rb') as f:
            X_f, Y_f = pickle.load(f)
            X_list.extend(X_f)
            Y_list.extend(Y_f)
            f.close()

    X = np.asarray(X_list)
    X = X.reshape(X.shape[0], X.shape[1])

    Y = np.asarray(Y_list)
    Y = Y.reshape(Y.shape[0],1)

    #Normalizing the feature values (very important for NNs)
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    
    X = np.reshape(X, (X.shape[0],1,X.shape[1]))
    
    
    #Y = to_categorical(Y)

    return X,Y

In [45]:
import sklearn.linear_model

import sklearn.metrics
from sklearn.preprocessing import StandardScaler
from keras.utils import to_categorical

In [46]:
## importing libraries
import tensorflow as tf
from tensorflow.keras import Sequential

from tensorflow.keras.layers import Dense, Dropout, Bidirectional
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import LSTM, GRU
tf.keras.backend.clear_session()

# some model parameters
output_dim = 2
input_dim = 105000
batch_size = 10 
nb_epoch = 20
steps_per_epoch=np.ceil(1001/batch_size)
validation_steps=np.ceil(251/batch_size)
### Generator objects for train and validation
my_training_batch_generator = batch_generator(Prep_train, 10,steps_per_epoch)
my_validation_batch_generator = batch_generator(Prep_test, 10,validation_steps)

model_5 = Sequential()
model_5.add(Bidirectional(GRU(4, stateful=True, return_sequences=False),input_shape=(1, 105000), batch_size=1))
model_5.add(Dense(1, activation='sigmoid'))
model_5.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

model_5.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional (Bidirectional (1, 8)                    2520144   
_________________________________________________________________
dense (Dense)                (1, 1)                    9         
Total params: 2,520,153
Trainable params: 2,520,153
Non-trainable params: 0
_________________________________________________________________


In [None]:
model_5.fit(my_training_batch_generator,
epochs=nb_epoch,steps_per_epoch=steps_per_epoch,
 verbose=1, validation_data=my_validation_batch_generator, 
validation_steps=validation_steps)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
 22/101 [=====>........................] - ETA: 6:52 - loss: 0.4314 - accuracy: 0.8360

In [None]:
model_5.save('model_GRU')

In [None]:
loss,acc=model_5.evaluate(my_validation_batch_generator)
print("accuracy: {:5.2f}% ".format(100*acc))
print("loss: {:5.2f}% ".format(100*loss))

# DENSE

In [29]:
def batch_generator(prep_files,batch_size,steps):
    
    idx=1
    while True: 
        yield load_data(prep_files,idx-1,batch_size)## Yields data
        if idx<steps:
            idx+=1
        else:
            idx=1

In [30]:
def load_data(prep_files,idx,batch_size):
    
    X_list = []
    Y_list = []

    for f in prep_files[idx*batch_size:idx*batch_size+batch_size]:

        with open("C:\\Users\\21655\\Desktop\\Projet\\Processed_Data\\" + f, 'rb') as f:
            X_f, Y_f = pickle.load(f)
            X_list.extend(X_f)
            Y_list.extend(Y_f)
            f.close()

    X = np.asarray(X_list)
    X = X.reshape(X.shape[0], X.shape[1])

    Y = np.asarray(Y_list)
    Y = Y.reshape(Y.shape[0],1)

    #Normalizing the feature values (very important for NNs)
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    

    return X,Y

In [31]:
import sklearn.linear_model

import sklearn.metrics
from sklearn.preprocessing import StandardScaler

In [32]:
## importing libraries
import tensorflow as tf
from tensorflow.keras import Sequential

from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Activation
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import LSTM
tf.keras.backend.clear_session()

# some model parameters
output_dim = 2
input_dim = 105000
batch_size = 10 
nb_epoch = 20
steps_per_epoch=np.ceil(1001/batch_size)
validation_steps=np.ceil(251/batch_size)
### Generator objects for train and validation
my_training_batch_generator = batch_generator(Prep_train, 10,steps_per_epoch)
my_validation_batch_generator = batch_generator(Prep_test, 10,validation_steps)

#Topology
model_4 = Sequential()

model_4.add(Dense(100, input_dim=105000,activation='relu'))
model_4.add(BatchNormalization())
model_4.add(Dropout(0.2))

model_4.add(Dense(100,activation='relu'))
model_4.add(BatchNormalization())
model_4.add(Dropout(0.2))

model_4.add(Dense(1,activation='sigmoid'))

model_4.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model_4.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 100)               10500100  
_________________________________________________________________
batch_normalization (BatchNo (None, 100)               400       
_________________________________________________________________
dropout (Dropout)            (None, 100)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 100)               10100     
_________________________________________________________________
batch_normalization_1 (Batch (None, 100)               400       
_________________________________________________________________
dropout_1 (Dropout)          (None, 100)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 1

In [33]:
model_4.fit(my_training_batch_generator,
epochs=nb_epoch,steps_per_epoch=steps_per_epoch,
 verbose=1, validation_data=my_validation_batch_generator, 
validation_steps=validation_steps)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x11cf3a8b5c8>

In [35]:
loss,acc=model_4.evaluate(my_validation_batch_generator)
print("accuracy: {:5.2f}% ".format(100*acc))
print("loss: {:5.2f}% ".format(100*loss))

   3025/Unknown - 21803s 7s/step - loss: 0.3740 - accuracy: 0.8196

KeyboardInterrupt: 

In [34]:
model_4.save('model_DENSE')

INFO:tensorflow:Assets written to: model_DENSE\assets


# TEST

In [46]:
#Read features from the pickled files
X_list = []
Y_list = []

prep_files = [fn for fn in os.listdir("C:\\Users\\21655\\Desktop\\Projet\\Processed_Data\\")]

for f in prep_files[:25]:
   with open("C:\\Users\\21655\\Desktop\\Projet\\Processed_Data\\" + f, 'rb') as f:
    X_f, Y_f = pickle.load(f)
    X_list.extend(X_f)
    Y_list.extend(Y_f)
    f.close()

In [47]:
len(X_list)
len(Y_list)

889

In [48]:
X = np.asarray(X_list)
X = X.reshape(X.shape[0], X.shape[1])

Y = np.asarray(Y_list)
Y = Y.reshape(Y.shape[0],1)

In [49]:
X.shape

(889, 105000)

In [50]:
Y.shape

(889, 1)

In [51]:
import sklearn.linear_model

import sklearn.metrics
from sklearn.preprocessing import StandardScaler

In [52]:
#Normalizing the feature values (very important for NNs)
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [53]:
#Splitting into train/test sets
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [54]:
import keras
import keras.backend as K
from keras.models import Sequential
from keras.layers import Dense, Activation, LSTM, Dropout, BatchNormalization

In [55]:
#Topology
model = Sequential()

model.add(Dense(100, input_dim=105000))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(0.2))

model.add(Dense(700))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(0.2))

model.add(Dense(1))
model.add(Activation('sigmoid'))

In [42]:
X_train = np.reshape(X_train, (X_train.shape[0],1,X.shape[1]))
X_test = np.reshape(X_test, (X_test.shape[0],1,X.shape[1]))

In [56]:
adam = keras.optimizers.Adam(lr=0.1, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [40]:
model = Sequential()
model.add(LSTM(4, stateful=True, return_sequences=False,input_shape=(1,105000), batch_size=1))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='mse', optimizer='adam')

In [57]:
#train the model
model.fit(X_train, Y_train, epochs=100, batch_size=1024)
score, acc = model.evaluate(X_test, Y_test)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [58]:
#Get predictions for the test set
ytest = model.predict_classes(X_test)



In [61]:
model.save('model_test')

INFO:tensorflow:Assets written to: model_test\assets


In [63]:
from sklearn.metrics import classification_report, roc_auc_score
print(classification_report(Y_test, ytest))
print(roc_auc_score(Y_test, ytest))

              precision    recall  f1-score   support

           1       1.00      1.00      1.00       178

    accuracy                           1.00       178
   macro avg       1.00      1.00      1.00       178
weighted avg       1.00      1.00      1.00       178



ValueError: Only one class present in y_true. ROC AUC score is not defined in that case.