In [14]:
import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# for data scaling and splitting
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
# for neural net
from keras.models import Sequential
from keras.layers import Dense
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
# for evaluation
from sklearn.model_selection import KFold, cross_val_score, GridSearchCV
from sklearn.metrics import confusion_matrix, accuracy_score, recall_score,\
    precision_score, f1_score, classification_report, plot_confusion_matrix

In [8]:
data = pd.read_csv("data/combined_expression.csv")

In [9]:
data['classification'].replace({1: 0, 2: 1}, inplace=True)
data.head()

Unnamed: 0,CELL_LINE_NAME,classification,TSPAN6,TNMD,DPM1,SCYL3,C1orf112,FGR,CFH,FUCA2,...,COL15A1,C6orf10,TMEM225,NOTCH4,PBX2,AGER,RNF5,AGPAT1,DFNB59,PRRT1
0,1240121,0,6.419526,3.182094,9.320548,3.759654,3.802619,3.215753,4.698729,7.873672,...,3.245454,2.953508,3.543429,3.352022,4.67231,3.641128,3.13531,3.737072,3.450927,3.1688
1,1240122,1,7.646494,2.626819,10.153853,3.564755,3.942749,3.29076,3.551675,8.252413,...,2.786709,3.077382,3.728232,3.208882,4.58684,3.395654,3.5868,3.519128,3.115323,3.051645
2,1240123,0,8.319417,3.111183,9.643558,4.757258,3.919757,3.602185,3.329644,9.07695,...,3.459089,3.085394,3.462811,3.33903,4.614897,3.395845,3.419193,3.971646,3.72931,3.320022
3,1240124,0,9.006994,3.028173,9.6867,4.280504,3.147646,3.188881,3.293807,8.67879,...,2.835403,2.960303,3.415083,3.290171,4.770123,3.400821,3.383734,3.798107,2.822404,3.297547
4,1240127,0,7.985676,2.694729,10.676134,4.159685,3.804637,3.481942,3.111261,7.555407,...,2.896523,2.849899,3.480114,3.226128,5.83271,3.612179,3.347095,4.457963,5.198524,4.553586


In [10]:
selected_genes = pd.read_csv('cleaned/boruta-99-25-0.01.csv')
selected_genes = selected_genes.values.tolist()
selected_genes = list(itertools.chain(*selected_genes))

In [11]:
# retrieving proper columns
X = data.loc[:, selected_genes].values
y = data['classification'].values
# scaling the data
scalar = MinMaxScaler()
x_scaled = scalar.fit_transform(X)
# splitting data (20% test, 80% train)
X_train, X_test, y_train, y_test = train_test_split(x_scaled, y, test_size=0.2, random_state=0)

# Gridsearch for Input and Output Layer (One Hidden Layer)

## Optimizing Epochs and Batches

In [12]:
def create_model(optimizer='adam'):
    model = Sequential()
    # adding layers
    model.add(Dense(len(selected_genes), activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    # compiling
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [15]:
model = KerasClassifier(build_fn=create_model)
epochs = [25, 50, 100, 150]
batches = [16, 32, 64, 128]
optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
param_grid = dict(epochs=epochs, batch_size=batches, optimizer=optimizer)
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, n_jobs=-1)
grid_result = grid.fit(X_train, y_train)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [16]:
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Best: 0.750447 using {'batch_size': 16, 'epochs': 100, 'optimizer': 'Adagrad'}


In [17]:
grid_result.cv_results_

{'mean_fit_time': array([ 4.85296779,  6.52042532,  5.42756615,  7.90591793,  7.65490971,
         6.5323441 , 10.20223951,  9.02838345, 11.49187579, 11.39599109,
        16.3142252 , 15.2733242 , 12.79265385, 16.83567848, 14.19800825,
        20.1476685 , 18.51353359, 26.92441974, 24.93766327, 21.72245255,
        25.36375413, 28.83345923, 40.76835079, 39.61854196, 49.98053532,
        47.14785657, 39.16147728, 40.27613959,  4.9513906 ,  7.64798994,
        10.64718614, 15.52412639,  9.20608892,  6.55431342,  9.22079315,
         5.7566752 ,  8.59237552,  8.14730349,  9.68278985,  9.06749988,
         7.81920104, 10.9454742 ,  9.5213398 , 13.33332181, 12.17125192,
        15.05213199, 16.2689734 , 13.46293483, 17.90104504, 18.81385846,
        22.59538598, 23.9940237 , 29.11310315, 27.94198818, 24.56344566,
        24.18605661,  4.67051854,  6.53351436,  6.07374573,  8.62956052,
         7.82396755,  6.02028341,  8.17353101,  5.69714479,  6.8679997 ,
         6.04347858,  7.84312382, 

# Testing the Model

In [18]:
y_pred = grid.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.71      0.88      0.79        75
           1       0.75      0.50      0.60        54

    accuracy                           0.72       129
   macro avg       0.73      0.69      0.69       129
weighted avg       0.73      0.72      0.71       129



In [19]:
print(confusion_matrix(y_test, y_pred))

[[66  9]
 [27 27]]


In [22]:
plot_confusion_matrix(grid, y_test, y_pred)

ValueError: plot_confusion_matrix only supports classifiers

In [None]:
model.save('model/model_1.h5')