In [1]:
import numpy as np
import seaborn as sn
from imblearn.over_sampling import SMOTE 
import keras
from keras.utils import np_utils
from keras.layers import Input, Dense, Dropout
from keras.models import Model, Sequential
from keras import losses
from keras.wrappers.scikit_learn import KerasClassifier, KerasRegressor
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.utils import class_weight
from sklearn.metrics import recall_score, confusion_matrix, multilabel_confusion_matrix, precision_score, precision_recall_curve, average_precision_score, make_scorer
from sklearn.model_selection import KFold, StratifiedKFold, train_test_split, GridSearchCV
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from pandas import DataFrame
import xlsxwriter
import time

# --- Reference workbook: used here only to fit the min-max scalers ----------
IBIS = pd.ExcelFile("SA_and_CT_AALandfROI_08272019.xlsx")

# Sheet at index 9 is parsed as the CT table.
IBIS_CT = IBIS.sheet_names[9]
CT = IBIS.parse(IBIS_CT)

# Keep only the feature columns: 'Case' and 'Visit' identify the subject/visit,
# and ROI42 / ROI117 are the two medial-wall ROIs (ctx_lh_Medial_wall,
# ctx_rh_Medial_wall), which are not needed for the analysis.
IBIS_CT_features = CT.drop(columns=['Case', 'Visit', 'ROI42', 'ROI117'])
scalerCT = MinMaxScaler()
scaled_CT_data = scalerCT.fit_transform(IBIS_CT_features)

# Sheet at index 10 is parsed as the SA table; same column clean-up as for CT.
IBIS_SA = IBIS.sheet_names[10]
SA = IBIS.parse(IBIS_SA)

IBIS_SA_features = SA.drop(columns=['Case', 'Visit', 'ROI42', 'ROI117'])
scalerSA = MinMaxScaler()
scaled_SA_data = scalerSA.fit_transform(IBIS_SA_features)

# --- Longitudinal workbook: IBIS subjects that have both 1- and 2-year CT/SA -
file = pd.ExcelFile("All CTSA 2-1yr.xlsx")

# Sheets 0-3 are read as CT 1y, CT 2y, SA 1y and SA 2y respectively. From each
# one the 'ROI' column and the columns labelled 11142 and 12142 are dropped
# (presumably the medial-wall columns again -- TODO confirm against the
# workbook).
ct1y_sheet = file.sheet_names[0]
ct1y_data = file.parse(ct1y_sheet)
ct1y_data_features = ct1y_data.drop(columns=['ROI', 11142, 12142])

ct2y_sheet = file.sheet_names[1]
ct2y_data = file.parse(ct2y_sheet)
ct2y_data_features = ct2y_data.drop(columns=['ROI', 11142, 12142])

sa1y_sheet = file.sheet_names[2]
sa1y_data = file.parse(sa1y_sheet)
sa1y_data_features = sa1y_data.drop(columns=['ROI', 11142, 12142])

sa2y_sheet = file.sheet_names[3]
sa2y_data = file.parse(sa2y_sheet)
sa2y_data_features = sa2y_data.drop(columns=['ROI', 11142, 12142])

# Re-use the scaler fitted on the reference SA sheet (no re-fitting) so both
# time points are mapped with the same per-column min/max.
scaled_sa1y = scalerSA.transform(sa1y_data_features)
scaled_sa2y = scalerSA.transform(sa2y_data_features)

# Sanity check: print the shape of each of the four feature tables.
for _frame in (ct1y_data_features, ct2y_data_features,
               sa1y_data_features, sa2y_data_features):
    print(_frame.shape)

  import pandas.util.testing as tm


(585, 148)
(585, 148)
(585, 148)
(585, 148)


In [2]:
def create_model(dropout=0.1, layer1_size=110, layer2_size=20, encoded_layer_size=15, n_features=148):
    """Build and compile a symmetric, fully connected encoder-decoder network.

    Layer stack (every Dense layer uses a tanh activation):

        Dense(layer1_size) -> Dropout -> Dense(layer2_size)
        -> Dense(encoded_layer_size)
        -> Dense(layer2_size) -> Dropout -> Dense(layer1_size)
        -> Dense(n_features)

    Parameters
    ----------
    dropout : float
        Dropout rate used by both Dropout layers.
    layer1_size : int
        Width of the outermost hidden layers (first and second-to-last).
    layer2_size : int
        Width of the hidden layers on either side of the bottleneck.
    encoded_layer_size : int
        Width of the central bottleneck layer.
    n_features : int
        Number of input features and of output units. Defaults to 148, the
        value that was previously hard-coded in two places, so existing
        callers are unaffected.

    Returns
    -------
    keras.models.Sequential
        Model compiled with mean-absolute-error loss and the Adam optimizer.
    """
    model = Sequential()

    # Encoder half: progressively narrower layers down to the bottleneck.
    model.add(Dense(layer1_size, input_dim=n_features, activation='tanh'))
    model.add(Dropout(dropout))
    model.add(Dense(layer2_size, activation='tanh'))
    model.add(Dense(encoded_layer_size, activation='tanh'))

    # Decoder half: mirror image of the encoder.
    model.add(Dense(layer2_size, activation='tanh'))
    model.add(Dropout(dropout))
    model.add(Dense(layer1_size, activation='tanh'))

    # Output width equals the input width. tanh bounds every output to
    # (-1, 1), so targets outside that interval cannot be reproduced exactly.
    model.add(Dense(n_features, activation='tanh'))

    model.compile(loss='mean_absolute_error', optimizer='adam')
    return model

In [3]:
# Wrap the Keras builder so scikit-learn can drive it as a regressor.
model = KerasRegressor(build_fn=create_model, verbose=0)

# Candidate values for the fit settings and the dropout rate.
batch_size = [10, 15, 20, 25, 30]
epochs = [50, 100, 150]
dropout_rate = [0.15, 0.2, 0.25]

# Candidate layer widths: outer hidden layers, inner hidden layers, bottleneck.
hidden_size1 = [90, 100, 110]
hidden_size2 = [25, 30, 35, 40]
encoded_layer_size_array = [15, 20, 25]

# Keys must match the fit arguments / create_model keyword names.
# 5 * 3 * 3 * 3 * 4 * 3 = 1620 candidates, each fitted on 5 folds.
param_grid = {
    'batch_size': batch_size,
    'epochs': epochs,
    'dropout': dropout_rate,
    'layer1_size': hidden_size1,
    'layer2_size': hidden_size2,
    'encoded_layer_size': encoded_layer_size_array,
}

grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='neg_mean_absolute_error',
    n_jobs=-1,
    cv=5,
)

# Inputs are the scaled 1-year SA features, targets the scaled 2-year ones.
grid_result = grid.fit(scaled_sa1y, scaled_sa2y)



In [4]:
# Tabulate every candidate's cross-validation record and save it to disk so
# the results can be inspected without repeating the search.
df = DataFrame(grid_result.cv_results_)
df.to_excel(excel_writer="negMeanAbsoluteErrorSA.xlsx")
print('DataFrame is written to Excel File successfully.')

DataFrame is written to Excel File successfully.


In [5]:
# Display the cross-validation results table as this cell's output.
df

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_batch_size,param_dropout,param_encoded_layer_size,param_epochs,param_layer1_size,param_layer2_size,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,4.177626,0.385590,0.131356,0.035176,10,0.15,15,50,90,25,"{'batch_size': 10, 'dropout': 0.15, 'encoded_l...",-0.092259,-0.091641,-0.090410,-0.091615,-0.092180,-0.091621,0.000661,1197
1,4.269393,0.155133,0.142673,0.035909,10,0.15,15,50,90,30,"{'batch_size': 10, 'dropout': 0.15, 'encoded_l...",-0.092725,-0.091701,-0.089947,-0.090370,-0.090843,-0.091117,0.000993,1125
2,4.268117,0.236737,0.133712,0.048618,10,0.15,15,50,90,35,"{'batch_size': 10, 'dropout': 0.15, 'encoded_l...",-0.091310,-0.090347,-0.091548,-0.091861,-0.091393,-0.091292,0.000509,1153
3,4.227995,0.339345,0.091647,0.013651,10,0.15,15,50,90,40,"{'batch_size': 10, 'dropout': 0.15, 'encoded_l...",-0.091558,-0.090515,-0.091169,-0.089793,-0.091826,-0.090972,0.000736,1110
4,4.439241,0.148241,0.118348,0.033306,10,0.15,15,50,100,25,"{'batch_size': 10, 'dropout': 0.15, 'encoded_l...",-0.092847,-0.090253,-0.090475,-0.091952,-0.091128,-0.091331,0.000961,1159
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1615,5.822338,0.144319,0.138228,0.020684,30,0.25,25,150,100,40,"{'batch_size': 30, 'dropout': 0.25, 'encoded_l...",-0.089674,-0.087702,-0.086506,-0.085532,-0.087728,-0.087428,0.001390,168
1616,5.944921,0.498100,0.136421,0.048759,30,0.25,25,150,110,25,"{'batch_size': 30, 'dropout': 0.25, 'encoded_l...",-0.090252,-0.090058,-0.089048,-0.087532,-0.088621,-0.089102,0.000994,698
1617,5.605527,0.113880,0.127138,0.030085,30,0.25,25,150,110,30,"{'batch_size': 30, 'dropout': 0.25, 'encoded_l...",-0.090149,-0.089282,-0.086776,-0.086727,-0.087923,-0.088171,0.001359,375
1618,5.604672,0.109768,0.116619,0.017387,30,0.25,25,150,110,35,"{'batch_size': 30, 'dropout': 0.25, 'encoded_l...",-0.090003,-0.087725,-0.086623,-0.086026,-0.087565,-0.087588,0.001358,204


In [6]:
# Headline: the best mean CV score (negated MAE, so closer to 0 is better)
# and the parameter combination that produced it.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

# Pull the per-candidate summary columns out of the results once.
cv_results = grid_result.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
params = cv_results['params']

# One line per candidate: mean score, std across folds, parameter set.
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: -0.085522 using {'batch_size': 10, 'dropout': 0.15, 'encoded_layer_size': 25, 'epochs': 150, 'layer1_size': 100, 'layer2_size': 40}
-0.091621 (0.000661) with: {'batch_size': 10, 'dropout': 0.15, 'encoded_layer_size': 15, 'epochs': 50, 'layer1_size': 90, 'layer2_size': 25}
-0.091117 (0.000993) with: {'batch_size': 10, 'dropout': 0.15, 'encoded_layer_size': 15, 'epochs': 50, 'layer1_size': 90, 'layer2_size': 30}
-0.091292 (0.000509) with: {'batch_size': 10, 'dropout': 0.15, 'encoded_layer_size': 15, 'epochs': 50, 'layer1_size': 90, 'layer2_size': 35}
-0.090972 (0.000736) with: {'batch_size': 10, 'dropout': 0.15, 'encoded_layer_size': 15, 'epochs': 50, 'layer1_size': 90, 'layer2_size': 40}
-0.091331 (0.000961) with: {'batch_size': 10, 'dropout': 0.15, 'encoded_layer_size': 15, 'epochs': 50, 'layer1_size': 100, 'layer2_size': 25}
-0.091906 (0.001645) with: {'batch_size': 10, 'dropout': 0.15, 'encoded_layer_size': 15, 'epochs': 50, 'layer1_size': 100, 'layer2_size': 30}
-0.091484 (0.0