In [1]:
import numpy as np
import seaborn as sn
from imblearn.over_sampling import SMOTE 
import keras
from keras.utils import np_utils
from keras.layers import Input, Dense, Dropout
from keras.models import Model, Sequential
from keras import losses
from keras.wrappers.scikit_learn import KerasClassifier, KerasRegressor
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.utils import class_weight
from sklearn.metrics import recall_score, confusion_matrix, multilabel_confusion_matrix, precision_score, precision_recall_curve, average_precision_score, make_scorer
from sklearn.model_selection import KFold, StratifiedKFold, train_test_split, GridSearchCV
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from pandas import DataFrame
import xlsxwriter
import time

# Workbook holding the IBIS cortical-thickness (CT) and surface-area (SA) sheets.
IBIS = pd.ExcelFile("SA_and_CT_AALandfROI_08272019.xlsx")


def _fit_minmax_on_sheet(workbook, sheet_position):
    """Parse one sheet by position, strip non-feature columns and fit a scaler.

    The dropped columns are the subject/visit identifiers ('Case', 'Visit')
    and the two ROI columns 'ROI42' and 'ROI117' (per the original notes:
    the medial-wall regions, not needed for the analysis).

    Returns a tuple of
    (sheet name, parsed frame, feature frame, fitted MinMaxScaler, scaled array).
    """
    sheet_name = workbook.sheet_names[sheet_position]
    parsed = workbook.parse(sheet_name)
    features = parsed.drop(['Case', 'Visit', 'ROI42', 'ROI117'], axis=1)
    scaler = MinMaxScaler()
    scaled = scaler.fit_transform(features)
    return sheet_name, parsed, features, scaler, scaled


# CT sheet: the scaler is fitted on the full IBIS CT feature table.
IBIS_CT, CT, IBIS_CT_features, scalerCT, scaled_CT_data = _fit_minmax_on_sheet(IBIS, 9)

# SA sheet: the scaler is fitted on the full IBIS SA feature table.
IBIS_SA, SA, IBIS_SA_features, scalerSA, scaled_SA_data = _fit_minmax_on_sheet(IBIS, 10)

# Workbook that only includes IBIS subjects having both 1- and 2-year CT and SA.
file = pd.ExcelFile("All CTSA 2-1yr.xlsx")


def _load_feature_sheet(workbook, sheet_position):
    """Parse one sheet by position and drop the 'ROI', 11142 and 12142 columns.

    Returns a tuple of (sheet name, parsed frame, feature-only frame).
    """
    sheet_name = workbook.sheet_names[sheet_position]
    parsed = workbook.parse(sheet_name)
    features = parsed.drop(['ROI', 11142, 12142], axis=1)
    return sheet_name, parsed, features


# Sheet order in the workbook: CT 1y, CT 2y, SA 1y, SA 2y.
ct1y_sheet, ct1y_data, ct1y_data_features = _load_feature_sheet(file, 0)
ct2y_sheet, ct2y_data, ct2y_data_features = _load_feature_sheet(file, 1)
sa1y_sheet, sa1y_data, sa1y_data_features = _load_feature_sheet(file, 2)
sa2y_sheet, sa2y_data, sa2y_data_features = _load_feature_sheet(file, 3)

# Reuse the scaler fitted on the full IBIS SA table; it is only applied here, not refitted.
scaled_sa1y = scalerSA.transform(sa1y_data_features)
scaled_sa2y = scalerSA.transform(sa2y_data_features)

# Sanity check: report the (rows, columns) of each feature table.
for _features in (ct1y_data_features, ct2y_data_features,
                  sa1y_data_features, sa2y_data_features):
    print(_features.shape)

  import pandas.util.testing as tm


(585, 148)
(585, 148)
(585, 148)
(585, 148)


In [2]:
def create_model(dropout=0.1, layer1_size=110, layer2_size=20, encoded_layer_size=15, n_features=148):
    """Build and compile a symmetric dense encoder/decoder network.

    Layer stack (every Dense layer uses tanh activation):
        n_features -> layer1_size -> Dropout -> layer2_size
        -> encoded_layer_size
        -> layer2_size -> Dropout -> layer1_size -> n_features

    Parameters
    ----------
    dropout : float
        Rate used by both Dropout layers.
    layer1_size : int
        Units in the outermost hidden layers (first and last hidden layer).
    layer2_size : int
        Units in the hidden layers on either side of the bottleneck.
    encoded_layer_size : int
        Units in the central (bottleneck) layer.
    n_features : int
        Width of both the input and the output layer. Defaults to 148,
        the value that was previously hard-coded, so existing callers
        are unaffected.

    Returns
    -------
    The compiled Sequential model (mean-absolute-error loss, Adam optimizer).
    """
    model = Sequential()

    # Encoder half: input -> layer1 -> dropout -> layer2 -> bottleneck.
    model.add(Dense(layer1_size, input_dim=n_features, activation='tanh'))
    model.add(Dropout(dropout))
    model.add(Dense(layer2_size, activation='tanh'))
    model.add(Dense(encoded_layer_size, activation='tanh'))

    # Decoder half mirrors the encoder and ends at the input width.
    model.add(Dense(layer2_size, activation='tanh'))
    model.add(Dropout(dropout))
    model.add(Dense(layer1_size, activation='tanh'))
    model.add(Dense(n_features, activation='tanh'))

    model.compile(loss='mean_absolute_error', optimizer='adam')
    return model

In [3]:
# Wrap the Keras builder so scikit-learn can clone and fit it.
model = KerasRegressor(build_fn=create_model, verbose=0)

# Candidate values for the training hyper-parameters.
batch_size = list(range(10, 31, 5))        # 10, 15, 20, 25, 30
epochs = list(range(50, 151, 50))          # 50, 100, 150
dropout_rate = [0.15, 0.2, 0.25]

# Candidate widths for the hidden layers and the bottleneck.
hidden_size1 = list(range(90, 111, 10))            # 90, 100, 110
hidden_size2 = list(range(25, 41, 5))              # 25, 30, 35, 40
encoded_layer_size_array = list(range(15, 26, 5))  # 15, 20, 25

# Keys must match the fit arguments and the keyword names of create_model.
param_grid = {
    'batch_size': batch_size,
    'epochs': epochs,
    'dropout': dropout_rate,
    'layer1_size': hidden_size1,
    'layer2_size': hidden_size2,
    'encoded_layer_size': encoded_layer_size_array,
}

# Exhaustive 5-fold search scored by explained variance, using all cores.
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='explained_variance',
    n_jobs=-1,
    cv=5,
)

# Inputs are the scaled 2-year SA features; targets are the scaled 1-year SA features.
grid_result = grid.fit(scaled_sa2y, scaled_sa1y)



In [4]:
# Tabulate the cross-validation results and persist them as a spreadsheet.
df = DataFrame(data=grid_result.cv_results_)
df.to_excel(excel_writer="ExplainedVarianceSA.xlsx")
print('DataFrame is written to Excel File successfully.')

DataFrame is written to Excel File successfully.


In [5]:
# Show the cross-validation results table as this cell's output.
df

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_batch_size,param_dropout,param_encoded_layer_size,param_epochs,param_layer1_size,param_layer2_size,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,3.876764,0.312921,0.137082,0.085338,10,0.15,15,50,90,25,"{'batch_size': 10, 'dropout': 0.15, 'encoded_l...",0.182796,0.148919,0.193077,0.178973,0.177655,0.176284,0.014713,1234
1,3.521044,0.244148,0.158134,0.068552,10,0.15,15,50,90,30,"{'batch_size': 10, 'dropout': 0.15, 'encoded_l...",0.180208,0.154224,0.190231,0.187887,0.177175,0.177945,0.012792,1216
2,3.660894,0.293677,0.126530,0.088415,10,0.15,15,50,90,35,"{'batch_size': 10, 'dropout': 0.15, 'encoded_l...",0.193169,0.167198,0.204548,0.190417,0.204613,0.191989,0.013679,1051
3,3.465224,0.345128,0.120823,0.068619,10,0.15,15,50,90,40,"{'batch_size': 10, 'dropout': 0.15, 'encoded_l...",0.207244,0.158259,0.205295,0.206489,0.194949,0.194447,0.018635,1018
4,3.551370,0.366447,0.185614,0.069870,10,0.15,15,50,100,25,"{'batch_size': 10, 'dropout': 0.15, 'encoded_l...",0.189886,0.162209,0.195103,0.192022,0.182959,0.184436,0.011809,1149
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1615,4.243314,0.338698,0.174584,0.099433,30,0.25,25,150,100,40,"{'batch_size': 30, 'dropout': 0.25, 'encoded_l...",0.230902,0.199297,0.241646,0.235102,0.239028,0.229195,0.015384,234
1616,4.058358,0.454326,0.124002,0.065091,30,0.25,25,150,110,25,"{'batch_size': 30, 'dropout': 0.25, 'encoded_l...",0.210129,0.189704,0.231166,0.215798,0.206161,0.210592,0.013465,683
1617,3.966538,0.408752,0.111680,0.069645,30,0.25,25,150,110,30,"{'batch_size': 30, 'dropout': 0.25, 'encoded_l...",0.220538,0.195506,0.235024,0.223615,0.208284,0.216593,0.013552,500
1618,3.555523,0.220138,0.099282,0.023510,30,0.25,25,150,110,35,"{'batch_size': 30, 'dropout': 0.25, 'encoded_l...",0.239341,0.205802,0.239952,0.217442,0.233078,0.227123,0.013399,268


In [6]:
# Headline: best mean CV score and the parameter set that produced it.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

# Pull the per-candidate summary columns out of the results table.
_cv_table = grid_result.cv_results_
means = _cv_table['mean_test_score']
stds = _cv_table['std_test_score']
params = _cv_table['params']

# One line per candidate: mean score, score std-dev, parameter dict.
for _summary in zip(means, stds, params):
    print("%f (%f) with: %r" % _summary)

Best: 0.262851 using {'batch_size': 10, 'dropout': 0.15, 'encoded_layer_size': 25, 'epochs': 150, 'layer1_size': 100, 'layer2_size': 40}
0.176284 (0.014713) with: {'batch_size': 10, 'dropout': 0.15, 'encoded_layer_size': 15, 'epochs': 50, 'layer1_size': 90, 'layer2_size': 25}
0.177945 (0.012792) with: {'batch_size': 10, 'dropout': 0.15, 'encoded_layer_size': 15, 'epochs': 50, 'layer1_size': 90, 'layer2_size': 30}
0.191989 (0.013679) with: {'batch_size': 10, 'dropout': 0.15, 'encoded_layer_size': 15, 'epochs': 50, 'layer1_size': 90, 'layer2_size': 35}
0.194447 (0.018635) with: {'batch_size': 10, 'dropout': 0.15, 'encoded_layer_size': 15, 'epochs': 50, 'layer1_size': 90, 'layer2_size': 40}
0.184436 (0.011809) with: {'batch_size': 10, 'dropout': 0.15, 'encoded_layer_size': 15, 'epochs': 50, 'layer1_size': 100, 'layer2_size': 25}
0.187482 (0.011806) with: {'batch_size': 10, 'dropout': 0.15, 'encoded_layer_size': 15, 'epochs': 50, 'layer1_size': 100, 'layer2_size': 30}
0.189783 (0.012758) w