In [1]:
import numpy as np
import seaborn as sn
from imblearn.over_sampling import SMOTE 
import keras
from keras.utils import np_utils
from keras.layers import Input, Dense, Dropout
from keras.models import Model, Sequential
from keras import losses
from keras.wrappers.scikit_learn import KerasClassifier, KerasRegressor
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.utils import class_weight
from sklearn.metrics import recall_score, confusion_matrix, multilabel_confusion_matrix, precision_score, precision_recall_curve, average_precision_score, make_scorer
from sklearn.model_selection import KFold, StratifiedKFold, train_test_split, GridSearchCV
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from pandas import DataFrame
import xlsxwriter
import time

IBIS = pd.ExcelFile("SA_and_CT_AALandfROI_08272019.xlsx")

#Get the sheets for scaling
IBIS_CT = IBIS.sheet_names[9]
CT = IBIS.parse(IBIS_CT)

#Get rid of subject names to only have features now. #Need to remove ROIs. They don't convert to floats.
#Get rid of ctx_rh_Medial_wall and ctx_lh_Medial_wall, not needed for analysis.
#Scaling
IBIS_CT_features = CT.loc[:, CT.columns] 
IBIS_CT_features = IBIS_CT_features.drop(['Case','Visit','ROI42','ROI117'], axis=1)  
scalerCT = MinMaxScaler()
scaled_CT_data = scalerCT.fit_transform(IBIS_CT_features)

#Get the sheets for scaling
IBIS_SA = IBIS.sheet_names[10]
SA = IBIS.parse(IBIS_SA)

#Get rid of subject names to only have features now. #Need to remove ROIs. They don't convert to floats.
#Get rid of ctx_rh_Medial_wall and ctx_lh_Medial_wall, not needed for analysis.
#Scaling

IBIS_SA_features = SA.loc[:, SA.columns] 
IBIS_SA_features = IBIS_SA_features.drop(['Case','Visit','ROI42','ROI117'], axis=1)  
scalerSA = MinMaxScaler()
scaled_SA_data = scalerSA.fit_transform(IBIS_SA_features)

file = pd.ExcelFile("All CTSA 2-1yr.xlsx") #Only includes IBIS subjects that have 1,2 year CT,SA.

ct1y_sheet = file.sheet_names[0]
ct1y_data = file.parse(ct1y_sheet)
ct1y_data_features = ct1y_data.loc[:, ct1y_data.columns] 
ct1y_data_features = ct1y_data_features.drop(['ROI', 11142, 12142], axis=1)  #Already removed medial walls.

ct2y_sheet = file.sheet_names[1]
ct2y_data = file.parse(ct2y_sheet)
ct2y_data_features = ct2y_data.loc[:, ct2y_data.columns] 
ct2y_data_features = ct2y_data_features.drop(['ROI', 11142, 12142], axis=1)  #Already removed medial walls.

sa1y_sheet = file.sheet_names[2]
sa1y_data = file.parse(sa1y_sheet)
sa1y_data_features = sa1y_data.loc[:, sa1y_data.columns] 
sa1y_data_features = sa1y_data_features.drop(['ROI', 11142, 12142], axis=1)  #Already removed medial walls.

sa2y_sheet = file.sheet_names[3]
sa2y_data = file.parse(sa2y_sheet)
sa2y_data_features = sa2y_data.loc[:, sa2y_data.columns] 
sa2y_data_features = sa2y_data_features.drop(['ROI', 11142, 12142], axis=1)  #Already removed medial walls.

scaled_ct1y = scalerSA.transform(ct1y_data_features)
scaled_ct2y = scalerSA.transform(ct2y_data_features)

print(ct1y_data_features.shape)
print(ct2y_data_features.shape)
print(sa1y_data_features.shape)
print(sa2y_data_features.shape)

  import pandas.util.testing as tm


(585, 148)
(585, 148)
(585, 148)
(585, 148)


In [2]:
def create_model(dropout=0.1, layer1_size=110, layer2_size=20, encoded_layer_size=15):
    # create model
    model = Sequential()
    model.add(Dense(layer1_size, input_dim=148, activation='tanh'))
    model.add(Dropout(dropout))
    model.add(Dense(layer2_size, activation='tanh'))
    model.add(Dense(encoded_layer_size, activation='tanh'))
    model.add(Dense(layer2_size, activation='tanh'))
    model.add(Dropout(dropout))
    model.add(Dense(layer1_size, activation='tanh'))
    model.add(Dense(148,activation='tanh'))
    # Compile model
    model.compile(loss='mean_absolute_error', optimizer='adam')
    return model

In [3]:
# create model
model = KerasRegressor(build_fn=create_model, verbose=0)
# define the grid search parameters
batch_size = [10, 15, 20, 25, 30]
epochs = [50, 100, 150] 
dropout_rate = [0.15, 0.2, 0.25]
#Hidden layer sizes
hidden_size1 = [90, 100, 110] 
hidden_size2 = [25, 30, 35, 40] 
encoded_layer_size_array = [15,20,25]

param_grid = dict(batch_size=batch_size, epochs=epochs, dropout=dropout_rate, layer1_size=hidden_size1, layer2_size=hidden_size2, encoded_layer_size=encoded_layer_size_array)
grid = GridSearchCV(estimator=model, param_grid=param_grid, scoring='explained_variance', n_jobs=-1, cv=5)
grid_result = grid.fit(scaled_ct1y,scaled_ct2y)



In [4]:
df = pd.DataFrame(grid_result.cv_results_)
df.to_excel("ExplainedVarianceCT.xlsx")
print('DataFrame is written to Excel File successfully.')

DataFrame is written to Excel File successfully.


In [5]:
df

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_batch_size,param_dropout,param_encoded_layer_size,param_epochs,param_layer1_size,param_layer2_size,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,4.325980,0.693841,0.149291,0.066292,10,0.15,15,50,90,25,"{'batch_size': 10, 'dropout': 0.15, 'encoded_l...",-0.994770,-0.837802,0.184736,-1.039623,-1.098401,-0.757172,0.478824,1387
1,4.294821,0.323752,0.100625,0.030303,10,0.15,15,50,90,30,"{'batch_size': 10, 'dropout': 0.15, 'encoded_l...",-0.946581,-0.651016,0.102750,-0.618222,-1.289672,-0.680548,0.460460,1265
2,4.206800,0.386233,0.091675,0.011198,10,0.15,15,50,90,35,"{'batch_size': 10, 'dropout': 0.15, 'encoded_l...",-0.615024,-1.094183,0.345948,-0.521842,-1.304449,-0.637910,0.571740,1158
3,4.392288,0.417362,0.142106,0.030337,10,0.15,15,50,90,40,"{'batch_size': 10, 'dropout': 0.15, 'encoded_l...",-0.567883,-1.218443,-0.269972,-0.636195,-1.284053,-0.795309,0.392666,1428
4,4.130071,0.249690,0.170811,0.097510,10,0.15,15,50,100,25,"{'batch_size': 10, 'dropout': 0.15, 'encoded_l...",-1.101774,-0.723681,-0.213353,-1.028486,-1.011643,-0.815788,0.327658,1453
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1615,4.605252,0.102316,0.104073,0.022871,30,0.25,25,150,100,40,"{'batch_size': 30, 'dropout': 0.25, 'encoded_l...",-0.478896,-0.402267,0.349601,-1.280907,-0.580447,-0.478583,0.518809,651
1616,4.596974,0.319319,0.082107,0.017855,30,0.25,25,150,110,25,"{'batch_size': 30, 'dropout': 0.25, 'encoded_l...",-0.530110,-0.464780,0.410401,-0.383591,-2.107119,-0.615040,0.819901,1084
1617,4.585129,0.144278,0.104187,0.020574,30,0.25,25,150,110,30,"{'batch_size': 30, 'dropout': 0.25, 'encoded_l...",-0.335339,-0.470338,0.187936,-0.453949,-0.427189,-0.299776,0.248283,116
1618,4.592140,0.092346,0.108739,0.042424,30,0.25,25,150,110,35,"{'batch_size': 30, 'dropout': 0.25, 'encoded_l...",-0.840092,-0.576961,0.105631,-0.247713,-0.151853,-0.342198,0.331277,210


In [6]:
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: -0.121697 using {'batch_size': 30, 'dropout': 0.15, 'encoded_layer_size': 15, 'epochs': 100, 'layer1_size': 90, 'layer2_size': 40}
-0.757172 (0.478824) with: {'batch_size': 10, 'dropout': 0.15, 'encoded_layer_size': 15, 'epochs': 50, 'layer1_size': 90, 'layer2_size': 25}
-0.680548 (0.460460) with: {'batch_size': 10, 'dropout': 0.15, 'encoded_layer_size': 15, 'epochs': 50, 'layer1_size': 90, 'layer2_size': 30}
-0.637910 (0.571740) with: {'batch_size': 10, 'dropout': 0.15, 'encoded_layer_size': 15, 'epochs': 50, 'layer1_size': 90, 'layer2_size': 35}
-0.795309 (0.392666) with: {'batch_size': 10, 'dropout': 0.15, 'encoded_layer_size': 15, 'epochs': 50, 'layer1_size': 90, 'layer2_size': 40}
-0.815788 (0.327658) with: {'batch_size': 10, 'dropout': 0.15, 'encoded_layer_size': 15, 'epochs': 50, 'layer1_size': 100, 'layer2_size': 25}
-0.743673 (0.267373) with: {'batch_size': 10, 'dropout': 0.15, 'encoded_layer_size': 15, 'epochs': 50, 'layer1_size': 100, 'layer2_size': 30}
-0.792856 (0.52