In [1]:
import pandas as pd
import numpy as np

colnames = ['CIC0', 'SM1_Dz(Z)', 'GATS1i', 'NdsCH', 'NdssC','MLOGP', 'LC50']
data = pd.read_csv('../data/qsar_fish_toxicity.csv', sep=';', names=colnames)
X = data.drop('LC50', axis=1)
y = data['LC50']

print("Number of Examples in the Dataset = ", X.shape[0])
print("Number of Features for each example = ", X.shape[1])

print("Output Range = [%f, %f]" %(min(y), max(y)))

Number of Examples in the Dataset =  908
Number of Features for each example =  6
Output Range = [0.053000, 9.612000]


In [2]:
from keras.models import Sequential
from keras.layers import Dense

def build_model_1():
    model = Sequential()
    model.add(Dense(4, input_dim=X.shape[1], activation='relu'))
    model.add(Dense(1))
    
    model.compile(loss='mse', optimizer='adam')
    
    return model

def build_model_2():
    model = Sequential()
    model.add(Dense(8, input_dim=X.shape[1], activation='relu'))
    model.add(Dense(2, activation='relu'))
    model.add(Dense(1))
    
    model.compile(loss='mse', optimizer='adam')
    
    return model

def build_model_3():
    model = Sequential()
    model.add(Dense(4, input_dim=X.shape[1], activation='relu'))
    model.add(Dense(2, activation='relu'))
    model.add(Dense(1))
    
    model.compile(loss='mse', optimizer='adam')
    
    return model

In [3]:
from tensorflow import random
seed = 1
np.random.seed(seed)
random.set_seed(seed)

from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
results_1 = []
models = [build_model_1, build_model_2, build_model_3]

for n in range(len(models)):
    model = KerasRegressor(build_fn=models[n], epochs=100, batch_size=20, verbose=0, shuffle=False)
    kf = KFold(n_splits=3)
    result = cross_val_score(model, X, y, cv=kf)
    results_1.append(result)

  model = KerasRegressor(build_fn=models[n], epochs=100, batch_size=20, verbose=0, shuffle=False)
  model = KerasRegressor(build_fn=models[n], epochs=100, batch_size=20, verbose=0, shuffle=False)
  model = KerasRegressor(build_fn=models[n], epochs=100, batch_size=20, verbose=0, shuffle=False)


In [4]:
print("Cross Validation Loss for Model 1 =", abs(results_1[0].mean()))
print("Cross Validation Loss for Model 2 =", abs(results_1[1].mean()))
print("Cross Validation Loss for Model 3 =", abs(results_1[2].mean()))

Cross Validation Loss for Model 1 = 0.9708917339642843
Cross Validation Loss for Model 2 = 0.9561373988787333
Cross Validation Loss for Model 3 = 2.49636443456014


In [5]:
np.random.seed(seed)
random.set_seed(seed)
results_2 = []
epochs = [100, 150]
batches = [20, 15]

for e in range(len(epochs)):
    for b in range(len(batches)):
        model = KerasRegressor(build_fn= build_model_2, epochs= epochs[e], batch_size= batches[b], verbose=0, shuffle=False)
        kf = KFold(n_splits=3)
        result = cross_val_score(model, X, y, cv=kf)
        results_2.append(result)

  model = KerasRegressor(build_fn= build_model_2, epochs= epochs[e], batch_size= batches[b], verbose=0, shuffle=False)
  model = KerasRegressor(build_fn= build_model_2, epochs= epochs[e], batch_size= batches[b], verbose=0, shuffle=False)
  model = KerasRegressor(build_fn= build_model_2, epochs= epochs[e], batch_size= batches[b], verbose=0, shuffle=False)
  model = KerasRegressor(build_fn= build_model_2, epochs= epochs[e], batch_size= batches[b], verbose=0, shuffle=False)


In [6]:
# Print cross validation score for each possible pair of epochs, batch_size
c = 0
for e in range(len(epochs)):
    for b in range(len(batches)):
        print("batch_size =", batches[b],", epochs =", epochs[e], ", Test Loss =", abs(results_2[c].mean()))
        c += 1

batch_size = 20 , epochs = 100 , Test Loss = 2.146423041820526
batch_size = 15 , epochs = 100 , Test Loss = 1.6488028168678284
batch_size = 20 , epochs = 150 , Test Loss = 0.9207906126976013
batch_size = 15 , epochs = 150 , Test Loss = 1.2925216952959697


Build a model with the ideal architecture and hyperparameters after using cross-validation

In [9]:
# Modify build_model_2 function
def build_model_2(activation='relu', optimizer='adam'):
    # build the Keras model_2
    model = Sequential()
    model.add(Dense(8, input_dim=X.shape[1], activation=activation))
    model.add(Dense(1))
    # Compile the model
    model.compile(loss='mean_squared_error', optimizer=optimizer)
    # return the model
    return model

results_3 = []
activations = ['relu', 'tanh']
optimizers = ['sgd', 'adam', 'rmsprop']

# define a seed for random number generator so the result will be reproducible
np.random.seed(seed)
random.set_seed(seed)
# Loop over pairs of activation and optimizer
for o in range(len(optimizers)):
    for a in range(len(activations)):
        optimizer = optimizers[o]
        activation = activations[a]
        model = KerasRegressor(build_fn= build_model_2, epochs=100, batch_size=20, verbose=0, shuffle=False)
        kf = KFold(n_splits=3)
        result = cross_val_score(model, X, y, cv=kf)
        results_3.append(result)

  model = KerasRegressor(build_fn= build_model_2, epochs=100, batch_size=20, verbose=0, shuffle=False)
  model = KerasRegressor(build_fn= build_model_2, epochs=100, batch_size=20, verbose=0, shuffle=False)
  model = KerasRegressor(build_fn= build_model_2, epochs=100, batch_size=20, verbose=0, shuffle=False)
  model = KerasRegressor(build_fn= build_model_2, epochs=100, batch_size=20, verbose=0, shuffle=False)
  model = KerasRegressor(build_fn= build_model_2, epochs=100, batch_size=20, verbose=0, shuffle=False)
  model = KerasRegressor(build_fn= build_model_2, epochs=100, batch_size=20, verbose=0, shuffle=False)


In [10]:
c = 0
for o in range(len(optimizers)):
    for a in range(len(activations)):
        print("activation = ", activations[a],", optimizer = ", optimizers[o], ", Test Loss = ", abs(results_3[c].mean()))
        c += 1

activation =  relu , optimizer =  sgd , Test Loss =  0.9898746013641357
activation =  tanh , optimizer =  sgd , Test Loss =  0.9351770083109537
activation =  relu , optimizer =  adam , Test Loss =  1.000377078851064
activation =  tanh , optimizer =  adam , Test Loss =  0.9615645011266073
activation =  relu , optimizer =  rmsprop , Test Loss =  0.935382068157196
activation =  tanh , optimizer =  rmsprop , Test Loss =  1.0647401213645935
