In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.wrappers.scikit_learn import KerasClassifier
from keras.optimizers import SGD
from keras.constraints import maxnorm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

Using TensorFlow backend.


In [2]:
#ModelCreate#Applies to All grid searches used.
def create_model(input_dim=13, 
                 output_size=3,
                 optimizer='adam', 
                 init_mode='uniform', 
                 activation='relu', 
                 dropout_rate=-1, 
                 weight_constraint=-1, 
                 neurons=16,
                 n_layers=2):
    # layers_n = 4 #tunable var
    # layer_width #same thing as neurons

    # create model
    model = Sequential()
    model.add(Dense(neurons, input_dim=input_dim, kernel_initializer=init_mode, activation=activation))                
    for i in  range(n_layers-1): # hidden layers...
        model.add(Dense(neurons, kernel_initializer=init_mode, activation=activation))                
    model.add(Dense(output_size, kernel_initializer=init_mode, activation='sigmoid'))
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

In [3]:
def gsBatchEpoch(input_dim=13, output_size=3):
    model = KerasClassifier(build_fn=create_model, output_size=output_size, input_dim=input_dim, verbose=0)
    # define the grid search parameters
    batch_size = [10, 20, 40, 60, 80, 100]
    epochs = [10, 50, 100]
    param_grid = dict(batch_size=batch_size, epochs=epochs)
    return model, batch_size, epochs, param_grid

In [4]:
def gsOptimizer(input_dim=13, output_size=3, batch_size=10, epochs=100):
    #####GS2
    # create model
    model = KerasClassifier(build_fn=create_model, output_size=output_size, input_dim=input_dim, verbose=0, batch_size=batch_size, epochs=epochs)

    # define the grid search parameters
    optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
    param_grid = dict(optimizer=optimizer)
    
    return model, optimizer, param_grid

In [5]:
def gsInitMode(input_dim=13, output_size=3, batch_size=10, epochs=100, optimizer='adam'):
    # create model
    model = KerasClassifier(build_fn=create_model, output_size=output_size, input_dim=input_dim, optimizer=optimizer, verbose=0, batch_size=batch_size, epochs=epochs)
    init_mode = ['uniform', 'lecun_uniform', 'normal', 'zero', 'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform']
    param_grid = dict(init_mode=init_mode)
    return model, init_mode, param_grid

In [6]:
def gsActivation(input_dim=13, output_size=3, batch_size=10, epochs=100, optimizer='adam', init_mode='uniform'):
    # create model
    model = KerasClassifier(build_fn=create_model, output_size=output_size, input_dim=input_dim, optimizer=optimizer, init_mode=init_mode, verbose=0, batch_size=batch_size, epochs=epochs)
    activation = ['softmax', 'softplus', 'softsign', 'relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear']
    param_grid = dict(activation=activation)
    
    return model, activation, param_grid

In [7]:
def gsNeurons(input_dim=13, output_size=3, batch_size=10, epochs=100, optimizer='adam', init_mode='uniform', activation='relu'):
    # create model
    model = KerasClassifier(build_fn=create_model, output_size=output_size, input_dim=input_dim, optimizer=optimizer, init_mode=init_mode, activation=activation, verbose=0, batch_size=batch_size, epochs=epochs)
    neurons = [1, 5, 10, 15, 20, 25, 30]
    param_grid = dict(neurons=neurons)
    return model, neurons, param_grid

In [8]:
def gsLayers(input_dim=13, output_size=3, batch_size=10, epochs=100, optimizer='adam', init_mode='uniform', activation='relu', neurons=16):
    # create model
    model = KerasClassifier(build_fn=create_model, output_size=output_size, input_dim=input_dim, optimizer=optimizer, init_mode=init_mode, activation=activation, verbose=0, batch_size=batch_size, epochs=epochs)
    n_layers = [2, 3, 4, 5, 6, 7, 8, 9, 10]
    param_grid = dict(n_layers=n_layers)
    return model, n_layers, param_grid

In [9]:
def studentToMatrix(Y):
    matchDict = {'H':[1, 0, 0], 'M':[0, 1, 0], 'L':[0, 0, 1]}
    return [matchDict[x[0]] for x in Y.as_matrix()]

In [10]:
def matrixToClass(Y):
    def vec2num(x):
        if x[0] == 1: return 0
        if x[1] == 1: return 1
        if x[2] == 1: return 2
        return -1
    return [vec2num(x) for x in Y]

In [11]:
def colourToMatrix(Y):
    matchDict = {'green':[1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 'purple':[0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
                 'yellow':[0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
                 'orange':[0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
                 'black':[0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
                 'grey':[0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
                 'red':[0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
                 'blue':[0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
                 'brown':[0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
                 'white':[0, 0, 0, 0, 0, 0, 0, 0, 0, 1]}
    return [matchDict[x[0]] for x in Y.as_matrix()]

In [39]:
def weatherToMatrix(Y):
    matchDict = {'Anchorage':     [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 'Atlanta':       [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 'Atlantic City': [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 'Calgary':       [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 'Chicago':       [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 'Denver':        [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 'Edmonton':      [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 'Gander':        [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 'Halifax':       [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 'London':        [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 'Los Angeles':   [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 'Miami':         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 'Montreal':      [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 'New Orleans':   [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 'Ottawa':        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 'Portland':      [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 'Quebec':   [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                 'Raleigh Durham':[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
                 'Regina':        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
                 'San Francisco': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
                 'Saskatoon':     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
                 'Seattle':       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
                 'Toronto':       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],
                 'Vancouver':     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
                 'Victoria':      [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
                 'Winnipeg':      [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]}
                 #Quebec City data needs to be changed to English in the file.
    return [matchDict[x[0]] for x in Y.as_matrix()]

In [16]:
def matrixToCityClass(Y):
    def vec2num(x):
        if x[0] == 1: return 0
        if x[1] == 1: return 1
        if x[2] == 1: return 2
        if x[3] == 1: return 3
        if x[4] == 1: return 4
        if x[5] == 1: return 5
        if x[6] == 1: return 6
        if x[7] == 1: return 7
        if x[8] == 1: return 8
        if x[9] == 1: return 9
        if x[10] == 1: return 10
        if x[11] == 1: return 11
        if x[12] == 1: return 12
        if x[13] == 1: return 13
        if x[14] == 1: return 14
        if x[15] == 1: return 15
        if x[16] == 1: return 16
        if x[17] == 1: return 17
        if x[18] == 1: return 18
        if x[19] == 1: return 19
        if x[20] == 1: return 20
        if x[21] == 1: return 21
        if x[22] == 1: return 22
        if x[23] == 1: return 23
        if x[24] == 1: return 24
        if x[25] == 1: return 25
        return -1
    return [vec2num(x) for x in Y]

In [17]:
def matrixToColourClass(Y):
    def vec2num(x):
        if x[0] == 1: return 0
        if x[1] == 1: return 1
        if x[2] == 1: return 2
        if x[3] == 1: return 3
        if x[4] == 1: return 4
        if x[5] == 1: return 5
        if x[6] == 1: return 6
        if x[7] == 1: return 7
        if x[8] == 1: return 8
        if x[9] == 1: return 9
        return -1
    return [vec2num(x) for x in Y]

In [18]:
def studentSetup():
    ####Education Data Setup
    # load dataset
    dataset = pd.read_csv("slim-xAPI-Edu-Data.csv")
    #students failed if they are in class L.
    #dataset['Failed'] = np.where(dataset['Class'] == 'L', True, False)
    dataset['gender'] = np.where(dataset['gender']=='M',1,0)
    dataset['Relation'] = np.where(dataset['Relation']=='Father',1,0)
    dataset['ParentAnsweringSurvey'] = np.where(dataset['ParentAnsweringSurvey'] == 'Yes', 1, 0)
    dataset['ParentschoolSatisfaction'] = np.where(dataset['ParentschoolSatisfaction'] == 'Yes', 1, 0)
    dataset['AbsentMoreThanWeek'] = np.where(dataset['StudentAbsenceDays'] == 'Above-7', 1, 0)
    dataset['Semester'] = np.where(dataset['Semester'] == 'F', 1, 0)
    X = dataset[['raisedhands', 'VisITedResources', 'SectionID', 'Topic', 'StageID', 'AnnouncementsView', 'Semester', 'Discussion', 'gender', 'Relation', 'ParentAnsweringSurvey', 'ParentschoolSatisfaction', 'AbsentMoreThanWeek']]
    Y = studentToMatrix(dataset[['Class']])
    
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y)
    
    return X_train, X_test, Y_train, Y_test

In [19]:
def colourSetup():
    dataset = pd.read_csv("colour-data.csv")
    X = dataset[['R', 'G', 'B']]
    Y = colourToMatrix(dataset[['Label']])
    
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y)
    
    return X_train, X_test, Y_train, Y_test

In [65]:
def weatherSetup():
    dataset = pd.read_csv("modified-monthly-data-labelled.csv")
    X = dataset
    X = X.drop('city', axis=1)
    Y = weatherToMatrix(dataset[['city']])
    
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y)
    
    return X_train, X_test, Y_train, Y_test

In [21]:
def printResult(grid_result):
        #Printing Results / Best Parameters.
    print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
    means = grid_result.cv_results_['mean_test_score']
    stds = grid_result.cv_results_['std_test_score']
    params = grid_result.cv_results_['params']
    for mean, stdev, param in zip(means, stds, params):
        print("%f (%f) with: %r" % (mean, stdev, param))

In [22]:
# fix random seed for reproducibility
seed = 7
np.random.seed(seed)

In [271]:
#####start of Student Data Analysis.
X_train, X_test, Y_train, Y_test = studentSetup()

In [272]:
##### Grid Search 1 - Batch Size and Epochs.
model, batch_size, epochs, param_grid = gsBatchEpoch(input_dim=X_train.shape[1])

In [273]:
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
grid_result = grid.fit(np.array(X_train), np.array(Y_train))

In [274]:
bestBatchSize = grid_result.best_params_.get('batch_size')
bestEpochs = grid_result.best_params_.get('epochs')
printResult(grid_result)

Best: 0.820370 using {'batch_size': 20, 'epochs': 100}
0.687037 (0.022944) with: {'batch_size': 10, 'epochs': 10}
0.808333 (0.014873) with: {'batch_size': 10, 'epochs': 50}
0.806481 (0.022831) with: {'batch_size': 10, 'epochs': 100}
0.672222 (0.011340) with: {'batch_size': 20, 'epochs': 10}
0.763889 (0.028956) with: {'batch_size': 20, 'epochs': 50}
0.820370 (0.007291) with: {'batch_size': 20, 'epochs': 100}
0.661111 (0.004536) with: {'batch_size': 40, 'epochs': 10}
0.720370 (0.016091) with: {'batch_size': 40, 'epochs': 50}
0.812037 (0.017023) with: {'batch_size': 40, 'epochs': 100}
0.666667 (0.000000) with: {'batch_size': 60, 'epochs': 10}
0.697222 (0.002268) with: {'batch_size': 60, 'epochs': 50}
0.745370 (0.019816) with: {'batch_size': 60, 'epochs': 100}
0.666667 (0.000000) with: {'batch_size': 80, 'epochs': 10}
0.699074 (0.004721) with: {'batch_size': 80, 'epochs': 50}
0.753704 (0.012491) with: {'batch_size': 80, 'epochs': 100}
0.666667 (0.000000) with: {'batch_size': 100, 'epochs':

In [275]:
Y_predict = grid.predict(np.array(X_test))
Y_testing = np.array(matrixToClass(Y_test))

In [276]:
accuracy_score(Y_testing, Y_predict)

0.7583333333333333

In [249]:
##### Grid Search 2 - for best Optimizer.
model, optimizer, param_grid = gsOptimizer(input_dim=X_train.shape[1], batch_size=bestBatchSize, epochs=bestEpochs)

In [206]:
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
grid_result = grid.fit(np.array(X_train), np.array(Y_train))

In [207]:
#From GS2, we get the best optimizer.  Save this for future reference.  Pray it is not SGD.
bestOptimizer = grid_result.best_params_.get('optimizer')
printResult(grid_result)

Best: 0.827778 using {'optimizer': 'Adam'}
0.685185 (0.006929) with: {'optimizer': 'SGD'}
0.798148 (0.012491) with: {'optimizer': 'RMSprop'}
0.749074 (0.014757) with: {'optimizer': 'Adagrad'}
0.747222 (0.046867) with: {'optimizer': 'Adadelta'}
0.827778 (0.011785) with: {'optimizer': 'Adam'}
0.812037 (0.005708) with: {'optimizer': 'Adamax'}
0.737963 (0.056716) with: {'optimizer': 'Nadam'}


In [209]:
Y_predict = grid.predict(np.array(X_test))
Y_testing = np.array(matrixToClass(Y_test))
accuracy_score(Y_testing, Y_predict)

0.78333333333333333

In [210]:
##### Grid Search 4 - for best init_mode
model, init_mode, param_grid = gsInitMode(input_dim=X_train.shape[1], batch_size=bestBatchSize, epochs=bestEpochs, optimizer=bestOptimizer)

In [211]:
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
grid_result = grid.fit(np.array(X_train), np.array(Y_train))

In [212]:
bestInitMode = grid_result.best_params_.get('init_mode')
printResult(grid_result)

Best: 0.812963 using {'init_mode': 'normal'}
0.810185 (0.016718) with: {'init_mode': 'uniform'}
0.762963 (0.010227) with: {'init_mode': 'lecun_uniform'}
0.812963 (0.024251) with: {'init_mode': 'normal'}
0.666667 (0.000000) with: {'init_mode': 'zero'}
0.775926 (0.024880) with: {'init_mode': 'glorot_normal'}
0.742593 (0.024251) with: {'init_mode': 'glorot_uniform'}
0.746296 (0.001309) with: {'init_mode': 'he_normal'}
0.649074 (0.109815) with: {'init_mode': 'he_uniform'}


In [213]:
Y_predict = grid.predict(np.array(X_test))
Y_testing = np.array(matrixToClass(Y_test))
accuracy_score(Y_testing, Y_predict)

0.76666666666666672

In [214]:
##### Grid Search 5 - for best Activation
model, activation, param_grid = gsActivation(input_dim=X_train.shape[1], batch_size=bestBatchSize, epochs=bestEpochs, optimizer=bestOptimizer, init_mode=bestInitMode)

In [215]:
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
grid_result = grid.fit(np.array(X_train), np.array(Y_train))

In [216]:
bestActivation = grid_result.best_params_.get('activation')
printResult(grid_result)

Best: 0.828704 using {'activation': 'relu'}
0.739815 (0.033970) with: {'activation': 'softmax'}
0.822222 (0.020159) with: {'activation': 'softplus'}
0.804630 (0.037291) with: {'activation': 'softsign'}
0.828704 (0.008587) with: {'activation': 'relu'}
0.819444 (0.014164) with: {'activation': 'tanh'}
0.765741 (0.032182) with: {'activation': 'sigmoid'}
0.762963 (0.011416) with: {'activation': 'hard_sigmoid'}
0.760185 (0.018332) with: {'activation': 'linear'}


In [217]:
Y_predict = grid.predict(np.array(X_test))
Y_testing = np.array(matrixToClass(Y_test))
accuracy_score(Y_testing, Y_predict)

0.7416666666666667

In [222]:
##### Grid Search 7 - for best Neurons
model, neurons, param_grid = gsNeurons(input_dim=X_train.shape[1], batch_size=bestBatchSize, activation=bestActivation, epochs=bestEpochs, optimizer=bestOptimizer, init_mode=bestInitMode)

In [223]:
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
grid_result = grid.fit(np.array(X_train), np.array(Y_train))

In [224]:
bestNeurons = grid_result.best_params_.get('neurons')
printResult(grid_result)

Best: 0.824074 using {'neurons': 30}
0.684259 (0.024880) with: {'neurons': 1}
0.715741 (0.010718) with: {'neurons': 5}
0.800000 (0.009886) with: {'neurons': 10}
0.820370 (0.014582) with: {'neurons': 15}
0.818519 (0.005708) with: {'neurons': 20}
0.824074 (0.010227) with: {'neurons': 25}
0.824074 (0.013290) with: {'neurons': 30}


In [226]:
Y_predict = grid.predict(np.array(X_test))
Y_testing = np.array(matrixToClass(Y_test))
accuracy_score(Y_testing, Y_predict)

0.72499999999999998

In [254]:
X_train.shape[1]

3

In [250]:
##### Grid Search 7 - for best Layers.
model, neurons, param_grid = gsLayers(input_dim=X_train.shape[1], batch_size=bestBatchSize, activation=bestActivation, epochs=bestEpochs, optimizer=bestOptimizer, init_mode=bestInitMode, neurons=bestNeurons)

In [251]:
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
grid_result = grid.fit(np.array(X_train), np.array(Y_train))

ValueError: Error when checking target: expected dense_817 to have shape (None, 3) but got array with shape (1258, 10)

In [229]:
bestLayers = grid_result.best_params_.get('n_layers')
printResult(grid_result)

Best: 0.821296 using {'n_layers': 3}
0.811111 (0.009072) with: {'n_layers': 2}
0.821296 (0.005708) with: {'n_layers': 3}
0.820370 (0.020580) with: {'n_layers': 4}
0.820370 (0.023277) with: {'n_layers': 5}
0.782407 (0.022376) with: {'n_layers': 6}
0.819444 (0.012628) with: {'n_layers': 7}
0.800000 (0.016355) with: {'n_layers': 8}
0.741667 (0.014164) with: {'n_layers': 9}
0.711111 (0.037474) with: {'n_layers': 10}


In [230]:
Y_predict = grid.predict(np.array(X_test))
Y_testing = np.array(matrixToClass(Y_test))
accuracy_score(Y_testing, Y_predict)

0.73333333333333328

In [233]:
gsKerasHP = pd.DataFrame()

In [234]:
gsKerasHP = gsKerasHP.append({'dataset':'student', 'batch_size':bestBatchSize, 'activation':bestActivation, 'epochs':bestEpochs, 'optimizer':bestOptimizer, 'init_mode':bestInitMode, 'neurons':bestNeurons, 'n_layers':bestLayers}, ignore_index=True)
gsKerasHP

Unnamed: 0,activation,batch_size,dataset,epochs,init_mode,n_layers,neurons,optimizer
0,relu,20.0,student,100.0,normal,3.0,30.0,Adam


In [23]:
#####start of Color Data Analysis
X_train, X_test, Y_train, Y_test = colourSetup()

In [287]:
##### Grid Search 1 - Batch Size and Epochs.
model, batch_size, epochs, param_grid = gsBatchEpoch(input_dim=X_train.shape[1], output_size=10)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)

In [289]:
grid_result = grid.fit(np.array(X_train), np.array(Y_train))

In [292]:
bestBatchSize = grid_result.best_params_.get('batch_size')
bestEpochs = grid_result.best_params_.get('epochs')
printResult(grid_result)

Best: 0.942501 using {'batch_size': 20, 'epochs': 100}
0.926285 (0.004138) with: {'batch_size': 10, 'epochs': 10}
0.938633 (0.000980) with: {'batch_size': 10, 'epochs': 50}
0.942342 (0.000375) with: {'batch_size': 10, 'epochs': 100}
0.925543 (0.001459) with: {'batch_size': 20, 'epochs': 10}
0.936884 (0.001964) with: {'batch_size': 20, 'epochs': 50}
0.942501 (0.003062) with: {'batch_size': 20, 'epochs': 100}
0.922099 (0.002596) with: {'batch_size': 40, 'epochs': 10}
0.935718 (0.001560) with: {'batch_size': 40, 'epochs': 50}
0.939322 (0.000640) with: {'batch_size': 40, 'epochs': 100}
0.912295 (0.002219) with: {'batch_size': 60, 'epochs': 10}
0.933015 (0.001654) with: {'batch_size': 60, 'epochs': 50}
0.937573 (0.001436) with: {'batch_size': 60, 'epochs': 100}
0.904716 (0.004353) with: {'batch_size': 80, 'epochs': 10}
0.930790 (0.002848) with: {'batch_size': 80, 'epochs': 50}
0.938050 (0.002366) with: {'batch_size': 80, 'epochs': 100}
0.900159 (0.000130) with: {'batch_size': 100, 'epochs':

In [296]:
Y_predict = grid.predict(np.array(X_test))
Y_testing = np.array(matrixToColourClass(Y_test))
accuracy_score(Y_testing, Y_predict)

0.69157392686804453

In [322]:
##### Grid Search 2 - for best Optimizer.
model, optimizer, param_grid = gsOptimizer(input_dim=X_train.shape[1], output_size=10, batch_size=bestBatchSize, epochs=bestEpochs)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)

In [323]:
grid_result = grid.fit(np.array(X_train), np.array(Y_train))

In [332]:
#From GS2, we get the best optimizer.  Save this for future reference.  Pray it is not SGD.
bestOptimizer = grid_result.best_params_.get('optimizer')
printResult(grid_result)

Best: 0.940700 using {'optimizer': 'Adam'}
0.928776 (0.002586) with: {'optimizer': 'SGD'}
0.939587 (0.004673) with: {'optimizer': 'RMSprop'}
0.935612 (0.002184) with: {'optimizer': 'Adagrad'}
0.936884 (0.000936) with: {'optimizer': 'Adadelta'}
0.940700 (0.001014) with: {'optimizer': 'Adam'}
0.939216 (0.002208) with: {'optimizer': 'Adamax'}
0.916481 (0.001772) with: {'optimizer': 'Nadam'}


In [333]:
Y_predict = grid.predict(np.array(X_test))
Y_testing = np.array(matrixToColourClass(Y_test))
accuracy_score(Y_testing, Y_predict)

0.70588235294117652

In [334]:
##### Grid Search 4 - for best init_mode
model, init_mode, param_grid = gsInitMode(input_dim=X_train.shape[1], output_size=10, batch_size=bestBatchSize, epochs=bestEpochs, optimizer=bestOptimizer)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)

In [335]:
grid_result = grid.fit(np.array(X_train), np.array(Y_train))

In [337]:
bestInitMode = grid_result.best_params_.get('init_mode')
printResult(grid_result)

Best: 0.941494 using {'init_mode': 'uniform'}
0.941494 (0.001038) with: {'init_mode': 'uniform'}
0.925914 (0.007874) with: {'init_mode': 'lecun_uniform'}
0.940647 (0.002208) with: {'init_mode': 'normal'}
0.900000 (0.000000) with: {'init_mode': 'zero'}
0.937626 (0.001892) with: {'init_mode': 'glorot_normal'}
0.936248 (0.001817) with: {'init_mode': 'glorot_uniform'}
0.928458 (0.003534) with: {'init_mode': 'he_normal'}
0.921092 (0.009258) with: {'init_mode': 'he_uniform'}


In [338]:
Y_predict = grid.predict(np.array(X_test))
Y_testing = np.array(matrixToColourClass(Y_test))
accuracy_score(Y_testing, Y_predict)

0.69475357710651831

In [27]:
##### Grid Search 5 - for best Activation
model, activation, param_grid = gsActivation(input_dim=X_train.shape[1], output_size=10, batch_size=bestBatchSize, epochs=bestEpochs, optimizer=bestOptimizer, init_mode=bestInitMode)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)

In [28]:
grid_result = grid.fit(np.array(X_train), np.array(Y_train))

In [29]:
bestActivation = grid_result.best_params_.get('activation')
printResult(grid_result)

Best: 0.941706 using {'activation': 'softplus'}
0.900000 (0.000000) with: {'activation': 'softmax'}
0.941706 (0.000983) with: {'activation': 'softplus'}
0.902703 (0.003822) with: {'activation': 'softsign'}
0.939693 (0.002773) with: {'activation': 'relu'}
0.917117 (0.003830) with: {'activation': 'tanh'}
0.919767 (0.001549) with: {'activation': 'sigmoid'}
0.922840 (0.003445) with: {'activation': 'hard_sigmoid'}
0.918230 (0.001856) with: {'activation': 'linear'}


In [30]:
Y_predict = grid.predict(np.array(X_test))
Y_testing = np.array(matrixToColourClass(Y_test))
accuracy_score(Y_testing, Y_predict)

0.75039745627980925

In [31]:
##### Grid Search 7 - for best Neurons
model, neurons, param_grid = gsNeurons(input_dim=X_train.shape[1], output_size=10, batch_size=bestBatchSize, epochs=bestEpochs, optimizer=bestOptimizer, init_mode=bestInitMode)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)

In [32]:
grid_result = grid.fit(np.array(X_train), np.array(Y_train))

In [33]:
bestNeurons = grid_result.best_params_.get('neurons')
printResult(grid_result)

Best: 0.940859 using {'neurons': 30}
0.901113 (0.001574) with: {'neurons': 1}
0.930896 (0.004536) with: {'neurons': 5}
0.936990 (0.004041) with: {'neurons': 10}
0.939004 (0.002208) with: {'neurons': 15}
0.940700 (0.002698) with: {'neurons': 20}
0.940329 (0.002665) with: {'neurons': 25}
0.940859 (0.002252) with: {'neurons': 30}


In [34]:
Y_predict = grid.predict(np.array(X_test))
Y_testing = np.array(matrixToColourClass(Y_test))
accuracy_score(Y_testing, Y_predict)

0.73926868044515104

In [35]:
##### Grid Search 8 - for best Layers.
model, neurons, param_grid = gsLayers(input_dim=X_train.shape[1], output_size=10, batch_size=bestBatchSize, activation=bestActivation, epochs=bestEpochs, optimizer=bestOptimizer, init_mode=bestInitMode, neurons=bestNeurons)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)


In [36]:
grid_result = grid.fit(np.array(X_train), np.array(Y_train))

In [37]:
bestLayers = grid_result.best_params_.get('n_layers')
printResult(grid_result)

Best: 0.941600 using {'n_layers': 2}
0.941600 (0.002402) with: {'n_layers': 2}
0.941388 (0.002803) with: {'n_layers': 3}
0.940541 (0.003500) with: {'n_layers': 4}
0.933757 (0.000761) with: {'n_layers': 5}
0.932379 (0.004247) with: {'n_layers': 6}
0.919979 (0.007239) with: {'n_layers': 7}
0.916110 (0.002668) with: {'n_layers': 8}
0.913831 (0.002606) with: {'n_layers': 9}
0.908956 (0.001281) with: {'n_layers': 10}


In [38]:
Y_predict = grid.predict(np.array(X_test))
Y_testing = np.array(matrixToColourClass(Y_test))
accuracy_score(Y_testing, Y_predict)

0.74721780604133547

In [41]:
gsKerasHP = pd.DataFrame()

In [42]:
gsKerasHP = gsKerasHP.append({'dataset':'color', 'batch_size':bestBatchSize, 'activation':bestActivation, 'epochs':bestEpochs, 'optimizer':bestOptimizer, 'init_mode':bestInitMode, 'neurons':bestNeurons, 'n_layers':bestLayers}, ignore_index=True)
gsKerasHP

Unnamed: 0,activation,batch_size,dataset,epochs,init_mode,n_layers,neurons,optimizer
0,softplus,20.0,color,100.0,uniform,2.0,30.0,Adam


In [69]:
#####start of Weather Data Analysis
X_train, X_test, Y_train, Y_test = weatherSetup()

In [70]:
##### Grid Search 1 - Batch Size and Epochs.
model, batch_size, epochs, param_grid = gsBatchEpoch(input_dim=X_train.shape[1], output_size=26)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)

In [71]:
grid_result = grid.fit(np.array(X_train), np.array(Y_train))

In [72]:
bestBatchSize = grid_result.best_params_.get('batch_size')
bestEpochs = grid_result.best_params_.get('epochs')
printResult(grid_result)

Best: 0.979818 using {'batch_size': 10, 'epochs': 100}
0.962601 (0.000925) with: {'batch_size': 10, 'epochs': 10}
0.975170 (0.001415) with: {'batch_size': 10, 'epochs': 50}
0.979818 (0.002838) with: {'batch_size': 10, 'epochs': 100}
0.962158 (0.000490) with: {'batch_size': 20, 'epochs': 10}
0.971762 (0.002227) with: {'batch_size': 20, 'epochs': 50}
0.975923 (0.001474) with: {'batch_size': 20, 'epochs': 100}
0.961538 (0.000000) with: {'batch_size': 40, 'epochs': 10}
0.968000 (0.002828) with: {'batch_size': 40, 'epochs': 50}
0.975392 (0.002000) with: {'batch_size': 40, 'epochs': 100}
0.961538 (0.000000) with: {'batch_size': 60, 'epochs': 10}
0.963530 (0.001508) with: {'batch_size': 60, 'epochs': 50}
0.974020 (0.002077) with: {'batch_size': 60, 'epochs': 100}
0.961538 (0.000000) with: {'batch_size': 80, 'epochs': 10}
0.965079 (0.001901) with: {'batch_size': 80, 'epochs': 50}
0.969018 (0.002714) with: {'batch_size': 80, 'epochs': 100}
0.961538 (0.000000) with: {'batch_size': 100, 'epochs':

In [73]:
Y_predict = grid.predict(np.array(X_test))
Y_testing = np.array(matrixToCityClass(Y_test))
accuracy_score(Y_testing, Y_predict)

0.82068965517241377

In [74]:
##### Grid Search 2 - for best Optimizer.
model, optimizer, param_grid = gsOptimizer(input_dim=X_train.shape[1], output_size=26, batch_size=bestBatchSize, epochs=bestEpochs)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)

In [75]:
grid_result = grid.fit(np.array(X_train), np.array(Y_train))

In [76]:
#From GS2, we get the best optimizer.  Save this for future reference.  Pray it is not SGD.
bestOptimizer = grid_result.best_params_.get('optimizer')
printResult(grid_result)

Best: 0.978357 using {'optimizer': 'Adamax'}
0.914181 (0.033533) with: {'optimizer': 'SGD'}
0.962158 (0.000878) with: {'optimizer': 'RMSprop'}
0.960388 (0.000980) with: {'optimizer': 'Adagrad'}
0.974639 (0.000717) with: {'optimizer': 'Adadelta'}
0.976454 (0.002951) with: {'optimizer': 'Adam'}
0.978357 (0.003089) with: {'optimizer': 'Adamax'}
0.961538 (0.000000) with: {'optimizer': 'Nadam'}


In [77]:
Y_predict = grid.predict(np.array(X_test))
Y_testing = np.array(matrixToCityClass(Y_test))
accuracy_score(Y_testing, Y_predict)

0.81034482758620685

In [78]:
##### Grid Search 4 - for best init_mode
model, init_mode, param_grid = gsInitMode(input_dim=X_train.shape[1], output_size=26, batch_size=bestBatchSize, epochs=bestEpochs, optimizer=bestOptimizer)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)

In [79]:
grid_result = grid.fit(np.array(X_train), np.array(Y_train))

In [80]:
bestInitMode = grid_result.best_params_.get('init_mode')
printResult(grid_result)

Best: 0.978003 using {'init_mode': 'uniform'}
0.978003 (0.002980) with: {'init_mode': 'uniform'}
0.753607 (0.035181) with: {'init_mode': 'lecun_uniform'}
0.977516 (0.000945) with: {'init_mode': 'normal'}
0.961538 (0.000000) with: {'init_mode': 'zero'}
0.853899 (0.128947) with: {'init_mode': 'glorot_normal'}
0.796672 (0.059215) with: {'init_mode': 'glorot_uniform'}
0.722758 (0.032304) with: {'init_mode': 'he_normal'}
0.737806 (0.103144) with: {'init_mode': 'he_uniform'}


In [81]:
Y_predict = grid.predict(np.array(X_test))
Y_testing = np.array(matrixToCityClass(Y_test))
accuracy_score(Y_testing, Y_predict)

0.78620689655172415

In [82]:
##### Grid Search 5 - for best Activation
model, activation, param_grid = gsActivation(input_dim=X_train.shape[1], output_size=26, batch_size=bestBatchSize, epochs=bestEpochs, optimizer=bestOptimizer, init_mode=bestInitMode)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)

In [83]:
grid_result = grid.fit(np.array(X_train), np.array(Y_train))

In [84]:
bestActivation = grid_result.best_params_.get('activation')
printResult(grid_result)

Best: 0.979641 using {'activation': 'softplus'}
0.961538 (0.000000) with: {'activation': 'softmax'}
0.979641 (0.001690) with: {'activation': 'softplus'}
0.961538 (0.000000) with: {'activation': 'softsign'}
0.978888 (0.001769) with: {'activation': 'relu'}
0.961538 (0.000000) with: {'activation': 'tanh'}
0.961538 (0.000000) with: {'activation': 'sigmoid'}
0.961538 (0.000000) with: {'activation': 'hard_sigmoid'}
0.969107 (0.000382) with: {'activation': 'linear'}


In [85]:
Y_predict = grid.predict(np.array(X_test))
Y_testing = np.array(matrixToCityClass(Y_test))
accuracy_score(Y_testing, Y_predict)

0.73448275862068968

In [86]:
##### Grid Search 7 - for best Neurons
model, neurons, param_grid = gsNeurons(input_dim=X_train.shape[1], output_size=26, batch_size=bestBatchSize, epochs=bestEpochs, optimizer=bestOptimizer, init_mode=bestInitMode)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)

In [87]:
grid_result = grid.fit(np.array(X_train), np.array(Y_train))

In [88]:
bestNeurons = grid_result.best_params_.get('neurons')
printResult(grid_result)

Best: 0.980836 using {'neurons': 20}
0.961538 (0.000000) with: {'neurons': 1}
0.962025 (0.000512) with: {'neurons': 5}
0.973975 (0.003095) with: {'neurons': 10}
0.976056 (0.002757) with: {'neurons': 15}
0.980836 (0.001492) with: {'neurons': 20}
0.980703 (0.002114) with: {'neurons': 25}
0.980791 (0.002167) with: {'neurons': 30}


In [89]:
Y_predict = grid.predict(np.array(X_test))
Y_testing = np.array(matrixToCityClass(Y_test))
accuracy_score(Y_testing, Y_predict)

0.82758620689655171

In [90]:
##### Grid Search 8 - for best Layers.
model, neurons, param_grid = gsLayers(input_dim=X_train.shape[1], output_size=26, batch_size=bestBatchSize, activation=bestActivation, epochs=bestEpochs, optimizer=bestOptimizer, init_mode=bestInitMode, neurons=bestNeurons)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)

In [91]:
grid_result = grid.fit(np.array(X_train), np.array(Y_train))

In [92]:
bestLayers = grid_result.best_params_.get('n_layers')
printResult(grid_result)

Best: 0.980172 using {'n_layers': 2}
0.980172 (0.000921) with: {'n_layers': 2}
0.974197 (0.001541) with: {'n_layers': 3}
0.968531 (0.002713) with: {'n_layers': 4}
0.964238 (0.000885) with: {'n_layers': 5}
0.961804 (0.000574) with: {'n_layers': 6}
0.961538 (0.000000) with: {'n_layers': 7}
0.961538 (0.000000) with: {'n_layers': 8}
0.961538 (0.000000) with: {'n_layers': 9}
0.961538 (0.000000) with: {'n_layers': 10}


In [93]:
Y_predict = grid.predict(np.array(X_test))
Y_testing = np.array(matrixToCityClass(Y_test))
accuracy_score(Y_testing, Y_predict)

0.8172413793103448

In [94]:
gsKerasHP = gsKerasHP.append({'dataset':'weather', 'batch_size':bestBatchSize, 'activation':bestActivation, 'epochs':bestEpochs, 'optimizer':bestOptimizer, 'init_mode':bestInitMode, 'neurons':bestNeurons, 'n_layers':bestLayers}, ignore_index=True)
gsKerasHP

Unnamed: 0,activation,batch_size,dataset,epochs,init_mode,n_layers,neurons,optimizer
0,softplus,20.0,color,100.0,uniform,2.0,30.0,Adam
1,softplus,10.0,weather,100.0,uniform,2.0,20.0,Adamax
