In [1]:
import tensorflow as tf
import keras
import pandas as pd
import matplotlib.pyplot as plt
import scipy
import sklearn
from sklearn.model_selection import train_test_split
import numpy as np
import random


In [2]:
def build_MLP(n_nodes=200,activation_func="relu",learning_rate=0.01,input_Shape=[28,28],output_shape=10,use_dropout=False,dropout_rate=0.2,use_l1=False,use_l2=False,l1=0.005,l2=0.001):
    model = keras.models.Sequential()
    model.add(keras.layers.Flatten(input_shape=input_Shape))
    
    if use_l1 and use_l2==False:
        model.add(keras.layers.Dense(n_nodes, activation=activation_func))
    elif use_l1 and use_l2==True:
        model.add(keras.layers.Dense(n_nodes, activation=activation_func,kernel_regularizer=keras.regularizers.L1L2(l1=l1,l2=l2)))
    elif use_l1==True:
        model.add(keras.layers.Dense(n_nodes, activation=activation_func,kernel_regularizer=keras.regularizers.L1(l1=l1)))
    else:
        model.add(keras.layers.Dense(n_nodes, activation=activation_func,kernel_regularizer=keras.regularizers.L2(l2=l2)))

    if use_dropout==True:
        model.add(keras.layers.Dropout(rate=dropout_rate))
        
    model.add(keras.layers.Dense(output_shape, activation="softmax"))
    optimizer = keras.optimizers.SGD(learning_rate=learning_rate)
    model.compile(loss="sparse_categorical_crossentropy",optimizer=optimizer, metrics=["accuracy"])
    return model

def build_cnn(n_nodes=128,activation_func="relu",lr=0.01,input_shape=[28,28,1],output_shape=10,use_dropout=True,dropout_rate=0.5,use_l1=False,use_l2=False,l1=0.005,l2=0.005):
    model = keras.models.Sequential()
    model.add(keras.layers.Conv2D(64,7,activation=activation_func,padding="same",input_shape=input_shape))
    model.add(keras.layers.MaxPooling2D(2))
    model.add(keras.layers.Conv2D(128,3,activation=activation_func,padding="same"))
    model.add(keras.layers.Conv2D(128,3,activation=activation_func,padding="same"))
    model.add(keras.layers.MaxPooling2D(2))
    model.add(keras.layers.Flatten())

    if use_l1 and use_l2==False:
        model.add(keras.layers.Dense(n_nodes,activation=activation_func))
    elif use_l1 and use_l2==True:
        model.add(keras.layers.Dense(n_nodes,activation=activation_func,kernel_regularizer=keras.regularizers.L1L2(l1=l1,l2=l2)))
    elif use_l1==True:
        model.add(keras.layers.Dense(n_nodes,activation=activation_func,kernel_regularizer=keras.regularizers.L1(l1=l1)))
    else:
        model.add(keras.layers.Dense(n_nodes,activation=activation_func,kernel_regularizer=keras.regularizers.L2(l2=l2)))

    if use_dropout==True:
        model.add(keras.layers.Dropout(dropout_rate))

    if use_l1 and use_l2==False:
        model.add(keras.layers.Dense(n_nodes/2,activation=activation_func))
    elif use_l1 and use_l2==True:
        model.add(keras.layers.Dense(n_nodes/2,activation=activation_func,kernel_regularizer=keras.regularizers.L1L2(l1=l1,l2=l2)))
    elif use_l1==True:
        model.add(keras.layers.Dense(n_nodes/2,activation=activation_func,kernel_regularizer=keras.regularizers.L1(l1=l1)))
    else:
        model.add(keras.layers.Dense(n_nodes/2,activation=activation_func,kernel_regularizer=keras.regularizers.L2(l2=l2)))

    if use_dropout==True:
        model.add(keras.layers.Dropout(dropout_rate))
        
    model.add(keras.layers.Dense(output_shape,activation="softmax"))

    optimizer = keras.optimizers.SGD(learning_rate=lr)
    model.compile(loss="sparse_categorical_crossentropy",optimizer=optimizer, metrics=["accuracy"])
    return model

test_model=build_cnn()
test_model.summary()


In [3]:
fashion_mnist=keras.datasets.fashion_mnist
(X_train_full, y_train_full), (X_test, y_test) = fashion_mnist.load_data()
X_test=X_test/255.0

x_train,x_val,y_train,y_val=sklearn.model_selection.train_test_split(X_train_full/255.0,y_train_full,test_size=0.1,random_state=42) #random state is used to replicate results




In [5]:
#hyper parameter tuning through random search
tests=200
n_nodes=np.arange(100)+99
activation_functions=["relu"]
learning_rates=[0.02,0.03]
dropout_changes=[0.1,0.5]
l1_values=[0.01,0.08]
l2_values=[0.1,0.01]
used_parameters=[]
gained_scores=[]
early_stoping=keras.callbacks.EarlyStopping(patience=3,baseline=0.35)
for i in range(tests):
    print("search "+str(i+1))
    nodes=n_nodes[random.randint(0,len(n_nodes)-1)]
    activation=activation_functions[random.randint(0,len(activation_functions)-1)]
    learning_rate=random.uniform(learning_rates[0],learning_rates[1])

    dropout_rate=random.uniform(dropout_changes[0],dropout_changes[1])
    #use_dropout=random.randint(0,1)
    use_dropout=1 #good predictors seem to use dropout
    
    l1_value=random.uniform(l1_values[0],l1_values[1])
    l2_value=random.uniform(l2_values[0],l2_values[1])
    #use_l1=random.randint(0,1)
    use_l1=1 #some of the top predictors usaaly use l1
    use_l2=random.randint(0,1)
    

    used_parameters.append([nodes,activation,learning_rate,use_dropout,dropout_rate,use_l1,l1_value,use_l2,l2_value])

    model=build_MLP(n_nodes=nodes,activation_func=activation,learning_rate=learning_rate,use_dropout=use_dropout,dropout_rate=dropout_rate,use_l1=use_l1,use_l2=use_l2,l1=l1_value,l2=l2_value)
    history = model.fit(x_train, y_train, epochs=20,validation_data=(x_val, y_val),callbacks=[early_stoping],verbose=0)
    gained_scores.append(model.evaluate(X_test, y_test,verbose=1)[1])


#print(used_parameters)
#print(gained_scores)

results=zip(gained_scores,used_parameters)
three_best=sorted(results,reverse=True)[:3]
print(three_best)


search 1
search 2
search 3
search 4
search 5
search 6
search 7
search 8
search 9
search 10
search 11
search 12
search 13
search 14
search 15
search 16
search 17
search 18
search 19
search 20
search 21
search 22
search 23
search 24
search 25
search 26
search 27
search 28
search 29
search 30
search 31
search 32
search 33
search 34
search 35
search 36
search 37
search 38
search 39
search 40
search 41
search 42
search 43
search 44
search 45
search 46
search 47
search 48
search 49
search 50
search 51
search 52
search 53
search 54
search 55
search 56
search 57
search 58
search 59
search 60
search 61
search 62
search 63
search 64
search 65
search 66
search 67
search 68
search 69
search 70
search 71
search 72
search 73
search 74
search 75
search 76
search 77
search 78
search 79
search 80
search 81
search 82
search 83
search 84
search 85
search 86
search 87
search 88
search 89
search 90
search 91
search 92
search 93
search 94
search 95
search 96
search 97
search 98
search 99
search 100
search 1

In [10]:
tests=40
n_nodes=np.arange(180,step=2)+20
activation_functions=["elu","relu"]
learning_rates=[0.02,0.03]
dropout_changes=[0.05,0.5]
l1_values=[0.01,0.1]
l2_values=[0.01,0.1]
used_cnn_parameters=[]
gained_cnn_scores=[]
early_stoping=keras.callbacks.EarlyStopping(patience=3,baseline=0.35) #voeg baseline=0.35 toe?

for i in range(tests):
    print("search "+str(i+1))
    nodes=n_nodes[random.randint(0,len(n_nodes)-1)]
    activation=activation_functions[random.randint(0,len(activation_functions)-1)]
    learning_rate=random.uniform(learning_rates[0],learning_rates[1])

    dropout_rate=random.uniform(dropout_changes[0],dropout_changes[1])
    use_dropout=random.randint(0,1)
    use_dropout=0
    
    l1_value=random.uniform(l1_values[0],l1_values[1])
    l2_value=random.uniform(l2_values[0],l2_values[1])
    use_l1=random.randint(0,1)
    #use_l1=0
    use_l2=random.randint(0,1)
    use_l2=0
    
    used_cnn_parameters.append([nodes,activation,learning_rate,use_dropout,dropout_rate,use_l1,l1_value,use_l2,l2_value])

    model=build_cnn(n_nodes=nodes,activation_func=activation,lr=learning_rate,use_dropout=use_dropout,dropout_rate=dropout_rate,use_l1=use_l1,l1=l1_value,use_l2=use_l2,l2=l2_value)
    history=model.fit(x_train, y_train, epochs=20,validation_data=(x_val, y_val),callbacks=[early_stoping],verbose=0)
    gained_cnn_scores.append(model.evaluate(X_test, y_test,verbose=1)[1])


results_cnn=zip(gained_cnn_scores,used_cnn_parameters)
three_best_cnn=sorted(results_cnn,reverse=True)[:3]
print(three_best_cnn)
    

search 1
search 2
search 3
search 4
search 5
search 6
search 7
search 8
search 9
search 10
search 11
search 12
search 13
search 14
search 15
search 16
search 17
search 18
search 19
search 20
search 21
search 22
search 23
search 24
search 25
search 26
search 27
search 28
search 29
search 30
search 31
search 32
search 33
search 34
search 35
search 36
search 37
search 38
search 39
search 40
[(0.8497999906539917, [50, 'elu', 0.027618184070042955, 1, 0.2173192855863577, 0, 0.0717181115705109, 0, 0.0156809477206767]), (0.8496000170707703, [78, 'elu', 0.028668438976123152, 0, 0.231771727916114, 0, 0.012673655299806064, 0, 0.015721268082082775]), (0.8458999991416931, [64, 'relu', 0.028146675596453015, 0, 0.08444573872260852, 0, 0.03477412349013479, 0, 0.013368654697460867])]


In [7]:
try:
    uitkomst=three_best
except: #backup if you don't want to run randomsearch
    #uitkomst=[(0.885200023651123, [197, 'relu', 0.02691421553000728]), (0.8784999847412109, [130, 'relu', 0.028888078594726852]), (0.876800000667572, [112, 'relu', 0.023793749068561666])]
    #uitkomst=[(0.8208000063896179, [174, 'elu', 0.0008268841975646185, 0, 0.3338340177764448, 1, 0.09372398395062659, 0, 0.053582953154279654]), (0.7202000021934509, [10, 'sigmoid', 0.01148398879811415, 1, 0.4006585744872907, 0, 0.010527428959041466, 0, 0.04913666547157445]), (0.4521999955177307, [167, 'relu', 0.010547466364222645, 1, 0.1602456164262665, 1, 0.024317491469568457, 1, 0.0745308236451018])]
    #uitkomst=[(0.8798999786376953, [182, 'relu', 0.027409160691256023, 1, 0.3334784567987052, 1, 0.02982165793166822, 0, 0.007031906578528216]), (0.879800021648407, [145, 'relu', 0.02729055405722519, 0, 0.4610011391590515, 1, 0.02652740278337458, 0, 0.02585835056204301]), (0.8776999711990356, [192, 'relu', 0.0206948967058913, 0, 0.1654141140913743, 1, 0.036495299790516265, 0, 0.044509685447334475])]
    uitkomst=[(0.8817999958992004, [191, 'relu', 0.0246616209489024, 1, 0.18823801829505601, 1, 0.01645549960081685, 0, 0.08058449456280865]), (0.8815000057220459, [189, 'relu', 0.02609145356995761, 1, 0.13722164990138422, 1, 0.05551666092484774, 0, 0.08007681223939539]), (0.8805999755859375, [180, 'relu', 0.02763678721511088, 1, 0.26128711840892327, 1, 0.06631739136167594, 0, 0.04818606530052301])]
early_stoping=keras.callbacks.EarlyStopping(patience=3)
cifar10=keras.datasets.cifar10
(X_train_full_cifar, y_train_full_cifar), (X_test_cifar, y_test_cifar) = cifar10.load_data()
X_test_cifar=X_test_cifar/255.0
x_train_cifar,x_val_cifar,y_train_cifar,y_val_cifar=sklearn.model_selection.train_test_split(X_train_full_cifar/255.0,y_train_full_cifar,test_size=0.1,random_state=40)

#voeg dropout en l kansen toe
for i in uitkomst:
    model=build_MLP(n_nodes=i[1][0],activation_func=i[1][1],learning_rate=i[1][2],input_Shape=[32, 32,3],use_dropout=i[1][3],dropout_rate=i[1][4],use_l1=i[1][5],l1=i[1][6],use_l2=i[1][7],l2=i[1][8])
    history = model.fit(x_train_cifar, y_train_cifar, epochs=20,validation_data=(x_val_cifar, y_val_cifar),callbacks=[early_stoping],verbose=0)
    print(model.evaluate(X_test_cifar, y_test_cifar,verbose=1)[1])

0.37450000643730164
0.4415999948978424
0.44339999556541443


In [8]:
try:
    uitkomst=three_best_cnn
except:
    #uitkomst=[[152, 'relu', 0.013948888503957185, 1, 0.23459332016242912, 1, 0.001470938364453039, 0, 0.036419380744576964]] #uitkomst hiervan was 0.9+
    uitkomst=[(0.9108999967575073, [48, 'relu', 0.026602485454774543, 0, 0.41063095084510204, 1, 0.025218638919424448, 0, 0.09730855307277038]), (0.8438000082969666, [112, 'elu', 0.022605120963159288, 0, 0.47129153002835383, 0, 0.009900543264121233, 1, 0.03346900177229244]), (0.8429999947547913, [176, 'elu', 0.011500466479661183, 0, 0.15104488161796972, 0, 0.026190315768811194, 1, 0.033232448970334706])]
early_stoping=keras.callbacks.EarlyStopping(patience=3)
cifar10=keras.datasets.cifar10
(X_train_full_cifar, y_train_full_cifar), (X_test_cifar, y_test_cifar) = cifar10.load_data()
X_test_cifar=X_test_cifar/255.0
x_train_cifar,x_val_cifar,y_train_cifar,y_val_cifar=sklearn.model_selection.train_test_split(X_train_full_cifar/255.0,y_train_full_cifar,test_size=0.1,random_state=40)

for i in uitkomst:
    model=build_cnn(n_nodes=i[1][0],activation_func=i[1][1],input_shape=[32,32,3],lr=i[1][2],use_dropout=i[1][3],dropout_rate=i[1][4],use_l1=i[1][5],l1=i[1][6],use_l2=i[1][7],l2=i[1][8])
    history = model.fit(x_train_cifar, y_train_cifar, epochs=20,validation_data=(x_val_cifar, y_val_cifar),callbacks=[early_stoping],verbose=0)
    print(model.evaluate(X_test_cifar, y_test_cifar,verbose=1)[1])

0.6789000034332275
0.630299985408783
0.6193000078201294
