# Exercise 2  -  Deep Neural Networks with Keras

In [1]:
import math
import csv
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout
import matplotlib.pyplot as plt

# Reproducibility: fix the seed of NumPy's global random generator.
# NOTE(review): this seeds NumPy only; the Keras/TensorFlow backend may draw
# weight initialisations and dropout masks from its own generator - confirm
# whether it has to be seeded separately for fully repeatable runs.
np.random.seed(12345)

# Fraction of the samples assigned to the training set; the rest is held out.
perc_train = 0.8

Using TensorFlow backend.


In [2]:
# Read the comma-separated key file into an integer array (one sample per row).
fname = 'secretkeys_exe.csv'
dataset = np.loadtxt(fname, dtype=int, delimiter=',')

# Sample count, key length (measured on the first key) and number of digit values.
N = len(dataset)
L = len(str(dataset[0, 0]))
D = 9     # digits

print("Length of the dataset:", N)
print("Length of a key:", L)
print("number of digits:", D)

Length of the dataset: 3000
Length of a key: 7
number of digits: 9


In [3]:
#one-hot encoding
def expand(S, length=None, digits=None):
    """One-hot encode the decimal digits of the key ``S``.

    Every digit of the key gets its own group of ``digits`` slots, and the
    slot matching the digit's value is set to 1. Digit values run from 1 to
    ``digits`` and map to offsets 0 to ``digits - 1`` inside their group.

    Parameters
    ----------
    S : int
        Key to encode.
    length : int, optional
        Expected number of digits in the key. Defaults to the global ``L``.
    digits : int, optional
        Number of distinct digit values (1..digits). Defaults to the global ``D``.

    Returns
    -------
    numpy.ndarray or list
        Integer vector of size ``length * digits``, or an empty list (after
        printing ``'mismatch!'``) when the key does not have ``length``
        characters.

    Raises
    ------
    ValueError
        If a character of the key is not a digit between 1 and ``digits``.
    """
    length = L if length is None else length
    digits = D if digits is None else digits

    key = str(S)
    if len(key) != length:
        print('mismatch!')
        return []

    x = np.zeros(length * digits, dtype=int)
    for j, ch in enumerate(key):
        # A 0 (or anything above `digits`) has no slot of its own: with the
        # 1-based mapping a 0 would index slot -1 and silently set a bit that
        # belongs to another position, so reject it instead.
        if not ch.isdigit() or not 1 <= int(ch) <= digits:
            raise ValueError('key %s contains %r, expected digits 1 to %d' % (key, ch, digits))
        # 1 to digits --> 0 to digits-1 inside the j-th group
        x[j * digits + int(ch) - 1] = 1
    return x

# Encode every key (first column) and collect the labels (last column).
x_all = np.array([expand(dataset[i, 0]) for i in range(N)])
y_all = np.array(dataset[:, -1])

# Sequential split: the first perc_train of the samples train, the rest test.
N_train = int(perc_train*N)
x_train, x_test = x_all[:N_train], x_all[N_train:]
y_train, y_test = y_all[:N_train], y_all[N_train:]

In [None]:
# Network architecture: a funnel of fully connected ReLU layers that ends in a
# single sigmoid unit for the binary label.
model = Sequential(name='keys_1')   #sequence of dense layers
model.add(Dense(L*D, input_shape=(L*D,), activation='relu'))
model.add(Dense(max(10, int(L*D/2)), activation='relu'))  #layer with half of the nodes and >= 10
model.add(Dense(max(6, int(L*D/4)), activation='relu'))   #layer with a quarter of the nodes and >= 6

model.add(Dropout(0.4)) #removing some nodes
model.add(Dense(1, activation='sigmoid'))
# summary() prints the table itself and returns None, so wrapping it in
# print() would append a stray "None" line to the output.
model.summary()

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Train on the training split and evaluate on the held-out split after every epoch.
fit = model.fit(
    x_train, y_train,
    validation_data=(x_test, y_test),
    epochs=80,
    batch_size=20,
    verbose=False,
)

In [None]:
# Side-by-side training curves: accuracy on the left, loss on the right.
fig = plt.figure(figsize=(16, 6))
ax1, ax2 = fig.add_subplot(1, 2, 1), fig.add_subplot(1, 2, 2)

panels = (
    (ax1, 'accuracy', 'val_accuracy', 'Model accuracy'),
    (ax2, 'loss', 'val_loss', 'Model loss'),
)
for ax, train_key, val_key, title in panels:
    # training curve first, validation (test) curve second
    ax.plot(fit.history[train_key])
    ax.plot(fit.history[val_key])
    ax.set_title(title)
    ax.set_ylabel(train_key)
    ax.set_xlabel('epoch')
    ax.legend(['Train', 'Validation'])

plt.show()

In [None]:
# Best validation accuracy reached within the first 20 epochs.
val_acc = np.array(fit.history['val_accuracy'])
print('Max accuracy up to 20 epochs:', val_acc[:20].max())

### **1. Data augmentation**

In [None]:
# Augment the data with every cyclic rotation of each key; a rotated key keeps
# the label of the key it comes from, so the dataset grows by a factor L.
L_dataset = np.zeros((N*L, 2), dtype=int)

for ind, key in enumerate(dataset):
    key_str = str(key[0])
    for i in range(L):
        row = ind*L + i
        L_dataset[row, 0] = int(key_str[i:] + key_str[:i])   # rotate left by i digits
        L_dataset[row, 1] = key[1]

print("Original shape of dataset:", dataset.shape)
print("New shape of dataset:", L_dataset.shape)

# NOTE(review): rows are shuffled before the train/test split is taken, so
# rotations of one key can land on both sides of the split - confirm this is
# intended when reading the validation curves.
np.random.shuffle(L_dataset)

In [None]:
# One-hot encode the augmented keys and pull out their labels.
L_x_all = np.array([expand(L_dataset[i, 0]) for i in range(N*L)])
L_y_all = np.array(L_dataset[:, -1])

# Sequential split of the (already shuffled) augmented set.
N_train = int(perc_train*N*L)
L_x_train, L_x_test = L_x_all[:N_train], L_x_all[N_train:]
L_y_train, L_y_test = L_y_all[:N_train], L_y_all[N_train:]

In [None]:
# Same funnel architecture as the first model, with a lighter dropout, to be
# trained on the augmented dataset.
model = Sequential(name='keys_larger')   #sequence of dense layers
model.add(Dense(L*D, input_shape=(L*D,), activation='relu'))
model.add(Dense(max(10, int(L*D/2)), activation='relu'))  #layer with half of the nodes and >= 10
model.add(Dense(max(6, int(L*D/4)), activation='relu'))   #layer with a quarter of the nodes and >= 6

model.add(Dropout(0.2)) #removing some nodes
model.add(Dense(1, activation='sigmoid'))
# summary() prints the table itself and returns None, so wrapping it in
# print() would append a stray "None" line to the output.
model.summary()

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Train again, this time on the augmented (larger) training split.
fit = model.fit(
    L_x_train, L_y_train,
    validation_data=(L_x_test, L_y_test),
    epochs=80,
    batch_size=20,
    verbose=False,
)

In [None]:
# Side-by-side training curves: accuracy on the left, loss on the right.
fig = plt.figure(figsize=(16, 6))
ax1, ax2 = fig.add_subplot(1, 2, 1), fig.add_subplot(1, 2, 2)

panels = (
    (ax1, 'accuracy', 'val_accuracy', 'Model accuracy'),
    (ax2, 'loss', 'val_loss', 'Model loss'),
)
for ax, train_key, val_key, title in panels:
    # training curve first, validation (test) curve second
    ax.plot(fit.history[train_key])
    ax.plot(fit.history[val_key])
    ax.set_title(title)
    ax.set_ylabel(train_key)
    ax.set_xlabel('epoch')
    ax.legend(['Train', 'Validation'])

plt.show()

In [None]:
# Best validation accuracy reached within the first 20 epochs.
val_acc = np.array(fit.history['val_accuracy'])
print('Max accuracy up to 20 epochs:', val_acc[:20].max())

### **2. Grid search over hyper-parameters**

In [None]:
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier
from keras import optimizers

def compile_model(optimizer=None, activation='relu', dropout_rate=0.2):
    """Build and compile the dense binary classifier used by the grid searches.

    Parameters
    ----------
    optimizer : str or keras optimizer, optional
        Optimizer handed to ``compile``. When omitted, a fresh ``Adam`` with
        default settings is created for this model.
    activation : str
        Activation of the three hidden layers.
    dropout_rate : float
        Rate of the dropout layer placed before the output unit.

    Returns
    -------
    The compiled ``Sequential`` model.
    """
    # Build the default here rather than in the signature: a default evaluated
    # at definition time would be one optimizer object shared by every model
    # this function creates.
    if optimizer is None:
        optimizer = optimizers.Adam()

    # create the model
    mod = Sequential(name='keys_larger')
    mod.add(Dense(L*D, input_shape=(L*D,), activation=activation))
    mod.add(Dense(max(10, int(L*D/2)), activation=activation))  #layer with half of the nodes and >= 10
    mod.add(Dense(max(6, int(L*D/4)), activation=activation))   #layer with a quarter of the nodes and >= 6

    mod.add(Dropout(dropout_rate)) #removing some nodes
    mod.add(Dense(1, activation='sigmoid'))
    # compile the model
    mod.compile(loss='binary_crossentropy',
                optimizer=optimizer,
                metrics=['accuracy'])
    return mod

### Optimizers

In [None]:
# Wrap the model builder so scikit-learn can drive it as an estimator.
batch_size = 20
model_gridsearch = KerasClassifier(
    build_fn=compile_model,
    epochs=50,
    batch_size=batch_size,
    verbose=False,
)

# Candidate optimizers, passed by name to `compile_model()`.
optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
param_grid = {'optimizer': optimizer}

# 4-fold cross-validated search on the training split, 4 jobs in parallel.
grid = GridSearchCV(estimator=model_gridsearch, param_grid=param_grid, n_jobs=4, cv=4)
grid_result = grid.fit(x_train, y_train)

# Report the winner, then mean (std) of the test score for every candidate.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
cv_results = grid_result.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
params = cv_results['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

best_optimizer = grid_result.best_params_['optimizer']

### Batch size 

In [None]:
# Wrap the model builder so scikit-learn can drive it as an estimator.
batch_size = 20
model_gridsearch = KerasClassifier(
    build_fn=compile_model,
    epochs=50,
    batch_size=batch_size,
    verbose=False,
)

# Candidate batch sizes (the name is rebound from the single value to the list).
batch_size = [10, 20, 40]
param_grid = {'batch_size': batch_size}

# 4-fold cross-validated search on the training split, run in a single job.
grid = GridSearchCV(estimator=model_gridsearch, param_grid=param_grid, n_jobs=1, cv=4)
grid_result = grid.fit(x_train, y_train)

# Report the winner, then mean (std) of the test score for every candidate.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
cv_results = grid_result.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
params = cv_results['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

best_batch_size = grid_result.best_params_['batch_size']

### Activation

In [None]:
# Wrap the model builder so scikit-learn can drive it as an estimator.
batch_size = 20
model_gridsearch = KerasClassifier(
    build_fn=compile_model,
    epochs=50,
    batch_size=batch_size,
    verbose=False,
)

# Candidate activations for the hidden layers of `compile_model()`.
activation = ['softmax', 'softsign', 'relu', 'tanh', 'sigmoid', 'linear']
param_grid = {'activation': activation}

# 4-fold cross-validated search on the training split, 4 jobs in parallel.
grid = GridSearchCV(estimator=model_gridsearch, param_grid=param_grid, n_jobs=4, cv=4)
grid_result = grid.fit(x_train, y_train)

# Report the winner, then mean (std) of the test score for every candidate.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
cv_results = grid_result.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
params = cv_results['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

best_activation = grid_result.best_params_['activation']

### Dropout

In [None]:
# Wrap the model builder so scikit-learn can drive it as an estimator.
batch_size = 20
model_gridsearch = KerasClassifier(
    build_fn=compile_model,
    epochs=50,
    batch_size=batch_size,
    verbose=False,
)

# Candidate dropout rates for the layer before the output unit.
dropout_rate = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5]
param_grid = {'dropout_rate': dropout_rate}

# 4-fold cross-validated search on the training split, 4 jobs in parallel.
grid = GridSearchCV(estimator=model_gridsearch, param_grid=param_grid, n_jobs=4, cv=4)
grid_result = grid.fit(x_train, y_train)

# Report the winner, then mean (std) of the test score for every candidate.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
cv_results = grid_result.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
params = cv_results['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

best_dropout_rate = grid_result.best_params_['dropout_rate']

### All best hyper-parameters combined

In [None]:
# Hyper-parameters selected by the grid searches.
activation = best_activation
dropout_rate = best_dropout_rate
optimizer = best_optimizer
batch_size = best_batch_size
epochs = 40

# Same funnel architecture, rebuilt with the selected settings.
mod = Sequential([
    Dense(L*D, input_shape=(L*D,), activation=activation),
    Dense(max(10, L*D//2), activation=activation),   # half of the nodes and >= 10
    Dense(max(6, L*D//4), activation=activation),    # a quarter of the nodes and >= 6
    Dropout(dropout_rate),                           # removing some nodes
    Dense(1, activation='sigmoid'),
], name='keys_larger')

mod.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# Fit on the augmented (larger) training split.
fit = mod.fit(
    L_x_train, L_y_train,
    validation_data=(L_x_test, L_y_test),
    epochs=epochs,
    batch_size=batch_size,
    verbose=False,
)

In [None]:
# Side-by-side training curves: accuracy on the left, loss on the right.
fig = plt.figure(figsize=(16, 6))
ax1, ax2 = fig.add_subplot(1, 2, 1), fig.add_subplot(1, 2, 2)

panels = (
    (ax1, 'accuracy', 'val_accuracy', 'Model accuracy'),
    (ax2, 'loss', 'val_loss', 'Model loss'),
)
for ax, train_key, val_key, title in panels:
    # training curve first, validation (test) curve second
    ax.plot(fit.history[train_key])
    ax.plot(fit.history[val_key])
    ax.set_title(title)
    ax.set_ylabel(train_key)
    ax.set_xlabel('epoch')
    ax.legend(['Train', 'Validation'])

plt.show()

In [None]:
# Best validation accuracy reached within the first 20 epochs.
val_acc = np.array(fit.history['val_accuracy'])
print('Max accuracy up to 20 epochs:', val_acc[:20].max())

### **3. Data rescaling**

In [None]:
# Standardise with the mean and standard deviation taken over all entries of
# the encoded array (a single global shift and scale, not one per feature).
x_mean, x_std = L_x_all.mean(), L_x_all.std()
L_x_rescale = (L_x_all - x_mean) / x_std

# Re-split at the same index; this rebinds L_x_train / L_x_test to the rescaled data.
N_train = int(perc_train*N*L)
L_x_train, L_x_test = L_x_rescale[:N_train], L_x_rescale[N_train:]
L_y_train, L_y_test = L_y_all[:N_train], L_y_all[N_train:]

In [None]:
# Hyper-parameters selected by the grid searches.
activation = best_activation
dropout_rate = best_dropout_rate
optimizer = best_optimizer
batch_size = best_batch_size
epochs = 100

# Same funnel architecture, rebuilt from scratch for the rescaled inputs.
mod = Sequential([
    Dense(L*D, input_shape=(L*D,), activation=activation),
    Dense(max(10, L*D//2), activation=activation),   # half of the nodes and >= 10
    Dense(max(6, L*D//4), activation=activation),    # a quarter of the nodes and >= 6
    Dropout(dropout_rate),                           # removing some nodes
    Dense(1, activation='sigmoid'),
], name='keys_larger')

mod.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# Fit on the current L_x_train / L_x_test split.
fit = mod.fit(
    L_x_train, L_y_train,
    validation_data=(L_x_test, L_y_test),
    epochs=epochs,
    batch_size=batch_size,
    verbose=False,
)

In [None]:
# Side-by-side training curves: accuracy on the left, loss on the right.
fig = plt.figure(figsize=(16, 6))
ax1, ax2 = fig.add_subplot(1, 2, 1), fig.add_subplot(1, 2, 2)

panels = (
    (ax1, 'accuracy', 'val_accuracy', 'Model accuracy'),
    (ax2, 'loss', 'val_loss', 'Model loss'),
)
for ax, train_key, val_key, title in panels:
    # training curve first, validation (test) curve second
    ax.plot(fit.history[train_key])
    ax.plot(fit.history[val_key])
    ax.set_title(title)
    ax.set_ylabel(train_key)
    ax.set_xlabel('epoch')
    ax.legend(['Train', 'Validation'])

plt.show()

In [None]:
# Best validation accuracy reached within the first 20 epochs.
val_acc = np.array(fit.history['val_accuracy'])
print('Max accuracy up to 20 epochs:', val_acc[:20].max())