In [1]:
from scipy  import io as sio
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.metrics import accuracy_score

import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D
from keras import backend as K


# Read the MATLAB file once and keep a handle on its top-level 'dataset' entry,
# which the next cell indexes into for the train/test arrays.
mat = sio.loadmat('emnist-letters.mat')
data = mat['dataset']

# Height and width (in pixels) used when the flat image rows are reshaped.
img_rows = img_cols = 28


Using TensorFlow backend.


In [2]:
# Pull the image and label arrays out of the nested MATLAB structs.
X_train = data['train'][0,0]['images'][0,0]
Y_train = data['train'][0,0]['labels'][0,0]
X_test = data['test'][0,0]['images'][0,0]
Y_test = data['test'][0,0]['labels'][0,0]

print(X_train.shape)
print(X_test.shape)

# One flat float32 row per image, with pixel values divided by 255.
X_train = X_train.reshape(-1, img_rows * img_cols).astype('float32')
X_test = X_test.reshape(-1, img_rows * img_cols).astype('float32')
X_train /= 255.0
X_test /= 255.0

# Add the single channel axis where the active Keras backend expects it.
# (The channels_first branch used to reference the undefined names
# `x_train` / `x_test`, which raised NameError whenever it was taken.)
if K.image_data_format() == 'channels_first':
    X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)
    X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)
    X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

# Shift the labels down by one before one-hot encoding so the first class maps
# to column 0 (presumably the raw labels run 1..26 -- confirm against the file).
Y_train = tf.keras.utils.to_categorical(Y_train - 1).astype('float32')
Y_test = tf.keras.utils.to_categorical(Y_test - 1).astype('float32')

(124800, 784)
(20800, 784)


In [3]:
# Confirm the image tensor and one-hot label matrix shapes after preprocessing.
print(X_train.shape, Y_train.shape, sep='\n')

(124800, 28, 28, 1)
(124800, 26)


In [4]:
# Sample counts of the full training set and of the test set.
M, M_test = X_train.shape[0], X_test.shape[0]
print(M, M_test, sep='\n')

# Hold out the trailing M_test training samples as a validation set, so the
# validation set has the same size as the test set.
split_size = M - M_test
print(split_size)

# NOTE: this rebinds X_train / Y_train to their leading part, so running the
# cell a second time would split the already-shortened arrays again.
X_train, x_cv = np.split(X_train, [split_size])
Y_train, y_cv = np.split(Y_train, [split_size])

124800
20800
104000


In [5]:
print(X_train.shape)
print(x_cv.shape)
print(Y_train.shape)
print(y_cv.shape)
%load_ext tensorboard

(104000, 28, 28, 1)
(20800, 28, 28, 1)
(104000, 26)
(20800, 26)


In [None]:
# hyperparams is a dictionary with the following keys (nominal ranges):
# {'batch_size': 1-400,
#  'epochs': 10-15,
#  'num_conv_layers': 1-4,
#  'num_filters': 0-64,
#  'filter_size': 0-8,
#  'use_max_pool': 0-1,
#  'max_pool_size': 1-8,
#  'num_dense_layers': 0-8,
#  'num_dense_neurons': 1-64,
#  'dense_regularizer': 0-1,
#  'lambda': 0.00001-0.00005,
#  'dense_dropout_value': 0-0.5}
# The bounds actually enforced are the ones passed to the samplers in
# initialize() and in the mutate functions below.

import random

# Width of the softmax output layer built in run_model(); equals the number of
# one-hot label columns printed earlier (26).
num_classes = 26

def clamp(x, small, large):
    """Limit ``x`` so it is no lower than ``small`` and no higher than ``large``.

    The lower bound is applied first and the upper bound second, so if the
    bounds are given in the wrong order (``small > large``) the result is
    ``large``.
    """
    floored = small if small > x else x
    return large if large < floored else floored

def gaussian_int(curr_value, var, range_values):
    """Draw an integer near ``curr_value`` and limit it to ``range_values``.

    Args:
        curr_value: centre of the normal distribution the sample is drawn from.
        var: spread of the distribution. It is passed to ``np.random.normal``
            as its ``scale`` argument, so despite the name it acts as the
            standard deviation.
        range_values: ``(low, high)`` pair; the rounded sample is limited to
            this inclusive interval.

    Returns:
        The rounded sample as a built-in ``int`` (or one of the bounds when the
        sample falls outside them).

    The explicit conversion matters because ``round()`` applied to a NumPy
    float can return a NumPy scalar on some NumPy versions, and callers pass
    the result to ``range()`` and use it as a layer size.
    """
    low, high = range_values[0], range_values[1]
    sample = int(round(float(np.random.normal(curr_value, var))))
    # Lower bound first, then upper bound (same order as clamp()).
    return min(max(sample, low), high)

def gaussian_float(curr_value, var, range_values):
    """Draw a float near ``curr_value`` and limit it to ``range_values``.

    ``var`` is forwarded to ``np.random.normal`` as the ``scale`` argument, and
    the sample is passed through ``clamp`` with the first two entries of
    ``range_values`` as the lower and upper limits.
    """
    draw = np.random.normal(curr_value, var)
    lower = range_values[0]
    upper = range_values[1]
    return clamp(draw, lower, upper)

def mutate_keep_architecture(hp):
    """Return a randomly perturbed copy of ``hp`` that keeps the layer counts.

    Every tunable listed below is re-sampled with probability 0.5 from a normal
    distribution centred on its current value and limited to the given bounds.
    ``num_conv_layers``, ``num_dense_layers`` and ``use_max_pool`` are never
    touched, so the overall architecture stays the same. ``hp`` itself is not
    modified.

    Fixes relative to the earlier version:
      * the coin flip uses ``random.random()``; ``random.uniform()`` needs two
        arguments and raised ``TypeError`` on the first call;
      * ``max_pool_size`` can no longer become 0, which is not a usable pool
        size;
      * ``num_dense_neurons`` is mutated once, with the same 32..400 bounds
        that ``initialize()`` uses, instead of twice with conflicting bounds.

    NOTE(review): ``initialize()`` can start ``num_filters`` as high as 127
    while the clamp here caps it at 64 -- confirm which range is intended.
    """
    # (key, spread, (low, high), integer-valued?)
    tunables = (
        ('batch_size', 30, (100, 400), True),
        ('epochs', 5, (10, 20), True),
        ('num_filters', 4, (28, 64), True),
        ('filter_size', 1, (1, 8), True),
        ('max_pool_size', 1, (1, 8), True),
        ('num_dense_neurons', 4, (32, 400), True),
        ('dense_regularizer', 0.5, (0, 1), True),
        ('lambda', .000001, (0.00001, 0.00005), False),
        ('dense_dropout_value', .01, (0, 0.5), False),
    )

    mutated = hp.copy()
    for key, spread, bounds, is_int in tunables:
        if random.random() < 0.5:
            sampler = gaussian_int if is_int else gaussian_float
            mutated[key] = sampler(mutated[key], spread, bounds)
    return mutated

def mutate(hp):
    """Return a randomly perturbed copy of ``hp``, layer counts included.

    Every tunable listed below is re-sampled with probability 0.5 from a normal
    distribution centred on its current value and limited to the given bounds.
    Unlike ``mutate_keep_architecture`` this may also change
    ``num_conv_layers`` and ``num_dense_layers``. ``use_max_pool`` is left
    alone. ``hp`` itself is not modified.

    Fixes relative to the earlier version:
      * the coin flip uses ``random.random()``; ``random.uniform()`` needs two
        arguments and raised ``TypeError`` on the first call;
      * ``max_pool_size`` can no longer become 0, which is not a usable pool
        size;
      * ``num_dense_neurons`` is mutated once, with the same 32..400 bounds
        that ``initialize()`` uses, instead of twice with conflicting bounds.

    NOTE(review): ``initialize()`` can start ``num_filters`` as high as 127
    while the clamp here caps it at 64 -- confirm which range is intended.
    """
    # (key, spread, (low, high), integer-valued?)
    tunables = (
        ('batch_size', 30, (100, 400), True),
        ('epochs', 5, (10, 20), True),
        ('num_conv_layers', 1, (1, 4), True),
        ('num_filters', 4, (28, 64), True),
        ('filter_size', 1, (1, 8), True),
        ('max_pool_size', 1, (1, 8), True),
        ('num_dense_layers', 1, (0, 8), True),
        ('num_dense_neurons', 4, (32, 400), True),
        ('dense_regularizer', 0.5, (0, 1), True),
        ('lambda', .000001, (0.00001, 0.00005), False),
        ('dense_dropout_value', .01, (0, 0.5), False),
    )

    mutated = hp.copy()
    for key, spread, bounds, is_int in tunables:
        if random.random() < 0.5:
            sampler = gaussian_int if is_int else gaussian_float
            mutated[key] = sampler(mutated[key], spread, bounds)
    return mutated
    
def initialize():
    """Draw a random starting hyperparameter dictionary for the search.

    Integer settings are drawn uniformly with ``random.randrange`` (upper bound
    exclusive); the regulariser switch, the L2 factor and the dropout rate come
    from the clamped Gaussian samplers. ``use_max_pool`` is fixed to 1.

    ``filter_size`` is drawn from 1..8. It used to be drawn from 0..7, and a
    kernel size of 0 is not a usable convolution window (it also made the
    size tracking in ``run_model`` grow instead of shrink).
    """
    return {
        'batch_size': random.randrange(100, 401),
        'epochs': random.randrange(10, 20),
        'num_conv_layers': random.randrange(1, 4),
        'num_filters': random.randrange(28, 128),
        'filter_size': random.randrange(1, 9),
        'use_max_pool': 1,  # pooling is always enabled for now
        'max_pool_size': random.randrange(1, 8),
        'num_dense_layers': random.randrange(8),
        'num_dense_neurons': random.randrange(32, 400),
        'dense_regularizer': gaussian_int(0.5, 0.5, (0, 1)),
        'lambda': gaussian_float(0.00003, .000001, (0.00001, 0.00005)),
        'dense_dropout_value': gaussian_float(0.3, .1, (0, 0.5)),
    }

In [None]:
# Draw one random configuration and print it as a quick sanity check.
hp = initialize()
print(hp)

In [None]:
# Print a mutated variant; mutate() works on a copy, so `hp` itself is unchanged.
print(mutate(hp))


In [None]:

from tensorflow.keras import layers
import datetime

def run_model(hp):
    """Build, train and score one CNN described by the hyperparameter dict ``hp``.

    The network is: up to ``num_conv_layers`` ReLU convolutions (all with
    ``num_filters`` filters and a square ``filter_size`` kernel), an optional
    max-pooling layer, ``Flatten``, ``num_dense_layers`` ReLU dense layers
    (L2-regularised with ``lambda`` unless ``dense_regularizer`` is 0), an
    optional dropout layer and a softmax output of ``num_classes`` units.
    Convolution and pooling layers that would not fit into the remaining
    feature map are left out.

    The model is trained on ``X_train``/``Y_train`` with ``x_cv``/``y_cv`` as
    validation data, using the Adam optimizer and categorical cross-entropy.

    Returns:
        The result of ``model.evaluate(X_test, Y_test)``; with the single
        'accuracy' metric compiled here that is ``[loss, accuracy]``.

    Side effect:
        Calls ``K.clear_session()`` first, so state left behind by previously
        built models is released. Models created before this call should not
        be used afterwards.
    """
    print(hp)

    # Building many models in one process otherwise keeps accumulating
    # backend state, which slows training down more and more.
    K.clear_session()

    model = Sequential()

    # Edge length of the current feature map; only reduced when a layer is
    # actually added, so the pooling check below sees the real size.
    d = min(img_rows, img_cols)
    fs = int(hp['filter_size'])
    for _ in range(int(hp['num_conv_layers'])):
        remaining = d - (fs - 1)
        if remaining <= 0:
            # This kernel no longer fits, and neither would any later one of
            # the same size.
            break
        # Only the first layer needs to be told the input shape.
        first_layer_args = {} if model.layers else {'input_shape': input_shape}
        model.add(Conv2D(int(hp['num_filters']), (fs, fs), activation='relu',
                         **first_layer_args))
        d = remaining

    if hp['use_max_pool'] == 1:
        ps = int(hp['max_pool_size'])
        # Skip pooling windows that are empty or larger than the feature map.
        if 1 <= ps <= d:
            model.add(MaxPooling2D(pool_size=(ps, ps)))

    model.add(Flatten())

    use_l2 = hp['dense_regularizer'] != 0
    for _ in range(int(hp['num_dense_layers'])):
        regularizer = keras.regularizers.l2(hp['lambda']) if use_l2 else None
        model.add(Dense(int(hp['num_dense_neurons']), activation='relu',
                        kernel_regularizer=regularizer))

    if hp['dense_dropout_value'] > 0:
        model.add(Dropout(hp['dense_dropout_value']))
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adam(),
                  metrics=['accuracy'])

    model.fit(X_train, Y_train,
              batch_size=int(hp['batch_size']),
              epochs=int(hp['epochs']),
              verbose=1,
              validation_data=(x_cv, y_cv))

    return model.evaluate(X_test, Y_test, verbose=0)
        

In [None]:
# Ideas based off of this paper http://www.ijmlc.org/vol9/874-L0279.pdf
# Running this toasted my GPU after 2 hrs and performance slowed to a crawl
# DO NOT RUN. Needs more work.


def search(target_score=0.95, max_iterations=None):
    """Evolutionary hyperparameter search over the CNN built by ``run_model``.

    Each round:
      1. the current best configuration gets ``select_value`` mutations that
         keep its architecture (``mutate_keep_architecture``);
      2. the pool is ranked and cut down to the top 4;
      3. each survivor gets ``clone_value`` unrestricted mutations
         (``mutate``), which are evaluated and added to the pool;
      4. the pool is ranked again and its best accuracy becomes the score.

    Args:
        target_score: stop once the best accuracy reaches this value
            (default 0.95, the previous hard-coded threshold).
        max_iterations: optional cap on the number of rounds; ``None``
            (default) keeps looping until ``target_score`` is reached.

    Returns:
        The best ``(hyperparameters, accuracy)`` pair found.

    Fixes relative to the earlier version:
      * ``run_model`` returns ``[loss, accuracy]``; the first element used to
        be stored as if it were the hyperparameter dict, so the pool ended up
        holding losses instead of configurations;
      * every configuration is trained once -- the current best is no longer
        retrained at the start of every round.

    NOTE(review): candidates are ranked by the accuracy ``run_model`` reports
    on ``X_test``/``Y_test``, so the test set takes part in model selection.
    """
    select_value = 4   # architecture-preserving mutations of the best per round
    clone_value = 5    # unrestricted mutations spawned from every survivor
    survivors = 4      # pool size kept after the first ranking of a round

    def evaluate(config):
        # Only the accuracy is used for ranking; the loss is discarded.
        _loss, accuracy = run_model(config)
        print('Score: ', accuracy)
        return (config, accuracy)

    def ranked(pairs):
        # Best accuracy first.
        return sorted(pairs, key=lambda pair: -pair[1])

    candidates = [evaluate(initialize())]
    score = candidates[0][1]
    iterations = 1

    while score < target_score:
        if max_iterations is not None and iterations > max_iterations:
            break
        print('Start iteration: ', iterations)
        iterations += 1

        # Explore around the current best while keeping its architecture.
        best = candidates[0][0]
        for _ in range(select_value):
            candidates.append(evaluate(mutate_keep_architecture(best)))

        candidates = ranked(candidates)[:survivors]
        print(candidates)

        # Unrestricted mutations of every survivor.
        offspring = [mutate(config)
                     for config, _accuracy in candidates
                     for _ in range(clone_value)]
        for child in offspring:
            candidates.append(evaluate(child))

        candidates = ranked(candidates)
        score = candidates[0][1]

    return candidates[0]
        

# Starts the full search as soon as this cell is executed (see the DO NOT RUN
# warning at the top of the cell).
search()



In [None]:

# Notes

# batch 512, 30 epochs, 2D Conv with 5x5 kernel size and dropout around 0.25 and 0.2 seems to work best so far
# Hit 96% in training and cross validation, but only 93-94% in test.

# Try adding regularizer and changing dropout value. L2 seems to work better than L1.

# Dropout 0.4 Test loss: 0.29938104220307793
# Test accuracy: 0.9275480508804321   15 epochs
 
# Dropout 0.35 Test loss: 0.28941035776184154
# Test accuracy: 0.9307211637496948 under Conv2D 5x5, Conv2D 5x5, 256 dense, dropout  15 epochs

##############################
# changed architecture try with more dense layers
# seems to have higher test accuracy: Conv2D 5x5, Conv2D 5x5, 3x 100 dense with dropout
# Test loss: 0.20129575222587356
# Test accuracy: 0.9350481033325195  15 epochs

###########################
# changed to Conv2D 7x7, Conv2D 5x5, maxpool 2, 200 dense; seems to learn very fast but is prone to overfitting
# batch size 400
# added dropout 0.2


############################
# Tried Conv2D 9x9 32, Conv2D 5x5 64, Conv2D 3x3 128, MaxPooling 2

# Seems a high kernel_filter works better. Below is the best hand tuned version
# Conv2D 32, 9x9, Conv2D 64, 5x5, Conv2D 128 3x3, max pooling 7x7, dropout .25, 2x 400 Dense, dropout .3

            
    

In [6]:
from tensorflow.keras import layers
import datetime


# Training configuration for the hand-tuned network.
batch_size, num_classes, epochs = 400, 26, 17

# Best hand-tuned architecture from the notes above: three convolutions with
# shrinking kernels, one large pooling window, then two dense layers with
# dropout before the softmax output.
model = Sequential([
    Conv2D(32, kernel_size=(9, 9), activation='relu', input_shape=input_shape),
    Conv2D(64, (5, 5), activation='relu'),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(7, 7)),
    Dropout(0.25),
    Flatten(),
    Dense(400, activation='relu'),
    Dropout(0.3),
    Dense(400, activation='relu'),
    Dropout(0.3),
    Dense(num_classes, activation='softmax'),
])

model.summary()

# Optional TensorBoard logging (disabled):
#log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
#tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

model.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer=keras.optimizers.Adadelta(),
    metrics=['accuracy'],
)

# Train with the held-out validation split monitored after every epoch.
history = model.fit(
    X_train, Y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(x_cv, y_cv),
)  # add callbacks=[tensorboard_callback] to enable the logging above

# Final figures on the untouched test set: [loss, accuracy].
score = model.evaluate(X_test, Y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])


Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 20, 20, 32)        2624      
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 16, 16, 64)        51264     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 14, 14, 128)       73856     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 2, 2, 128)         0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 2, 2, 128)         0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 512)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 400)             