In [2]:
from tensorflow import keras
import numpy as np
import pandas as pd
import random
import os

dataRep = '../data/'
scriptRep = '../kuzushiji_recognition/'

# some_file.py
import sys
# insert at 1, 0 is the script path (or '' in REPL)
sys.path.insert(1, scriptRep)
import progressBar

In [3]:
testFrac = 0.15
valFrac = 0.15

unicodeData = pd.read_csv(dataRep+'unicode_translation.csv')

raw = np.load('../data/dataset/caracterClassificationFull.npz')
image = np.expand_dims(raw['image'], 3)[:100]
charOutput = raw['characterClass'][:100]

index = np.arange(image.shape[0])
np.random.shuffle(index)
image = image[index]/255.0
charOutput = charOutput[index]

nFrac = int(image.shape[0]*(1.-testFrac))
nVal = int(image.shape[0]*(1.-testFrac-valFrac))
print(image.shape[0], image.shape[1:], nVal, nFrac)

100 (32, 32, 1) 70 85


In [6]:
def trainAndEvaluateModel(model, filenameModel, batchSize):
    
    checkpoint = keras.callbacks.ModelCheckpoint(filepath=filenameModel,
                                                 monitor='val_loss',
                                                 verbose=0,
                                                 save_best_only=True,
                                                 mode='auto', period=1)
    history = keras.callbacks.History()
    reduceLR = keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                                 factor=0.1, patience=2,
                                                 verbose=0,
                                                 mode='auto')
    earlyStop = keras.callbacks.EarlyStopping(monitor='val_loss',
                                              min_delta=1e-7, patience=5,
                                              verbose=0, mode='auto')
    callbacks = [checkpoint, history, reduceLR, earlyStop]
    
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    model.fit(image[:nVal], charOutput[:nVal], epochs=50, batch_size=batchSize,
              validation_data=(image[nVal:nFrac], charOutput[nVal:nFrac]), shuffle=True, callbacks=callbacks)
    
    model = keras.models.load_model(filenameModel)
    test_loss, test_acc = model.evaluate(image[nVal:nFrac], charOutput[nVal:nFrac])
    
    return test_loss


def optimizeHyperParameter(funcCreateModel, filenameModel, dropoutLimit=(0.,1.0), batchSizeLimit=(1,10), convolutionLayerLimit=(2,10), denseLayerLimit=(2,10), maxIter=30):
    
    i=0
    stop=False
    curentParam = [0.5, 5, 4, 8] # (dropout, batchsize, convolution, dense)
    
    stdLoss = trainAndEvaluateModel(funcCreateModel(curentParam[0], 2**(curentParam[2]), 2**(curentParam[3])), filenameModel, 2**(curentParam[1]))
    
    while (i<maxIter) and not stop:
        i+=1
        stop=True
        
        if (curentParam[0]-0.1) >= dropoutLimit[0]:
            print("Test dropout")
            testLoss = trainAndEvaluateModel(funcCreateModel(curentParam[0]-0.1, 2**(curentParam[2]), 2**(curentParam[3])),
                                             'temp.h5', 2**(curentParam[1]))
            if testLoss < stdLoss:
                stop=False
                stdLoss = testLoss
                curentParam[0] -= 0.1
                os.system("cp temp.h5 "+filenameModel)
            
        if (curentParam[0]+0.1) <= dropoutLimit[1]:
            print("Test dropout")
            testLoss = trainAndEvaluateModel(funcCreateModel(curentParam[0]+0.1, 2**(curentParam[2]), 2**(curentParam[3])),
                                             'temp.h5', 2**(curentParam[1]))
            if testLoss < stdLoss:
                stop=False
                stdLoss = testLoss
                curentParam[0] += 0.1
                os.system("cp temp.h5 "+filenameModel)
                
                
        if (curentParam[1]-1) >= batchSizeLimit[0]:
            print("Test batchsize")
            testLoss = trainAndEvaluateModel(funcCreateModel(curentParam[0], 2**(curentParam[2]), 2**(curentParam[3])),
                                             'temp.h5', 2**(curentParam[1]-1))
            if testLoss < stdLoss:
                stop=False
                stdLoss = testLoss
                curentParam[1] -= 1
                os.system("cp temp.h5 "+filenameModel)
            
        if (curentParam[1]+1) <= batchSizeLimit[1]:
            print("Test batchsize")
            testLoss = trainAndEvaluateModel(funcCreateModel(curentParam[0], 2**(curentParam[2]), 2**(curentParam[3])),
                                             'temp.h5', 2**(curentParam[1]+1))
            if testLoss < stdLoss:
                stop=False
                stdLoss = testLoss
                curentParam[1] += 1
                os.system("cp temp.h5 "+filenameModel)
                
        if (curentParam[2]-1) >= convolutionLayerLimit[0]:
            print("Test convolution")
            testLoss = trainAndEvaluateModel(funcCreateModel(curentParam[0], 2**(curentParam[2]-1), 2**(curentParam[3])),
                                             'temp.h5', 2**(curentParam[1]))
            if testLoss < stdLoss:
                stop=False
                stdLoss = testLoss
                curentParam[2] -= 1
                os.system("cp temp.h5 "+filenameModel)
            
        if (curentParam[2]+1) <= convolutionLayerLimit[1]:
            print("Test convolution")
            testLoss = trainAndEvaluateModel(funcCreateModel(curentParam[0], 2**(curentParam[2]+1), 2**(curentParam[3])),
                                             'temp.h5', 2**(curentParam[1]))
            if testLoss < stdLoss:
                stop=False
                stdLoss = testLoss
                curentParam[2] += 1
                os.system("cp temp.h5 "+filenameModel)
                
        if (curentParam[3]-1) >= denseLayerLimit[0]:
            print("Test dense")
            testLoss = trainAndEvaluateModel(funcCreateModel(curentParam[0], 2**(curentParam[2]), 2**(curentParam[3]-1)),
                                             'temp.h5', 2**(curentParam[1]))
            if testLoss < stdLoss:
                stop=False
                stdLoss = testLoss
                curentParam[3] -= 1
                os.system("cp temp.h5 "+filenameModel)
            
        if (curentParam[3]+1) <= denseLayerLimit[1]:
            print("Test dense")
            testLoss = trainAndEvaluateModel(funcCreateModel(curentParam[0], 2**(curentParam[2]), 2**(curentParam[3]+1)),
                                             'temp.h5', 2**(curentParam[1]))
            if testLoss < stdLoss:
                stop=False
                stdLoss = testLoss
                curentParam[3] += 1
                os.system("cp temp.h5 "+filenameModel)
                
    print('Dropout :', curentParam[0])
    print('Batch Size :', 2**curentParam[1])
    print('Convolutional layer :', 2**curentParam[2])
    print('Dense Layer :', 2**curentParam[3])
    os.system("rm temp.h5")
        
def createModel1(dropoutRate, convLayer, denseLayer):
    
    model = keras.models.Sequential()
    model.add(keras.layers.Conv2D(convLayer, (3, 3), activation='relu', input_shape=image.shape[1:]))
    model.add(keras.layers.SpatialDropout2D(dropoutRate))
    model.add(keras.layers.Conv2D(convLayer, (3, 3), activation='relu'))
    model.add(keras.layers.SpatialDropout2D(dropoutRate))
    model.add(keras.layers.MaxPooling2D((2, 2)))
    model.add(keras.layers.Conv2D(convLayer*2, (3, 3), activation='relu'))
    model.add(keras.layers.SpatialDropout2D(dropoutRate))
    model.add(keras.layers.Conv2D(convLayer*2, (3, 3), activation='relu'))
    model.add(keras.layers.SpatialDropout2D(dropoutRate))
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(denseLayer, activation='relu'))
    model.add(keras.layers.Dropout(dropoutRate))
    model.add(keras.layers.Dense(len(unicodeData), activation='softmax'))
    
    return model
    

In [7]:
optimizeHyperParameter(createModel1, '../models/KMNIST1.h5')

W0902 00:29:13.778891 140319686895424 callbacks.py:875] `period` argument is deprecated. Please use `save_freq` to specify the frequency in number of samples seen.


Train on 70 samples, validate on 15 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Test dropout


W0902 00:29:19.501131 140319686895424 callbacks.py:875] `period` argument is deprecated. Please use `save_freq` to specify the frequency in number of samples seen.


Train on 70 samples, validate on 15 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50


W0902 00:29:25.623527 140319686895424 nn_ops.py:4224] Large dropout rate: 0.6 (>0.5). In TensorFlow 2.x, dropout() uses dropout rate instead of keep_prob. Please ensure that this is intended.
W0902 00:29:25.663111 140319686895424 nn_ops.py:4224] Large dropout rate: 0.6 (>0.5). In TensorFlow 2.x, dropout() uses dropout rate instead of keep_prob. Please ensure that this is intended.
W0902 00:29:25.706104 140319686895424 nn_ops.py:4224] Large dropout rate: 0.6 (>0.5). In TensorFlow 2.x, dropout() uses dropout rate instead of keep_prob. Please ensure that this is intended.
W0902 00:29:25.751167 140319686895424 nn_ops.py:4224] Large dropout rate: 0.6 (>0.5). In TensorFlow 2.x, dropout() uses dropout rate instead of keep_prob. Please ensure that this is intended.


Test dropout


W0902 00:29:25.795083 140319686895424 nn_ops.py:4224] Large dropout rate: 0.6 (>0.5). In TensorFlow 2.x, dropout() uses dropout rate instead of keep_prob. Please ensure that this is intended.
W0902 00:29:25.830719 140319686895424 callbacks.py:875] `period` argument is deprecated. Please use `save_freq` to specify the frequency in number of samples seen.


Train on 70 samples, validate on 15 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Test batchsize


W0902 00:29:34.685537 140319686895424 callbacks.py:875] `period` argument is deprecated. Please use `save_freq` to specify the frequency in number of samples seen.


Train on 70 samples, validate on 15 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Test batchsize


W0902 00:29:43.660255 140319686895424 callbacks.py:875] `period` argument is deprecated. Please use `save_freq` to specify the frequency in number of samples seen.


Train on 70 samples, validate on 15 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Test convolution


W0902 00:29:53.821651 140319686895424 callbacks.py:875] `period` argument is deprecated. Please use `save_freq` to specify the frequency in number of samples seen.


Train on 70 samples, validate on 15 samples


KeyboardInterrupt: 

In [5]:
dropoutRate = 0.5

model = keras.models.Sequential()
model.add(keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=image.shape[1:]))
model.add(keras.layers.SpatialDropout2D(dropoutRate))
model.add(keras.layers.Conv2D(32, (3, 3), activation='relu'))
model.add(keras.layers.SpatialDropout2D(dropoutRate))
model.add(keras.layers.MaxPooling2D((2, 2)))
model.add(keras.layers.Conv2D(64, (3, 3), activation='relu'))
model.add(keras.layers.SpatialDropout2D(dropoutRate))
model.add(keras.layers.Conv2D(64, (3, 3), activation='relu'))
model.add(keras.layers.SpatialDropout2D(dropoutRate))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(128, activation='relu'))
model.add(keras.layers.Dropout(dropoutRate))
model.add(keras.layers.Dense(len(unicodeData), activation='softmax'))
model.summary()

model.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=['accuracy'])

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_4 (Conv2D)            (None, 30, 30, 32)        320       
_________________________________________________________________
spatial_dropout2d_4 (Spatial (None, 30, 30, 32)        0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 28, 28, 32)        9248      
_________________________________________________________________
spatial_dropout2d_5 (Spatial (None, 28, 28, 32)        0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 12, 12, 64)        18496     
_________________________________________________________________
spatial_dropout2d_6 (Spatial (None, 12, 12, 64)       

In [6]:
model.fit(image[:nFrac], charOutput[:nFrac], epochs=5, validation_split=testFrac/(1.-testFrac))
model.save('../models/KMNIST5.h5')

Train on 478424 samples, validate on 102520 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [7]:
dropoutRate = 0.5

model = keras.models.Sequential()
model.add(keras.layers.Conv2D(16, (3, 3), activation='relu', input_shape=image.shape[1:]))
model.add(keras.layers.SpatialDropout2D(dropoutRate))
model.add(keras.layers.Conv2D(16, (3, 3), activation='relu'))
model.add(keras.layers.SpatialDropout2D(dropoutRate))
model.add(keras.layers.MaxPooling2D((2, 2)))
model.add(keras.layers.Conv2D(32, (3, 3), activation='relu'))
model.add(keras.layers.SpatialDropout2D(dropoutRate))
model.add(keras.layers.Conv2D(32, (3, 3), activation='relu'))
model.add(keras.layers.SpatialDropout2D(dropoutRate))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(128, activation='relu'))
model.add(keras.layers.Dropout(dropoutRate))
model.add(keras.layers.Dense(len(unicodeData), activation='softmax'))
model.summary()

model.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=['accuracy'])

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_8 (Conv2D)            (None, 30, 30, 16)        160       
_________________________________________________________________
spatial_dropout2d_8 (Spatial (None, 30, 30, 16)        0         
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 28, 28, 16)        2320      
_________________________________________________________________
spatial_dropout2d_9 (Spatial (None, 28, 28, 16)        0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 14, 14, 16)        0         
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 12, 12, 32)        4640      
_________________________________________________________________
spatial_dropout2d_10 (Spatia (None, 12, 12, 32)       

In [8]:
model.fit(image[:nFrac], charOutput[:nFrac], epochs=5, validation_split=testFrac/(1.-testFrac))
model.save('../models/KMNIST2.h5')

Train on 478424 samples, validate on 102520 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [9]:
dropoutRate = 0.5

model = keras.models.Sequential()
model.add(keras.layers.Conv2D(16, (3, 3), activation='relu', input_shape=image.shape[1:]))
model.add(keras.layers.SpatialDropout2D(dropoutRate))
model.add(keras.layers.Conv2D(16, (3, 3), activation='relu'))
model.add(keras.layers.SpatialDropout2D(dropoutRate))
model.add(keras.layers.MaxPooling2D((2, 2)))
model.add(keras.layers.Conv2D(32, (3, 3), activation='relu'))
model.add(keras.layers.SpatialDropout2D(dropoutRate))
model.add(keras.layers.Conv2D(32, (3, 3), activation='relu'))
model.add(keras.layers.SpatialDropout2D(dropoutRate))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(256, activation='relu'))
model.add(keras.layers.Dropout(dropoutRate))
model.add(keras.layers.Dense(len(unicodeData), activation='softmax'))
model.summary()

model.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=['accuracy'])

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_12 (Conv2D)           (None, 30, 30, 16)        160       
_________________________________________________________________
spatial_dropout2d_12 (Spatia (None, 30, 30, 16)        0         
_________________________________________________________________
conv2d_13 (Conv2D)           (None, 28, 28, 16)        2320      
_________________________________________________________________
spatial_dropout2d_13 (Spatia (None, 28, 28, 16)        0         
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 14, 14, 16)        0         
_________________________________________________________________
conv2d_14 (Conv2D)           (None, 12, 12, 32)        4640      
_________________________________________________________________
spatial_dropout2d_14 (Spatia (None, 12, 12, 32)       

In [10]:
model.fit(image[:nFrac], charOutput[:nFrac], epochs=5, validation_split=testFrac/(1.-testFrac))
model.save('../models/KMNIST3.h5')

Train on 478424 samples, validate on 102520 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [11]:
dropoutRate = 0.5

model = keras.models.Sequential()
model.add(keras.layers.Conv2D(16, (3, 3), activation='relu', input_shape=image.shape[1:]))
model.add(keras.layers.SpatialDropout2D(dropoutRate))
model.add(keras.layers.Conv2D(16, (3, 3), activation='relu'))
model.add(keras.layers.SpatialDropout2D(dropoutRate))
model.add(keras.layers.MaxPooling2D((2, 2)))
model.add(keras.layers.Conv2D(32, (3, 3), activation='relu'))
model.add(keras.layers.SpatialDropout2D(dropoutRate))
model.add(keras.layers.Conv2D(32, (3, 3), activation='relu'))
model.add(keras.layers.SpatialDropout2D(dropoutRate))
model.add(keras.layers.MaxPooling2D((2, 2)))
model.add(keras.layers.Conv2D(64, (3, 3), activation='relu'))
model.add(keras.layers.SpatialDropout2D(dropoutRate))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(128, activation='relu'))
model.add(keras.layers.Dropout(dropoutRate))
model.add(keras.layers.Dense(len(unicodeData), activation='softmax'))
model.summary()

model.compile(optimizer='adam',loss='sparse_categorical_crossentropy',metrics=['accuracy'])

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_16 (Conv2D)           (None, 30, 30, 16)        160       
_________________________________________________________________
spatial_dropout2d_16 (Spatia (None, 30, 30, 16)        0         
_________________________________________________________________
conv2d_17 (Conv2D)           (None, 28, 28, 16)        2320      
_________________________________________________________________
spatial_dropout2d_17 (Spatia (None, 28, 28, 16)        0         
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 14, 14, 16)        0         
_________________________________________________________________
conv2d_18 (Conv2D)           (None, 12, 12, 32)        4640      
_________________________________________________________________
spatial_dropout2d_18 (Spatia (None, 12, 12, 32)       

In [12]:
model.fit(image[:nFrac], charOutput[:nFrac], epochs=5, validation_split=testFrac/(1.-testFrac))
model.save('../models/KMNIST4.h5')

Train on 478424 samples, validate on 102520 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
