In [2]:
from tensorflow import keras
import numpy as np
import pandas as pd
import random
import os

dataRep = '../data/'
scriptRep = '../kuzushiji_recognition/'

# some_file.py
import sys
# insert at 1, 0 is the script path (or '' in REPL)
sys.path.insert(1, scriptRep)
import progressBar

In [3]:
testFrac = 0.15
valFrac = 0.15

unicodeData = pd.read_csv(dataRep+'unicode_translation.csv')

raw = np.load('../data/dataset/caracterClassificationFull.npz')
image = np.expand_dims(raw['image'], 3).copy()
charOutput = raw['characterClass'].copy()
del raw

index = np.arange(image.shape[0])
np.random.shuffle(index)
image = image[index]/255.0
charOutput = charOutput[index]
del index

nFrac = int(image.shape[0]*(1.-testFrac))
nVal = int(image.shape[0]*(1.-testFrac-valFrac))
print(image.shape[0], image.shape[1:], nVal, nFrac)

683464 (32, 32, 1) 478424 580944


In [4]:
def trainAndEvaluateModel(model, filenameModel, batchSize):
    
    checkpoint = keras.callbacks.ModelCheckpoint(filepath=filenameModel,
                                                 monitor='val_loss',
                                                 verbose=0,
                                                 save_best_only=True,
                                                 mode='auto', period=1)
    history = keras.callbacks.History()
    reduceLR = keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                                 factor=0.1, patience=2,
                                                 verbose=0,
                                                 mode='auto')
    earlyStop = keras.callbacks.EarlyStopping(monitor='val_loss',
                                              min_delta=1e-7, patience=5,
                                              verbose=0, mode='auto')
    callbacks = [checkpoint, history, reduceLR, earlyStop]
    
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    model.fit(image[:nVal], charOutput[:nVal], epochs=50, batch_size=batchSize,
              validation_data=(image[nVal:nFrac], charOutput[nVal:nFrac]), shuffle=True, callbacks=callbacks)
    
    model = keras.models.load_model(filenameModel)
    test_loss, test_acc = model.evaluate(image[nVal:nFrac], charOutput[nVal:nFrac])
    
    return test_loss


def optimizeHyperParameter(funcCreateModel, filenameModel, dropoutLimit=(0,20), batchSizeLimit=(0,10), convolutionLayerLimit=(2,10), denseLayerLimit=(2,10), maxIter=30):
    
    i=0
    stop=False
    curentParam = [6, 5, 4, 8] # (dropout, batchsize, convolution, dense)
    testedParam = np.zeros((dropoutLimit[1]+1, batchSizeLimit[1]+1, convolutionLayerLimit[1]+1, denseLayerLimit[1]+1), dtype=np.bool)
    
    stdLoss = trainAndEvaluateModel(funcCreateModel(curentParam[0]/20., 2**(curentParam[2]), 2**(curentParam[3])), filenameModel, 2**(curentParam[1]))
    testedParam[curentParam[0], curentParam[1], curentParam[2], curentParam[3]] = True
    
    while (i<maxIter) and not stop:
        i+=1
        stop=True
        
        if (curentParam[0]-1) >= dropoutLimit[0] and not testedParam[curentParam[0]-1, curentParam[1], curentParam[2], curentParam[3]]:
            print("Test dropout down")
            testLoss = trainAndEvaluateModel(funcCreateModel(float(curentParam[0]-1)/20., 2**(curentParam[2]), 2**(curentParam[3])),
                                             'temp.h5', 2**(curentParam[1]))
            testedParam[curentParam[0]-1, curentParam[1], curentParam[2], curentParam[3]] = True
            
            if testLoss < stdLoss:
                stop=False
                stdLoss = testLoss
                curentParam[0] -= 1
                os.system("cp temp.h5 "+filenameModel)
                print("New param set", curentParam)
            
        if (curentParam[0]+1) <= dropoutLimit[1] and not testedParam[curentParam[0]+1, curentParam[1], curentParam[2], curentParam[3]]:
            print("Test dropout up")
            testLoss = trainAndEvaluateModel(funcCreateModel(float(curentParam[0]+1)/20., 2**(curentParam[2]), 2**(curentParam[3])),
                                             'temp.h5', 2**(curentParam[1]))
            testedParam[curentParam[0]+1, curentParam[1], curentParam[2], curentParam[3]] = True
            
            if testLoss < stdLoss:
                stop=False
                stdLoss = testLoss
                curentParam[0] += 1
                os.system("cp temp.h5 "+filenameModel)
                print("New param set", curentParam)
                
                
        if (curentParam[1]-1) >= batchSizeLimit[0] and not testedParam[curentParam[0], curentParam[1]-1, curentParam[2], curentParam[3]]:
            print("Test batchsize down")
            testLoss = trainAndEvaluateModel(funcCreateModel(curentParam[0]/20., 2**(curentParam[2]), 2**(curentParam[3])),
                                             'temp.h5', 2**(curentParam[1]-1))
            testedParam[curentParam[0], curentParam[1]-1, curentParam[2], curentParam[3]] = True
            
            if testLoss < stdLoss:
                stop=False
                stdLoss = testLoss
                curentParam[1] -= 1
                os.system("cp temp.h5 "+filenameModel)
                print("New param set", curentParam)
            
        if (curentParam[1]+1) <= batchSizeLimit[1] and not testedParam[curentParam[0], curentParam[1]+1, curentParam[2], curentParam[3]]:
            print("Test batchsize up")
            testLoss = trainAndEvaluateModel(funcCreateModel(curentParam[0]/20., 2**(curentParam[2]), 2**(curentParam[3])),
                                             'temp.h5', 2**(curentParam[1]+1))
            testedParam[curentParam[0], curentParam[1]+1, curentParam[2], curentParam[3]] = True
            
            if testLoss < stdLoss:
                stop=False
                stdLoss = testLoss
                curentParam[1] += 1
                os.system("cp temp.h5 "+filenameModel)
                print("New param set", curentParam)
                
        if (curentParam[2]-1) >= convolutionLayerLimit[0] and not testedParam[curentParam[0], curentParam[1], curentParam[2]-1, curentParam[3]]:
            print("Test convolution down")
            testLoss = trainAndEvaluateModel(funcCreateModel(curentParam[0]/20., 2**(curentParam[2]-1), 2**(curentParam[3])),
                                             'temp.h5', 2**(curentParam[1]))
            testedParam[curentParam[0], curentParam[1], curentParam[2]-1, curentParam[3]] = True
            
            if testLoss < stdLoss:
                stop=False
                stdLoss = testLoss
                curentParam[2] -= 1
                os.system("cp temp.h5 "+filenameModel)
                print("New param set", curentParam)
            
        if (curentParam[2]+1) <= convolutionLayerLimit[1] and not testedParam[curentParam[0]-1, curentParam[1], curentParam[2]+1, curentParam[3]]:
            print("Test convolution up")
            testLoss = trainAndEvaluateModel(funcCreateModel(curentParam[0]/20., 2**(curentParam[2]+1), 2**(curentParam[3])),
                                             'temp.h5', 2**(curentParam[1]))
            testedParam[curentParam[0]-1, curentParam[1], curentParam[2]+1, curentParam[3]] = True
            
            if testLoss < stdLoss:
                stop=False
                stdLoss = testLoss
                curentParam[2] += 1
                os.system("cp temp.h5 "+filenameModel)
                print("New param set", curentParam)
                
        if (curentParam[3]-1) >= denseLayerLimit[0] and not testedParam[curentParam[0]-1, curentParam[1], curentParam[2], curentParam[3]-1]:
            print("Test dense down")
            testLoss = trainAndEvaluateModel(funcCreateModel(curentParam[0]/20., 2**(curentParam[2]), 2**(curentParam[3]-1)),
                                             'temp.h5', 2**(curentParam[1]))
            testedParam[curentParam[0]-1, curentParam[1], curentParam[2], curentParam[3]-1] = True
            
            if testLoss < stdLoss:
                stop=False
                stdLoss = testLoss
                curentParam[3] -= 1
                os.system("cp temp.h5 "+filenameModel)
                print("New param set", curentParam)
            
        if (curentParam[3]+1) <= denseLayerLimit[1] and not testedParam[curentParam[0]-1, curentParam[1], curentParam[2], curentParam[3]+1]:
            print("Test dense up")
            testLoss = trainAndEvaluateModel(funcCreateModel(curentParam[0]/20., 2**(curentParam[2]), 2**(curentParam[3]+1)),
                                             'temp.h5', 2**(curentParam[1]))
            testedParam[curentParam[0]-1, curentParam[1], curentParam[2], curentParam[3]+1] = True
            
            if testLoss < stdLoss:
                stop=False
                stdLoss = testLoss
                curentParam[3] += 1
                os.system("cp temp.h5 "+filenameModel)
                print("New param set", curentParam)
                
    print('Dropout :', curentParam[0])
    print('Batch Size :', 2**curentParam[1])
    print('Convolutional layer :', 2**curentParam[2])
    print('Dense Layer :', 2**curentParam[3])
    os.system("rm temp.h5")
    
    print("Nb iteration", i, "/", maxIter)
    print("Nb case tested", np.sum(testedParam), "/", np.sum(np.ones(testedParam.shape, dtype=np.bool)))
        
def createModel1(dropoutRate, convLayer, denseLayer):
    
    model = keras.models.Sequential()
    model.add(keras.layers.Conv2D(convLayer, (3, 3), activation='relu', input_shape=image.shape[1:]))
    model.add(keras.layers.SpatialDropout2D(dropoutRate))
    model.add(keras.layers.Conv2D(convLayer, (3, 3), activation='relu'))
    model.add(keras.layers.SpatialDropout2D(dropoutRate))
    model.add(keras.layers.MaxPooling2D((2, 2)))
    model.add(keras.layers.Conv2D(convLayer*2, (3, 3), activation='relu'))
    model.add(keras.layers.SpatialDropout2D(dropoutRate))
    model.add(keras.layers.Conv2D(convLayer*2, (3, 3), activation='relu'))
    model.add(keras.layers.SpatialDropout2D(dropoutRate))
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(denseLayer, activation='relu'))
    model.add(keras.layers.Dropout(dropoutRate))
    model.add(keras.layers.Dense(len(unicodeData), activation='softmax'))
    
    return model
    

In [5]:
optimizeHyperParameter(createModel1, '../models/KMNIST1.h5')

W0904 14:12:56.266648 139978887604032 deprecation.py:506] From /home/mathieu/miniconda3/envs/mlearning/lib/python3.7/site-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0904 14:12:56.527713 139978887604032 callbacks.py:875] `period` argument is deprecated. Please use `save_freq` to specify the frequency in number of samples seen.


Train on 478424 samples, validate on 102520 samples
Epoch 1/50
  5632/478424 [..............................] - ETA: 16:45 - loss: 6.1924 - acc: 0.0332

KeyboardInterrupt: 

In [7]:
import sys
def sizeof_fmt(num, suffix='B'):
    ''' by Fred Cirera,  https://stackoverflow.com/a/1094933/1870254, modified'''
    for unit in ['','Ki','Mi','Gi','Ti','Pi','Ei','Zi']:
        if abs(num) < 1024.0:
            return "%3.1f %s%s" % (num, unit, suffix)
        num /= 1024.0
    return "%.1f %s%s" % (num, 'Yi', suffix)

for name, size in sorted(((name, sys.getsizeof(value)) for name, value in locals().items()),
                         key= lambda x: -x[1])[:10]:
    print("{:>30}: {:>8}".format(name, sizeof_fmt(size)))

                         image:  5.2 GiB
                    charOutput:  1.3 MiB
                   unicodeData: 687.4 KiB
                          _iii:  9.3 KiB
                           _i4:  9.3 KiB
                           _i7:  587.0 B
                           _i3:  575.0 B
                           _i1:  330.0 B
                           _i2:  330.0 B
                           _oh:  240.0 B


In [8]:
image.shape

(683464, 32, 32, 1)

In [12]:
image.nbytes/1024/1024

5339.5625

In [13]:
image.dtype

dtype('float64')

In [14]:
np.float16

numpy.float16