# Data Preparation

In [ ]:
import os, cv2, skimage
from skimage.transform import resize
from sklearn.model_selection import train_test_split
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.utils.np_utils import to_categorical
from keras.utils import print_summary, plot_model
from keras.layers import Conv2D, Dense, Dropout, Flatten, MaxPool2D,Input, Add, GlobalAveragePooling2D, DepthwiseConv2D, BatchNormalization, LeakyReLU
from keras.models import Model , load_model , Sequential
from keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau
from glob import glob
# Dataset locations.
train_dir = 'data/input/NewData/train/'
test_dir = 'data/input/NewData/test/'

# Class names: the sorted base names of everything directly under train_dir.
CLASSES = sorted(os.path.basename(folder) for folder in glob(train_dir + '/*'))
# CLASSES.append("other")

# Image geometry and training hyper-parameters shared by the cells below.
imageSize = 64
batch_size = 64
target_dims = (imageSize, imageSize, 3)
num_classes = 25

In [ ]:
def get_data(folder,limit=10):
    """
    Load images and integer labels from a directory of per-class sub-folders.

    Every non-hidden sub-directory of ``folder`` is treated as one class. Its
    name is mapped to a label through a fixed table ('A'..'I' -> 0..8 and
    'K'..'Y' -> 9..23; 'J' is skipped); any other name gets label 25.
    Each image is read with ``cv2.imread``, resized to
    ``(imageSize, imageSize, 3)`` with ``skimage.transform.resize`` and stored
    as float32. Files that ``cv2.imread`` cannot decode are skipped.

    Args:
        folder: Path of the dataset root; a trailing separator is optional.
        limit: Maximum number of files considered per class folder, taken in
            ``os.listdir`` order. ``None`` means "use every file".

    Returns:
        Tuple ``(X, y)``. ``X`` has shape ``(n, imageSize, imageSize, 3)`` and
        dtype float32; ``y`` has shape ``(n,)`` and an integer dtype. ``n`` is
        the number of images actually loaded, so no uninitialised rows are
        ever returned.
    """
    # Fixed folder-name -> label table. 'J' is absent, so 'K' follows 'I'.
    known_labels = {name: idx for idx, name in enumerate('ABCDEFGHIKLMNOPQRSTUVWXY')}
    # NOTE(review): 25 is outside the index range of a 25-way one-hot
    # encoding (0..24) -- confirm what unknown folders should map to.
    fallback_label = 25

    # First pass: collect the candidate files of every class folder so the
    # output buffers can be sized exactly once.
    class_files = []
    for folderName in os.listdir(folder):
        class_path = os.path.join(folder, folderName)
        if folderName.startswith('.') or not os.path.isdir(class_path):
            continue
        # Slicing with None keeps everything, so limit=None needs no special case.
        class_files.append((folderName, class_path, os.listdir(class_path)[:limit]))

    train_len = sum(len(filenames) for _, _, filenames in class_files)
    print("num_datas:",train_len)
    X = np.empty((train_len, imageSize, imageSize, 3), dtype=np.float32)
    # Builtin int instead of the removed np.int alias (same resulting dtype).
    y = np.empty((train_len,), dtype=int)
    cnt = 0

    # Second pass: decode, resize and store each readable image.
    for folderName, class_path, filenames in class_files:
        label = known_labels.get(folderName, fallback_label)
        for image_filename in filenames:
            img_file = cv2.imread(os.path.join(class_path, image_filename))
            if img_file is None:
                # Not a decodable image; leave no gap in the output arrays.
                continue
            X[cnt] = skimage.transform.resize(img_file, (imageSize, imageSize, 3))
            y[cnt] = label
            cnt += 1

    # Drop the unused tail left behind by unreadable files.
    return X[:cnt], y[:cnt]


In [4]:
# Load up to 300 images per class folder from the training directory.
X_train, y_train = get_data(train_dir,300) 

# Hold out 20% of the loaded samples; X_test/y_test are used as validation data below.
X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.2) 

# Encode integer labels as one-hot vectors (ex : 2 -> [0,0,1,0,0,0,0,0,0,0])
y_trainHot = to_categorical(y_train,num_classes=num_classes)
y_testHot = to_categorical(y_test,num_classes=num_classes)

# Cell output: shapes of the split arrays and their one-hot labels.
X_train.shape, y_trainHot.shape, X_test.shape, y_testHot.shape


num_datas: 7200


((5760, 64, 64, 3), (5760, 25), (1440, 64, 64, 3), (1440, 25))

In [5]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape 

((5760, 64, 64, 3), (1440, 64, 64, 3), (5760,), (1440,))

# Data Augmentation

In [ ]:
# Training pipeline: per-sample centering/std-normalisation plus random
# rotation, shift and horizontal-flip augmentation.
train_image_generator = ImageDataGenerator(
    samplewise_center=True,
    samplewise_std_normalization=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True
)

# Validation pipeline: the same per-sample normalisation, no augmentation.
val_image_generator = ImageDataGenerator(
    samplewise_center=True,
    samplewise_std_normalization=True,
)


# test_image_generator = ImageDataGenerator(
#     samplewise_center=True,
#     samplewise_std_normalization=True,
# )


# Batch iterators over the in-memory arrays. The validation iterator is not
# shuffled, so its batches follow the order of X_test / y_testHot.
train_generator = train_image_generator.flow(x=X_train, y=y_trainHot, batch_size=batch_size, shuffle=True)
val_generator = val_image_generator.flow(x=X_test, y=y_testHot, batch_size=batch_size, shuffle=False)
# test_generator = test_image_generator.flow_from_directory(train_dir,target_size=(imageSize, imageSize),color_mode="rgb",batch_size=32,class_mode=None,shuffle=False)

# filenames = test_generator.filenames
# nb_samples = len(filenames)
# print("nb_samples:",nb_samples)
# predict = model.predict_generator(test_generator,steps = nb_samples)

# Model SlimCNN

In [ ]:
def slimCNN():
    """
    Build and compile the small residual CNN used as the "SlimCNN" model.

    Layout: two stages of three 3x3 convolutions (the last one in each stage
    has stride 2), then two depthwise-separable units wrapped by a residual
    connection, global average pooling, dropout and a two-layer classifier
    ending in a ``num_classes``-way softmax.

    Returns:
        The compiled Keras ``Model`` (Adam, categorical cross-entropy,
        accuracy metric).
    """
    inputs = Input(shape=target_dims)
    net = inputs

    # Stem: 2 stages x (conv s1, conv s1, conv s2), each followed by LeakyReLU.
    for _ in range(2):
        for stride in (1, 1, 2):
            net = Conv2D(32, kernel_size=3, strides=stride, padding="same")(net)
            net = LeakyReLU()(net)

    # Residual branch: two depthwise 3x3 + pointwise 1x1 units.
    shortcut = net
    for _ in range(2):
        net = DepthwiseConv2D(kernel_size=3, strides=1, padding='same', kernel_initializer='he_normal')(net)
        net = BatchNormalization(axis=3)(net)
        net = LeakyReLU()(net)
        net = Conv2D(filters=32, kernel_size=1, strides=1, padding='same', kernel_initializer='he_normal')(net)
        net = BatchNormalization(axis=3)(net)
        net = LeakyReLU()(net)
    net = Add()([net, shortcut])

    # Classifier head.
    net = GlobalAveragePooling2D()(net)
    net = Dropout(0.2)(net)
    net = Dense(128, activation='relu')(net)
    outputs = Dense(num_classes, activation='softmax')(net)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=["accuracy"])
    return model

# Build the SlimCNN and print its layer summary; this rebinds the notebook-global `model`.
model=slimCNN()
model.summary()
# plot_model(model, to_file="model.png", show_shapes=True, show_layer_names=True)

# Model AlexNet

In [ ]:
def AlexNet():
    """
    Build and compile the AlexNet-style sequential CNN.

    Three pairs of 4x4 convolutions (64, 128, 256 filters; the second of each
    pair has stride 2) with dropout between them, followed by a 512-unit dense
    layer and a ``num_classes``-way softmax output.

    Returns:
        The compiled Keras ``Sequential`` model (Adam, categorical
        cross-entropy, accuracy metric).
    """
    model = Sequential()
    model.add(Conv2D(64, kernel_size=4, strides=1, activation='relu', input_shape=target_dims))
    model.add(Conv2D(64, kernel_size=4, strides=2, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Conv2D(128, kernel_size=4, strides=1, activation='relu'))
    model.add(Conv2D(128, kernel_size=4, strides=2, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Conv2D(256, kernel_size=4, strides=1, activation='relu'))
    model.add(Conv2D(256, kernel_size=4, strides=2, activation='relu'))
    model.add(Flatten())
    model.add(Dropout(0.5))
    model.add(Dense(512, activation='relu'))
    # The classifier layer must be appended to the Sequential stack itself;
    # wrapping it in a functional Model would need input/output tensors that
    # this function never creates.
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=["accuracy"])
    # model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

    return model

# Build the AlexNet-style model and print its layer summary; this rebinds the notebook-global `model`.
model=AlexNet()
model.summary()
# plot_model(model, to_file="model.png", show_shapes=True, show_layer_names=True)

# Model custom

In [ ]:
from keras import regularizers
from keras.losses import categorical_crossentropy
def model_custom():
    """
    Build and compile the custom sequential CNN.

    Three stages of two 3x3 "same" convolutions followed by 3x3 max-pooling,
    then batch normalisation, dropout, an L2-regularised 512-unit dense layer
    and a ``num_classes``-way softmax output.

    Returns:
        The compiled Keras ``Sequential`` model (Adam, categorical
        cross-entropy, accuracy metric).
    """
    conv_kwargs = dict(kernel_size = [3,3], padding = 'same', activation = 'relu')
    # (filters of first conv, filters of second conv) for each stage.
    stages = ((16, 32), (32, 64), (128, 256))

    model = Sequential()
    for stage_idx, (first_filters, second_filters) in enumerate(stages):
        if stage_idx == 0:
            # Only the very first layer declares the input shape.
            model.add(Conv2D(first_filters, input_shape = (64,64,3), **conv_kwargs))
        else:
            model.add(Conv2D(first_filters, **conv_kwargs))
        model.add(Conv2D(second_filters, **conv_kwargs))
        model.add(MaxPool2D(pool_size = [3,3]))

    model.add(BatchNormalization())

    # Classifier head.
    model.add(Flatten())
    model.add(Dropout(0.5))
    model.add(Dense(512, activation = 'relu', kernel_regularizer = regularizers.l2(0.001)))
    model.add(Dense(num_classes, activation = 'softmax'))

    model.compile(optimizer = 'adam', loss = categorical_crossentropy, metrics = ["accuracy"])
    return model

# Build the custom CNN and print its layer summary; this rebinds the notebook-global `model`.
model = model_custom()
model.summary()

# Model VGG16

In [10]:
from keras.applications.vgg16 import VGG16
from keras.optimizers import SGD, RMSprop, Adam, Adagrad, Adadelta, RMSprop
def vgg16():
    """
    Build and compile a VGG16-based classifier.

    The VGG16 convolutional base (without its top layers) is initialised from
    a local weights file and followed by ``Flatten`` and a
    ``num_classes``-way softmax layer.

    Returns:
        The compiled Keras ``Sequential`` model (SGD with lr=0.001,
        categorical cross-entropy, accuracy metric).
    """
    # Forward slashes resolve on every platform; the previous backslash form
    # relied on invalid escape sequences and only worked on Windows.
    vgg_base = VGG16(weights='data/input/pretrain-model/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5',include_top=False,input_shape=target_dims)

    #initiate a model
    model = Sequential()

    #Add the VGG base model
    model.add(vgg_base)

    #Add new layers
    model.add(Flatten())
    model.add(Dense(num_classes, activation='softmax'))

    #compile the model
    #Adam=Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
    sgd = SGD(lr=0.001)
    model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])
    return model
# Build the VGG16-based model and print its layer summary; this rebinds the notebook-global `model`.
model = vgg16()
model.summary()


Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 2, 2, 512)         14714688  
_________________________________________________________________
flatten_3 (Flatten)          (None, 2048)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 25)                51225     
Total params: 14,765,913
Trainable params: 14,765,913
Non-trainable params: 0
_________________________________________________________________


# Training

## fit

In [11]:
from math import ceil
# Train directly on the in-memory arrays (the image generators defined earlier
# are not involved here), using the held-out split as validation data.
# The returned object is kept in `train_log`.
train_log=model.fit(X_train,y_trainHot, epochs=100,
    validation_data=(X_test,y_testHot),
    batch_size=batch_size,
    # steps_per_epoch=int(ceil(len(X_train)/batch_size)),
    # validation_steps=int(ceil(len(X_test)/batch_size)),
    use_multiprocessing=False,
    verbose=2,
    # callbacks=[
    #     # TensorBoard(log_dir='./logs/%s' % (start_time)),
    #     # ModelCheckpoint('./models/%s.h5' % (start_time), monitor='val_acc', verbose=1, save_best_only=True, mode='auto'),
    #     ReduceLROnPlateau(monitor='val_acc', factor=0.2, patience=5, verbose=2, mode='auto')]
        )


Train on 5760 samples, validate on 1440 samples
Epoch 1/100
 - 11s - loss: 2.8119 - accuracy: 0.2073 - val_loss: 2.0111 - val_accuracy: 0.4007
Epoch 2/100
 - 7s - loss: 1.2397 - accuracy: 0.6234 - val_loss: 0.5740 - val_accuracy: 0.8382
Epoch 3/100
 - 7s - loss: 0.3906 - accuracy: 0.8938 - val_loss: 0.1162 - val_accuracy: 0.9799
Epoch 4/100
 - 7s - loss: 0.0695 - accuracy: 0.9915 - val_loss: 0.0289 - val_accuracy: 0.9993
Epoch 5/100
 - 7s - loss: 0.0190 - accuracy: 0.9997 - val_loss: 0.0145 - val_accuracy: 1.0000
Epoch 6/100
 - 7s - loss: 0.0106 - accuracy: 0.9998 - val_loss: 0.0091 - val_accuracy: 1.0000
Epoch 7/100
 - 7s - loss: 0.0073 - accuracy: 0.9998 - val_loss: 0.0066 - val_accuracy: 1.0000
Epoch 8/100
 - 7s - loss: 0.0055 - accuracy: 0.9998 - val_loss: 0.0056 - val_accuracy: 1.0000
Epoch 9/100
 - 7s - loss: 0.0044 - accuracy: 0.9998 - val_loss: 0.0043 - val_accuracy: 1.0000
Epoch 10/100
 - 7s - loss: 0.0036 - accuracy: 0.9998 - val_loss: 0.0040 - val_accuracy: 1.0000
Epoch 11/

## fit_generator

In [ ]:
import datetime
# Timestamp used to name log / checkpoint outputs of this run.
start_time = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')

# Train from the augmenting generator, one full pass over each generator per epoch.
train_log=model.fit_generator(train_generator, epochs=100, validation_data=val_generator,
    steps_per_epoch=len(train_generator),
    validation_steps=len(val_generator),
    verbose=2,
    callbacks=[
        # TensorBoard(log_dir='./logs/%s' % (start_time)),
        # ModelCheckpoint('./models/%s.h5' % (start_time), monitor='val_accuracy', verbose=1, save_best_only=True, mode='auto'),
        # The model is compiled with metrics=['accuracy'] and the training log
        # reports the validation metric as `val_accuracy`; monitoring the old
        # `val_acc` name would leave this callback without a value to watch.
        ReduceLROnPlateau(monitor='val_accuracy', factor=0.2, patience=5, verbose=2, mode='auto')]
        )

# SAVE Model

In [21]:
import pickle
import time, datetime, pytz

def get_datetime():
    """Return the current time in the Asia/Bangkok zone as 'YYYY-MM-DD_HH.MM.SS'."""
    bangkok = pytz.timezone('Asia/Bangkok')
    local_now = datetime.datetime.fromtimestamp(time.time())
    return local_now.astimezone(bangkok).strftime("%Y-%m-%d_%H.%M.%S")
# Timestamp that makes every saved artefact of this run uniquely named.
exec_time = get_datetime()

# Despite the *_DIR suffix, the MODEL_SAVE_* names hold full file paths.
MODEL_NAME = exec_time + '-model'
MODEL_DIR = 'model/myVGG16/'
MODEL_SAVE_DIR = MODEL_DIR + MODEL_NAME + '.h5'
MODEL_SAVE_WEIGHTS_DIR = MODEL_DIR + MODEL_NAME + '.weights.h5'
MODEL_SAVE_TRAIN_LOG_DIR = MODEL_DIR + MODEL_NAME + '-train-log.pickle'

# Create the output directory up front so a missing folder cannot make the
# saves below fail after training has already finished.
os.makedirs(MODEL_DIR, exist_ok=True)

# Persist the full model, its weights, and the training log object.
model.save(MODEL_SAVE_DIR)
model.save_weights(MODEL_SAVE_WEIGHTS_DIR)
with open(MODEL_SAVE_TRAIN_LOG_DIR, 'wb') as file:
    pickle.dump(train_log, file)

# LOAD Model

In [22]:
from pathlib import Path
def reload_model():
    """
    Reload the most recently saved model, its weights and its training log.

    Reads the three files named by ``MODEL_SAVE_DIR``,
    ``MODEL_SAVE_WEIGHTS_DIR`` and ``MODEL_SAVE_TRAIN_LOG_DIR``.

    Returns:
        ``(model, train_log)`` when all three files exist, otherwise
        ``(None, None)`` after printing which paths were checked.
    """
    required_paths = (MODEL_SAVE_DIR, MODEL_SAVE_WEIGHTS_DIR, MODEL_SAVE_TRAIN_LOG_DIR)
    if not all(Path(path).is_file() for path in required_paths):
        # Report exactly the paths that were checked above.
        print("Cannot reload the old model, weight and training log from\n  * \"%s\"\n  * \"%s\"\n  * \"%s\""
              % required_paths)
        print("Please check if the path is correct or not")
        return None, None

    print("Reloading old model, weights and training log from disk")
    model = load_model(MODEL_SAVE_DIR)
    model.load_weights(MODEL_SAVE_WEIGHTS_DIR)
    # NOTE: pickle.load can execute arbitrary code; only load log files that
    # were written by this notebook.
    with open(MODEL_SAVE_TRAIN_LOG_DIR, 'rb') as file:
        train_log = pickle.load(file)
    print("Done!")
    return model, train_log

# Rebind the notebook-global model and training log from disk; both are None if any file is missing.
model, train_log = reload_model()

Reloading old model, weights and training log from disk
Done!


# evaluate

In [24]:

# Evaluate on the separate test directory: up to 30 images per class folder,
# one sample per batch.
X_custom,y_custom=get_data(test_dir,30)#'data/input/NewData/test/'
y_customnHot = to_categorical(y_custom, num_classes=num_classes)
results = model.evaluate(X_custom,y_customnHot,batch_size=1)
print('test loss, test acc:', results)
# Cell output: shape of the loaded test images.
X_custom.shape

num_datas: 720
test loss, test acc: [0.0028258773899864703, 1.0]


(720, 64, 64, 3)

# evaluate_generator

In [ ]:
# Evaluate on the validation generator (the held-out split, per-sample normalised).
results = model.evaluate_generator(val_generator)
print('test loss, test acc:', results)

# NOTE: the message below says "3 samples", but predict_generator is called
# without a step limit on val_generator.
print('\n# Generate predictions for 3 samples')
predictions = model.predict_generator(val_generator)
# val_generator was created with shuffle=False, so predicted and true class
# indices are printed in the same sample order.
print('predictions:', np.argmax(predictions,axis=1))
print('y_testHot:', np.argmax(y_testHot,axis=1))

# Predict

In [27]:
# def read_img_V(image_path):
#     img = load_img(image_path, target_size=(75, 75))
#     image = img_to_array(img)
#     image = np.expand_dims(image, axis=0)
#     image = preprocess_input_(image)
#     return image


def read_image(image_path):
    """
    Read one image file and prepare it as a single-sample batch.

    Args:
        image_path: Path of the image file to read with ``cv2.imread``.

    Returns:
        Array of shape ``(1, imageSize, imageSize, 3)`` holding the resized
        image, or ``None`` when ``cv2.imread`` cannot read the file.
    """
    raw = cv2.imread(image_path)
    if raw is None:
        return None

    resized = skimage.transform.resize(raw, (imageSize, imageSize, 3))
    # Leading axis of size 1 so the result can be fed straight to model.predict.
    return np.asarray(resized).reshape((-1, imageSize, imageSize, 3))

# Predict a single test image and compare the predicted class name with the
# letter encoded in its path.
CHAR= 'W'
test_path='data/input/NewData/test/'+CHAR+'/'+CHAR+'32.jpg'
img=read_image(test_path)
predictions = model.predict(img)
# Map the arg-max class index back to its folder name through CLASSES.
print('predictions:', CLASSES[(np.argmax(predictions,axis=1))[0]])
print('y_label:', CHAR)


predictions: W
y_label: W
