In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = '1'

import numpy as np
from tensorflow.keras.layers import Average, Conv2D, Dense, Dropout, Flatten, Input, MaxPooling2D, Reshape, TimeDistributed
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import Model as KerasModel

import sys
sys.path.append('..')

from text_recognizer.datasets.emnist import EmnistDataset
from text_recognizer.models.emnist_mlp import EmnistMlp
from training.util import evaluate_model, train_model

%load_ext autoreload
%autoreload 2

In [7]:
# Simple lenet

def lenet(image_height: int, image_width: int, num_classes: int):
    """Build a small LeNet-style classifier for single-channel images.

    The network adds a channel axis to the 2-D input, applies two 3x3
    convolutions and one 2x2 max-pool, then classifies with a 128-unit
    dense layer followed by a softmax over ``num_classes``.

    Args:
        image_height: Height of each input image in pixels.
        image_width: Width of each input image in pixels.
        num_classes: Size of the softmax output.

    Returns:
        The (uncompiled) Keras ``Sequential`` model. Its summary is printed
        as a side effect.
    """
    layers = [
        # Conv2D needs an explicit channel axis: (H, W) -> (H, W, 1).
        Reshape((image_height, image_width, 1), input_shape=(image_height, image_width)),
        Conv2D(32, kernel_size=(3, 3), activation='relu'),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(num_classes, activation='softmax'),
    ]
    network = Sequential(layers)
    network.summary()
    return network


# Load EMNIST and build the baseline network for 28x28 inputs.
data = EmnistDataset()
nn = lenet(image_height=28, image_width=28, num_classes=data.num_classes)

# Single-epoch training run; the call stays the cell's last expression so
# whatever train_model returns is displayed as the cell output.
train_model(
    model=nn,
    x_train=data.x_train,
    y_train=data.y_train,
    loss='categorical_crossentropy',
    batch_size=128,
    epochs=1,
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape_3 (Reshape)          (None, 28, 28, 1)         0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 26, 26, 32)        320       
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 24, 24, 64)        18496     
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 12, 12, 64)        0         
_________________________________________________________________
flatten_4 (Flatten)          (None, 9216)              0         
_________________________________________________________________
dense_8 (Dense)              (None, 128)               1179776   
_________________________________________________________________
dense_9 (Dense)              (None, 65)                8385      
Total para

<tensorflow.python.keras.callbacks.History at 0x7f07ab3ef898>

In [9]:
# Replacing FCs with all convs

def lenet2(image_height: int, image_width: int, num_classes: int):
    """Build the LeNet variant whose dense head is expressed as convolutions.

    The first "dense" layer becomes a convolution whose kernel covers the
    whole pooled feature map (collapsing it to 1x1), and the classifier
    becomes a 1x1 convolution with a softmax over its channels. The kernel
    size is derived from the input size, so the model ends in exactly
    ``num_classes`` outputs for any sufficiently large image (for 28x28
    inputs the kernel is 12x12).

    Args:
        image_height: Height of each input image in pixels.
        image_width: Width of each input image in pixels.
        num_classes: Number of output channels of the final 1x1 convolution.

    Returns:
        The (uncompiled) Keras ``Sequential`` model. Its summary is printed
        as a side effect.

    Raises:
        ValueError: If the image is too small to survive the two 3x3
            convolutions and the 2x2 pooling.
    """
    # Each 'valid' 3x3 conv trims 2 pixels per axis; 2x2 pooling then halves
    # the result, rounding down.
    pooled_height = (image_height - 4) // 2
    pooled_width = (image_width - 4) // 2
    if pooled_height < 1 or pooled_width < 1:
        raise ValueError(
            'Image of size {}x{} is too small for lenet2; need at least 6x6.'.format(
                image_height, image_width
            )
        )

    model = Sequential()
    # Conv2D needs an explicit channel axis: (H, W) -> (H, W, 1).
    model.add(Reshape((image_height, image_width, 1), input_shape=(image_height, image_width)))
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu'))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    # Kernel spans the entire pooled map, so this acts like Flatten + Dense(128).
    model.add(Conv2D(128, (pooled_height, pooled_width), activation='relu'))
    # 1x1 conv acts like Dense(num_classes); softmax runs over the channel axis.
    model.add(Conv2D(num_classes, (1, 1), activation='softmax'))
    # (1, 1, num_classes) -> (num_classes,)
    model.add(Flatten())
    model.summary()
    return model


# Load EMNIST and build the all-convolutional network for 28x28 inputs.
data = EmnistDataset()
nn = lenet2(image_height=28, image_width=28, num_classes=data.num_classes)

# Single-epoch training run; the call stays the cell's last expression so
# whatever train_model returns is displayed as the cell output.
train_model(
    model=nn,
    x_train=data.x_train,
    y_train=data.y_train,
    loss='categorical_crossentropy',
    batch_size=128,
    epochs=1,
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape_5 (Reshape)          (None, 28, 28, 1)         0         
_________________________________________________________________
conv2d_14 (Conv2D)           (None, 26, 26, 32)        320       
_________________________________________________________________
conv2d_15 (Conv2D)           (None, 24, 24, 64)        18496     
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 12, 12, 64)        0         
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 1, 1, 128)         1179776   
_________________________________________________________________
conv2d_17 (Conv2D)           (None, 1, 1, 65)          8385      
_________________________________________________________________
flatten_6 (Flatten)          (None, 65)                0         
Total para

<tensorflow.python.keras.callbacks.History at 0x7f085a3299b0>

In [11]:
# Time-distributing lenet over two non-overlapping patches of the image

def lenet_td(image_height: int, image_width: int, num_classes: int):
    """Build a model that applies one shared LeNet to two slices of the input.

    The input is reshaped into two slices of shape
    ``(image_height, image_width // 2, 1)``. A single LeNet-style classifier
    is applied to both via ``TimeDistributed``; the two softmax vectors are
    concatenated and fed to a final softmax ``Dense`` layer.

    Args:
        image_height: Height of each input image in pixels.
        image_width: Width of each input image in pixels.
        num_classes: Size of both the per-slice and the final softmax.

    Returns:
        The (uncompiled) functional Keras model. Its summary is printed as a
        side effect.
    """
    # Shared per-slice classifier. It has no input_shape of its own; it is
    # built when TimeDistributed first calls it.
    patch_classifier = Sequential([
        Conv2D(32, kernel_size=(3, 3), activation='relu'),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(num_classes, activation='softmax'),
    ])

    image_input = Input(shape=(image_height, image_width))
    # NOTE(review): Reshape reinterprets the pixels in row-major order, so each
    # of the 2 slices appears to hold half of the image's rows, with every
    # image_width-wide row folded into two rows of width image_width // 2 --
    # not a contiguous left/right half. Confirm this is the intended patching.
    patches = Reshape((2, image_height, image_width // 2, 1))(image_input)
    per_patch_probs = TimeDistributed(patch_classifier)(patches)
    # (2, num_classes) -> (2 * num_classes,)
    merged = Flatten()(per_patch_probs)
    class_probs = Dense(num_classes, activation='softmax')(merged)

    full_model = KerasModel(inputs=image_input, outputs=class_probs)
    full_model.summary()
    return full_model


# Load EMNIST and build the time-distributed network for 28x28 inputs.
data = EmnistDataset()
nn = lenet_td(image_height=28, image_width=28, num_classes=data.num_classes)

# Single-epoch training run; the call stays the cell's last expression so
# whatever train_model returns is displayed as the cell output.
train_model(
    model=nn,
    x_train=data.x_train,
    y_train=data.y_train,
    loss='categorical_crossentropy',
    batch_size=128,
    epochs=1,
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 28, 28)            0         
_________________________________________________________________
reshape_7 (Reshape)          (None, 2, 28, 14, 1)      0         
_________________________________________________________________
time_distributed_1 (TimeDist (None, 2, 65)             518849    
_________________________________________________________________
flatten_10 (Flatten)         (None, 130)               0         
_________________________________________________________________
dense_15 (Dense)             (None, 65)                8515      
Total params: 527,364
Trainable params: 527,364
Non-trainable params: 0
_________________________________________________________________
Train on 523449 samples, validate on 174483 samples
Epoch 1/1
Training took 26.228385 s


<tensorflow.python.keras.callbacks.History at 0x7f0852e93e80>

In [22]:
# All-conv solution that does the same thing

def lenet22(image_height: int, image_width: int, num_classes: int):
    """Build an all-convolutional model that scores two stacked image slices.

    The input is reshaped to ``(2 * image_height, image_width // 2, 1)``, i.e.
    two slices stacked along the height axis. After the shared conv/pool
    stack, a strided convolution whose kernel covers half of the pooled
    height and all of the pooled width produces one 128-d feature per slice
    (output ``(2, 1, 128)`` for 28x28 inputs). A 1x1 softmax convolution
    scores each slice, and a final softmax ``Dense`` layer combines the two.

    The kernel and stride are derived from the input size; for 28x28 inputs
    they are ``(13, 5)`` and ``(13, 1)``.

    Args:
        image_height: Height of each input image in pixels.
        image_width: Width of each input image in pixels. Must be even,
            otherwise the reshape cannot preserve the element count.
        num_classes: Size of both the per-slice and the final softmax.

    Returns:
        The (uncompiled) Keras ``Sequential`` model. Its summary is printed
        as a side effect.

    Raises:
        ValueError: If the image is too small for the conv/pool stack.
    """
    stacked_height = image_height * 2
    stacked_width = image_width // 2
    # Each 'valid' 3x3 conv trims 2 pixels per axis; 2x2 pooling then halves
    # the result, rounding down.
    pooled_height = (stacked_height - 4) // 2
    pooled_width = (stacked_width - 4) // 2
    # Half of the pooled height corresponds to one of the two stacked slices.
    slice_height = pooled_height // 2
    if slice_height < 1 or pooled_width < 1:
        raise ValueError(
            'Image of size {}x{} is too small for lenet22.'.format(image_height, image_width)
        )

    model = Sequential()
    model.add(Reshape((stacked_height, stacked_width, 1), input_shape=(image_height, image_width)))
    # No input_shape here: only the first layer of a Sequential model declares
    # it, and this layer actually receives the reshaped (2H, W // 2, 1) tensor.
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu'))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    # Striding by the slice height yields one output row per stacked slice.
    model.add(Conv2D(128, (slice_height, pooled_width), strides=(slice_height, 1), activation='relu'))
    # Per-slice class scores; softmax runs over the channel axis.
    model.add(Conv2D(num_classes, (1, 1), activation='softmax'))
    model.add(Flatten())
    # Combine the per-slice scores into a single prediction.
    model.add(Dense(num_classes, activation='softmax'))
    model.summary()
    return model


# Load EMNIST and build the stacked-slice all-conv network for 28x28 inputs.
data = EmnistDataset()
nn = lenet22(image_height=28, image_width=28, num_classes=data.num_classes)

# Single-epoch training run; the call stays the cell's last expression so
# whatever train_model returns is displayed as the cell output.
train_model(
    model=nn,
    x_train=data.x_train,
    y_train=data.y_train,
    loss='categorical_crossentropy',
    batch_size=128,
    epochs=1,
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape_18 (Reshape)         (None, 56, 14, 1)         0         
_________________________________________________________________
conv2d_59 (Conv2D)           (None, 54, 12, 32)        320       
_________________________________________________________________
conv2d_60 (Conv2D)           (None, 52, 10, 64)        18496     
_________________________________________________________________
max_pooling2d_19 (MaxPooling (None, 26, 5, 64)         0         
_________________________________________________________________
conv2d_61 (Conv2D)           (None, 2, 1, 128)         532608    
_________________________________________________________________
conv2d_62 (Conv2D)           (None, 2, 1, 65)          8385      
_________________________________________________________________
flatten_18 (Flatten)         (None, 130)               0         
__________

<tensorflow.python.keras.callbacks.History at 0x7f084fada9b0>

In [5]:
# All-conv variant with padding='same' on the strided conv: its output is (None, 2, 5, 128) rather than (None, 2, 1, 128)

def lenet22(image_height: int, image_width: int, num_classes: int):
    """Build the stacked-slice all-conv model with 'same' padding on the wide conv.

    The input is reshaped to ``(2 * image_height, image_width // 2, 1)``, i.e.
    two slices stacked along the height axis. After the shared conv/pool
    stack, a strided convolution with ``padding='same'`` is applied; because
    of the padding the pooled width is kept, so for 28x28 inputs its output
    is ``(2, 5, 128)`` and the flattened per-position scores have 650
    entries. A final softmax ``Dense`` layer maps them to ``num_classes``.

    The kernel and stride are derived from the input size; for 28x28 inputs
    they are ``(13, 5)`` and ``(13, 1)``.

    Args:
        image_height: Height of each input image in pixels.
        image_width: Width of each input image in pixels. Must be even,
            otherwise the reshape cannot preserve the element count.
        num_classes: Size of both the per-position and the final softmax.

    Returns:
        The (uncompiled) Keras ``Sequential`` model. Its summary is printed
        as a side effect.

    Raises:
        ValueError: If the image is too small for the conv/pool stack.
    """
    stacked_height = image_height * 2
    stacked_width = image_width // 2
    # Each 'valid' 3x3 conv trims 2 pixels per axis; 2x2 pooling then halves
    # the result, rounding down.
    pooled_height = (stacked_height - 4) // 2
    pooled_width = (stacked_width - 4) // 2
    # Half of the pooled height corresponds to one of the two stacked slices.
    slice_height = pooled_height // 2
    if slice_height < 1 or pooled_width < 1:
        raise ValueError(
            'Image of size {}x{} is too small for lenet22.'.format(image_height, image_width)
        )

    model = Sequential()
    model.add(Reshape((stacked_height, stacked_width, 1), input_shape=(image_height, image_width)))
    # No input_shape here: only the first layer of a Sequential model declares
    # it, and this layer actually receives the reshaped (2H, W // 2, 1) tensor.
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu'))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    # 'same' padding keeps every width position instead of collapsing to one.
    model.add(
        Conv2D(
            128,
            (slice_height, pooled_width),
            strides=(slice_height, 1),
            activation='relu',
            padding='same',
        )
    )
    # Per-position class scores; softmax runs over the channel axis.
    model.add(Conv2D(num_classes, (1, 1), activation='softmax'))
    model.add(Flatten())
    # Combine all per-position scores into a single prediction.
    model.add(Dense(num_classes, activation='softmax'))
    model.summary()
    return model


# Load EMNIST and build the 'same'-padded all-conv network for 28x28 inputs.
data = EmnistDataset()
nn = lenet22(image_height=28, image_width=28, num_classes=data.num_classes)

# Single-epoch training run; the call stays the cell's last expression so
# whatever train_model returns is displayed as the cell output.
train_model(
    model=nn,
    x_train=data.x_train,
    y_train=data.y_train,
    loss='categorical_crossentropy',
    batch_size=128,
    epochs=1,
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape_3 (Reshape)          (None, 56, 14, 1)         0         
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 54, 12, 32)        320       
_________________________________________________________________
conv2d_12 (Conv2D)           (None, 52, 10, 64)        18496     
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 26, 5, 64)         0         
_________________________________________________________________
conv2d_13 (Conv2D)           (None, 2, 5, 128)         639104    
_________________________________________________________________
conv2d_14 (Conv2D)           (None, 2, 5, 65)          8385      
_________________________________________________________________
flatten_2 (Flatten)          (None, 650)               0         
__________

<tensorflow.python.keras.callbacks.History at 0x7fb88ef8fda0>