In [1]:
import numpy as np
from tensorflow.keras.layers import Average, Conv2D, Dense, Dropout, Flatten, Input, MaxPooling2D, Reshape, TimeDistributed
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import Model as KerasModel



import sys
sys.path.append('..')

from text_recognizer.datasets.emnist import EmnistDataset
from text_recognizer.models.emnist_mlp import EmnistMlp
from text_recognizer.train.util import evaluate_model, train_model

%load_ext autoreload
%autoreload 2

In [12]:
# Simple lenet

def lenet(image_height: int, image_width: int, num_classes: int) -> "Sequential":
    """Build a small LeNet-style CNN that classifies flattened grayscale images.

    Each flat input vector of length ``image_height * image_width`` is reshaped
    to ``(image_height, image_width, 1)``, passed through two 3x3 ReLU
    convolutions (32 then 64 filters) and a 2x2 max-pool, then flattened into a
    128-unit ReLU dense layer and a softmax over ``num_classes``.

    Args:
        image_height: Height in pixels of each flattened input image.
        image_width: Width in pixels of each flattened input image.
        num_classes: Size of the softmax output.

    Returns:
        The (uncompiled) Keras ``Sequential`` model. ``model.summary()`` is
        printed as a side effect.
    """
    model = Sequential()
    # The input geometry comes from the arguments; it was previously pinned to
    # 28x28 / 784 regardless of what the caller passed.
    model.add(
        Reshape(
            (image_height, image_width, 1),
            input_shape=(image_height * image_width,),
        )
    )
    # No `input_shape` here: Sequential only honours it on the first layer, so
    # repeating it on this convolution was dead (and potentially misleading).
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu'))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))
    model.summary()
    return model


# Quick smoke test: fit the baseline on the first 1,000 training samples only.
data = EmnistDataset()
nn = lenet(image_height=28, image_width=28, num_classes=data.num_classes)
train_model(
    model=nn, x_train=data.x_train[:1000], y_train=data.y_train[:1000],
    loss='categorical_crossentropy', epochs=5, batch_size=128,
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape_10 (Reshape)         (None, 28, 28, 1)         0         
_________________________________________________________________
conv2d_29 (Conv2D)           (None, 26, 26, 32)        320       
_________________________________________________________________
conv2d_30 (Conv2D)           (None, 24, 24, 64)        18496     
_________________________________________________________________
max_pooling2d_10 (MaxPooling (None, 12, 12, 64)        0         
_________________________________________________________________
flatten_6 (Flatten)          (None, 9216)              0         
_________________________________________________________________
dense_13 (Dense)             (None, 128)               1179776   
_________________________________________________________________
dense_14 (Dense)             (None, 65)                8385      
Total para

<tensorflow.python.keras.callbacks.History at 0x129378e80>

In [13]:
# Replace the fully connected (FC) layers with equivalent convolutions (all-conv LeNet)

def lenet2(image_height: int, image_width: int, num_classes: int) -> "Sequential":
    """Build the all-convolutional variant of the LeNet-style classifier.

    The layout mirrors ``lenet`` up to the max-pool. The 128-unit dense layer
    is then expressed as a convolution whose kernel covers the entire pooled
    feature map (so its output is 1x1x128), and the classifier is a 1x1
    convolution with a softmax over ``num_classes`` channels. A final
    ``Flatten`` turns the 1x1 map into a ``(num_classes,)`` vector.

    Args:
        image_height: Height in pixels of each flattened input image.
        image_width: Width in pixels of each flattened input image.
        num_classes: Number of output channels / classes.

    Returns:
        The (uncompiled) Keras ``Sequential`` model. ``model.summary()`` is
        printed as a side effect.

    Raises:
        ValueError: If the image is too small to leave at least one pixel
            after the two 3x3 convolutions and the 2x2 pooling.
    """
    # Two unpadded 3x3 convolutions trim 4 pixels per axis; 2x2 pooling then
    # halves (floor) each axis. For 28x28 this yields the original 12x12.
    pooled_height = (image_height - 4) // 2
    pooled_width = (image_width - 4) // 2
    if pooled_height < 1 or pooled_width < 1:
        raise ValueError(
            'image is too small for lenet2: got {}x{}, need at least 6x6'.format(
                image_height, image_width
            )
        )

    model = Sequential()
    model.add(
        Reshape(
            (image_height, image_width, 1),
            input_shape=(image_height * image_width,),
        )
    )
    # `input_shape` is only meaningful on the first layer, so it is not repeated here.
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu'))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    # Dense-as-conv: a kernel the size of the whole feature map collapses it to 1x1.
    model.add(Conv2D(128, (pooled_height, pooled_width), activation='relu'))
    model.add(Conv2D(num_classes, (1, 1), activation='softmax'))
    model.add(Flatten())
    model.summary()
    return model


# Same 1,000-sample smoke test, this time for the all-convolutional variant.
data = EmnistDataset()
nn = lenet2(image_height=28, image_width=28, num_classes=data.num_classes)
train_model(
    model=nn, x_train=data.x_train[:1000], y_train=data.y_train[:1000],
    loss='categorical_crossentropy', epochs=5, batch_size=128,
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape_11 (Reshape)         (None, 28, 28, 1)         0         
_________________________________________________________________
conv2d_31 (Conv2D)           (None, 26, 26, 32)        320       
_________________________________________________________________
conv2d_32 (Conv2D)           (None, 24, 24, 64)        18496     
_________________________________________________________________
max_pooling2d_11 (MaxPooling (None, 12, 12, 64)        0         
_________________________________________________________________
conv2d_33 (Conv2D)           (None, 1, 1, 128)         1179776   
_________________________________________________________________
conv2d_34 (Conv2D)           (None, 1, 1, 65)          8385      
_________________________________________________________________
flatten_7 (Flatten)          (None, 65)                0         
Total para

<tensorflow.python.keras.callbacks.History at 0x1286d3f98>

In [21]:
# Time-distributing lenet over two non-overlapping patches of the image

def lenet_td(image_height: int, image_width: int, num_classes: int) -> "KerasModel":
    """Build a LeNet-style classifier applied to two patches via ``TimeDistributed``.

    A single per-patch network (two 3x3 ReLU convolutions, 2x2 max-pool,
    128-unit dense, softmax over ``num_classes``) is shared across two patches
    of shape ``(image_height, image_width // 2, 1)``. The two per-patch softmax
    vectors are flattened and combined by a final softmax dense layer.

    Args:
        image_height: Height in pixels of each flattened input image.
        image_width: Width in pixels of each flattened input image; must be
            even so the image splits into two equally sized patches.
        num_classes: Number of output classes.

    Returns:
        The (uncompiled) functional Keras model mapping a flat image vector to
        class probabilities. Its summary is printed as a side effect.

    Raises:
        ValueError: If ``image_width`` is odd.
    """
    if image_width % 2 != 0:
        raise ValueError(
            'image_width must be even to split into two patches, got {}'.format(image_width)
        )
    patch_width = image_width // 2

    # Per-patch network; it has no input shape of its own and is built when
    # TimeDistributed first calls it on the reshaped input.
    patch_model = Sequential()
    patch_model.add(Conv2D(32, kernel_size=(3, 3), activation='relu'))
    patch_model.add(Conv2D(64, (3, 3), activation='relu'))
    patch_model.add(MaxPooling2D(pool_size=(2, 2)))
    patch_model.add(Flatten())
    patch_model.add(Dense(128, activation='relu'))
    patch_model.add(Dense(num_classes, activation='softmax'))

    inputs = Input(shape=(image_height * image_width,))
    # NOTE(review): Reshape refills elements in row-major order, so — assuming
    # the flat vector is a row-major image — each "patch" is one half of the
    # pixel sequence re-wrapped to `patch_width` columns, not a spatial
    # left/right half of the image. Confirm this is the intended split.
    reshaped_inputs = Reshape((2, image_height, patch_width, 1))(inputs)
    td_outputs = TimeDistributed(patch_model)(reshaped_inputs)
    flat_td_outputs = Flatten()(td_outputs)
    outputs = Dense(num_classes, activation='softmax')(flat_td_outputs)
    full_model = KerasModel(inputs=inputs, outputs=outputs)

    full_model.summary()
    return full_model


# Same 1,000-sample smoke test for the time-distributed two-patch model.
data = EmnistDataset()
nn = lenet_td(image_height=28, image_width=28, num_classes=data.num_classes)
train_model(
    model=nn, x_train=data.x_train[:1000], y_train=data.y_train[:1000],
    loss='categorical_crossentropy', epochs=5, batch_size=128,
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_7 (InputLayer)         (None, 784)               0         
_________________________________________________________________
reshape_18 (Reshape)         (None, 2, 28, 14, 1)      0         
_________________________________________________________________
time_distributed_5 (TimeDist (None, 2, 65)             518849    
_________________________________________________________________
flatten_16 (Flatten)         (None, 130)               0         
_________________________________________________________________
dense_31 (Dense)             (None, 65)                8515      
Total params: 527,364
Trainable params: 527,364
Non-trainable params: 0
_________________________________________________________________
Train on 750 samples, validate on 250 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Training took 4.321935 s


<tensorflow.python.keras.callbacks.History at 0x129ef20b8>

In [29]:
# All-conv counterpart of the time-distributed model: both patches are stacked into one tall image

def lenet22(image_height: int, image_width: int, num_classes: int) -> "Sequential":
    """Build an all-convolutional two-patch classifier over a stacked image.

    The flat input is reshaped to a single tall image of shape
    ``(2 * image_height, image_width // 2, 1)``. After two 3x3 ReLU
    convolutions and a 2x2 max-pool, a strided convolution whose window spans
    half the pooled height and the full pooled width produces one 128-channel
    feature vector per vertical half. A 1x1 softmax convolution classifies each
    half, and a final softmax dense layer combines the flattened scores.

    Args:
        image_height: Height in pixels of each flattened input image.
        image_width: Width in pixels of each flattened input image; must be even.
        num_classes: Number of output classes.

    Returns:
        The (uncompiled) Keras ``Sequential`` model. ``model.summary()`` is
        printed as a side effect.

    Raises:
        ValueError: If ``image_width`` is odd, or the image is too small for
            the convolution/pooling stack to leave a usable feature map.
    """
    if image_width % 2 != 0:
        raise ValueError(
            'image_width must be even to split into two patches, got {}'.format(image_width)
        )
    stacked_height = 2 * image_height
    patch_width = image_width // 2

    # Two unpadded 3x3 convolutions trim 4 pixels per axis; 2x2 pooling halves
    # (floor) each axis. For a 28x28 image this gives a 26x5 feature map.
    pooled_height = (stacked_height - 4) // 2
    pooled_width = (patch_width - 4) // 2
    # One window per vertical half of the pooled map: 13 rows for 28x28 input.
    window_height = pooled_height // 2
    if window_height < 1 or pooled_width < 1:
        raise ValueError(
            'image is too small for lenet22: got {}x{}'.format(image_height, image_width)
        )

    model = Sequential()
    model.add(
        Reshape(
            (stacked_height, patch_width, 1),
            input_shape=(image_height * image_width,),
        )
    )
    # `input_shape` is only honoured on the first layer; the value previously
    # given here did not match the stacked geometry and had no effect.
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu'))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    # Stride equals the window height, so the windows do not overlap.
    model.add(
        Conv2D(
            128,
            (window_height, pooled_width),
            strides=(window_height, 1),
            activation='relu',
        )
    )
    model.add(Conv2D(num_classes, (1, 1), activation='softmax'))
    model.add(Flatten())
    model.add(Dense(num_classes, activation='softmax'))
    model.summary()
    return model


# Same 1,000-sample smoke test for the all-convolutional two-patch model.
data = EmnistDataset()
nn = lenet22(image_height=28, image_width=28, num_classes=data.num_classes)
train_model(
    model=nn, x_train=data.x_train[:1000], y_train=data.y_train[:1000],
    loss='categorical_crossentropy', epochs=5, batch_size=128,
)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
reshape_25 (Reshape)         (None, 56, 14, 1)         0         
_________________________________________________________________
conv2d_72 (Conv2D)           (None, 54, 12, 32)        320       
_________________________________________________________________
conv2d_73 (Conv2D)           (None, 52, 10, 64)        18496     
_________________________________________________________________
max_pooling2d_26 (MaxPooling (None, 26, 5, 64)         0         
_________________________________________________________________
conv2d_74 (Conv2D)           (None, 2, 1, 128)         532608    
_________________________________________________________________
conv2d_75 (Conv2D)           (None, 2, 1, 65)          8385      
_________________________________________________________________
flatten_21 (Flatten)         (None, 130)               0         
__________

<tensorflow.python.keras.callbacks.History at 0x13076b518>