In [None]:
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

# Load data

In [None]:
# Read the metadata table that describes every image of the dataset and
# echo it so the columns can be inspected.
metadata_path = "../input/chinese-mnist/chinese_mnist.csv"
csv = pd.read_csv(metadata_path)
print(csv)

In [None]:
# Build the image array X and the one-hot label matrix y from the metadata
# table, then hold out 20% of the samples as a test set.
X = []
y = []
for row in csv.itertuples():
    # itertuples() places the DataFrame index at position 0, so the first
    # three CSV columns are found at positions 1..3.
    suite_id, sample_id, code = row[1], row[2], row[3]
    file_name = f"input_{suite_id}_{sample_id}_{code}.jpg"
    image_path = f"../input/chinese-mnist/data/data/{file_name}"
    x = cv2.imread(image_path)
    if x is None:
        # cv2.imread does not raise on a missing/corrupt file; it returns None,
        # which would otherwise silently turn X into an object array.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    X.append(x)
    # Labels are encoded with class index = code - 1 over 15 classes.
    if not 1 <= code <= 15:
        # A code of 0 would otherwise wrap around to the last class (index -1).
        raise ValueError(f"Unexpected class code {code!r} for {file_name}")
    one_hot = [0] * 15
    one_hot[code - 1] = 1
    y.append(one_hot)
X = np.array(X)
y = np.array(y)
# Fixed random_state keeps the train/test partition reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Show the dimensions of the training images and of their one-hot labels,
# one shape per line.
print(X_train.shape, y_train.shape, sep="\n")

# Baseline CNN

In [None]:
# Baseline CNN: three 3x3 convolution + 2x2 max-pooling stages followed by a
# small dense head with a 15-way softmax output.
conv_opts = dict(kernel_size=(3, 3), padding="same", activation="relu")

base_layers = [
    # The first layer declares the expected input: 64x64 images, 3 channels.
    tf.keras.layers.Conv2D(filters=32, input_shape=[64, 64, 3], **conv_opts),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(filters=64, **conv_opts),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Conv2D(filters=64, **conv_opts),
    tf.keras.layers.MaxPooling2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(64, activation="relu"),
    tf.keras.layers.Dense(15, activation="softmax"),
]
model_base = tf.keras.Sequential(base_layers)

# Labels are one-hot vectors, hence categorical (not sparse) cross-entropy.
model_base.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model_base.summary()

In [None]:
# Train the baseline for 20 epochs; the held-out test split is passed as
# validation data so its accuracy is tracked after every epoch.
history = model_base.fit(
    x=X_train,
    y=y_train,
    epochs=20,
    validation_data=(X_test, y_test),
)

In [None]:
# Training vs. validation accuracy per epoch for the baseline model.
for metric in ('accuracy', 'val_accuracy'):
    plt.plot(history.history[metric], label=metric)
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
# Only the upper half of the accuracy range is shown.
plt.ylim([0.5, 1])
plt.legend(loc='lower right')

## Sanity check

In [None]:
# Sanity check: run the baseline on one known image. By the file-name pattern
# input_<suite>_<sample>_<code>.jpg, this one belongs to code 4.
sample_path = "../input/chinese-mnist/data/data/input_100_10_4.jpg"
sample = cv2.imread(sample_path)
if sample is None:
    # cv2.imread returns None instead of raising when the file cannot be read.
    raise FileNotFoundError(f"Could not read image: {sample_path}")
plt.imshow(sample)
# Add the leading batch dimension expected by predict().
sample = np.array([sample])
sample_prediction = np.argmax(model_base.predict(sample))
print(f"Prediction: {sample_prediction}")
# The labels were one-hot encoded at index (code - 1), so the argmax is a
# 0-based class index; add 1 to compare it with the code in the file name.
print(f"Predicted code: {sample_prediction + 1}")

In [None]:
# Test-set accuracy of the baseline: count the samples whose predicted class
# matches the one-hot label and divide by the number of test samples.
prediction_base = model_base.predict(X_test)
binary_base = tf.keras.metrics.categorical_accuracy(y_test, prediction_base)
accuracy_base = np.sum(binary_base) / y_test.shape[0]
print(f"The accuracy of the model is: {accuracy_base}")

# Ensemble method

In [None]:
num_models = 10


def build_ensemble_member():
    """Create and compile one ensemble member.

    Every member has the same layout: three 3x3 convolution + 2x2 max-pooling
    stages, a 64-unit dense layer and a 15-way softmax output, compiled with
    Adam and categorical cross-entropy.
    """
    conv_opts = dict(kernel_size=(3, 3), padding="same", activation="relu")
    member = tf.keras.Sequential([
        tf.keras.layers.Conv2D(filters=32, input_shape=[64, 64, 3], **conv_opts),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(filters=64, **conv_opts),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Conv2D(filters=64, **conv_opts),
        tf.keras.layers.MaxPooling2D((2, 2)),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(64, activation="relu"),
        tf.keras.layers.Dense(15, activation="softmax"),
    ])
    member.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return member


# One independently initialised network per ensemble slot.
model_ensemble = [build_ensemble_member() for _ in range(num_models)]

In [None]:
# Train every ensemble member on its own 90/10 re-split of the training data.
# NOTE: this rebinds `history` (previously the baseline's History object) to a
# list with one History per ensemble member.
history = [None] * num_models

for j in range(num_models):
    # Using the member index as random_state gives each model a different,
    # but reproducible, train/validation partition.
    X_train_ens, X_test_ens, y_train_ens, y_test_ens = train_test_split(X_train, y_train, test_size=0.1, random_state=j)
    history[j] = model_ensemble[j].fit(X_train_ens, y_train_ens, epochs=20, 
                                       validation_data=(X_test_ens, y_test_ens))
    # Report against the actual ensemble size rather than a hard-coded 10.
    print(f"Model {j+1} of {num_models}:")
    print(f"Best training accuracy: {max(history[j].history['accuracy'])}")
    print(f"Best validation accuracy: {max(history[j].history['val_accuracy'])}")

In [None]:
# Average the softmax outputs of all ensemble members on the test set and
# score the averaged prediction.
prediction_ensemble = np.zeros(y_test.shape)
for k in range(num_models):
    # Index with this loop's own variable. Using `j` (left over from the
    # training cell) would add the last model's output num_models times, so the
    # "ensemble" would just be that single model.
    prediction_ensemble = prediction_ensemble + model_ensemble[k].predict(X_test)
prediction_ensemble = prediction_ensemble / num_models
# Count the samples whose averaged prediction matches the one-hot label and
# divide by the number of test samples.
binary_ensemble = tf.keras.metrics.categorical_accuracy(y_test, prediction_ensemble)
print(f"The accuracy of the model is: {np.sum(binary_ensemble)/y_test.shape[0]}")

# Branching 

In [None]:
# Branching CNN (functional API): nine stacked 3x3 convolutions, tapped after
# every third one. Each tap is max-pooled once and the three pooled feature
# maps are concatenated along the last axis before the dense head.
conv_opts = dict(kernel_size=(3, 3), padding="same", activation="relu")

input_img = tf.keras.layers.Input(shape=(64, 64, 3))

# Stage 1 -> first branch
conv_1 = tf.keras.layers.Conv2D(filters=16, **conv_opts)(input_img)
conv_2 = tf.keras.layers.Conv2D(filters=32, **conv_opts)(conv_1)
conv_3 = tf.keras.layers.Conv2D(filters=32, **conv_opts)(conv_2)
branch_1 = tf.keras.layers.MaxPooling2D((2, 2))(conv_3)

# Stage 2 continues from the un-pooled conv_3 output -> second branch
conv_4 = tf.keras.layers.Conv2D(filters=32, **conv_opts)(conv_3)
conv_5 = tf.keras.layers.Conv2D(filters=64, **conv_opts)(conv_4)
conv_6 = tf.keras.layers.Conv2D(filters=64, **conv_opts)(conv_5)
branch_2 = tf.keras.layers.MaxPooling2D((2, 2))(conv_6)

# Stage 3 continues from the un-pooled conv_6 output -> third branch
conv_7 = tf.keras.layers.Conv2D(filters=64, **conv_opts)(conv_6)
conv_8 = tf.keras.layers.Conv2D(filters=128, **conv_opts)(conv_7)
conv_9 = tf.keras.layers.Conv2D(filters=128, **conv_opts)(conv_8)
branch_3 = tf.keras.layers.MaxPooling2D((2, 2))(conv_9)

# Merge the branches and classify into 15 classes.
concat = tf.keras.layers.concatenate([branch_1, branch_2, branch_3], axis=-1)
flatten = tf.keras.layers.Flatten()(concat)
dense = tf.keras.layers.Dense(64, activation="relu")(flatten)
output = tf.keras.layers.Dense(15, activation="softmax")(dense)

model_branching = tf.keras.models.Model(inputs=input_img, outputs=output)
model_branching.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model_branching.summary()

In [None]:
# Train the concatenating branching model for 20 epochs, tracking accuracy on
# the held-out test split after every epoch.
history_branching = model_branching.fit(
    x=X_train,
    y=y_train,
    epochs=20,
    validation_data=(X_test, y_test),
)

In [None]:
# Training vs. validation accuracy per epoch for the concatenating model.
for metric in ('accuracy', 'val_accuracy'):
    plt.plot(history_branching.history[metric], label=metric)
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
# Only the upper half of the accuracy range is shown.
plt.ylim([0.5, 1])
plt.legend(loc='lower right')

In [None]:
# Branching CNN, summed variant: same tap-after-every-third-convolution layout,
# but every convolution has 64 filters so the three pooled branches can be
# added element-wise instead of concatenated.
conv_opts = dict(filters=64, kernel_size=(3, 3), padding="same", activation="relu")

input_img = tf.keras.layers.Input(shape=(64, 64, 3))

# Stage 1 -> first branch
conv_1 = tf.keras.layers.Conv2D(**conv_opts)(input_img)
conv_2 = tf.keras.layers.Conv2D(**conv_opts)(conv_1)
conv_3 = tf.keras.layers.Conv2D(**conv_opts)(conv_2)
branch_1 = tf.keras.layers.MaxPooling2D((2, 2))(conv_3)

# Stage 2 continues from the un-pooled conv_3 output -> second branch
conv_4 = tf.keras.layers.Conv2D(**conv_opts)(conv_3)
conv_5 = tf.keras.layers.Conv2D(**conv_opts)(conv_4)
conv_6 = tf.keras.layers.Conv2D(**conv_opts)(conv_5)
branch_2 = tf.keras.layers.MaxPooling2D((2, 2))(conv_6)

# Stage 3 continues from the un-pooled conv_6 output -> third branch
conv_7 = tf.keras.layers.Conv2D(**conv_opts)(conv_6)
conv_8 = tf.keras.layers.Conv2D(**conv_opts)(conv_7)
conv_9 = tf.keras.layers.Conv2D(**conv_opts)(conv_8)
branch_3 = tf.keras.layers.MaxPooling2D((2, 2))(conv_9)

# Sum the branches and classify into 15 classes.
added = tf.keras.layers.add([branch_1, branch_2, branch_3])
flatten = tf.keras.layers.Flatten()(added)
dense = tf.keras.layers.Dense(64, activation="relu")(flatten)
output = tf.keras.layers.Dense(15, activation="softmax")(dense)

model_branching_summed = tf.keras.models.Model(inputs=input_img, outputs=output)
model_branching_summed.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model_branching_summed.summary()

In [None]:
# Train the summed branching model for 20 epochs, tracking accuracy on the
# held-out test split after every epoch.
history_summed = model_branching_summed.fit(
    x=X_train,
    y=y_train,
    epochs=20,
    validation_data=(X_test, y_test),
)

In [None]:
# Training vs. validation accuracy per epoch for the summed branching model.
for metric in ('accuracy', 'val_accuracy'):
    plt.plot(history_summed.history[metric], label=metric)
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
# Only the upper half of the accuracy range is shown.
plt.ylim([0.5, 1])
plt.legend(loc='lower right')