<div style="background: #e6e6d8 url('https://dl.dropboxusercontent.com/s/t0gu051d08sei65/bg-retro-noise.png');
  color: #121212; min-height:300px;">
    
<section style="position: absolute;
  width: 100%;
  min-width: 800px;
  text-align: center;
  top: 50%;
  margin-top: -55px;">
    
  <h3 style="transform: matrix(1, -0.15, 0, 1, 0, 0);
  -ms-transform: matrix(1, -0.10, 0, 1, 0, 0);
  -webkit-transform: matrix(1, -0.15, 0, 1, 0, 0);
  text-transform: uppercase;
  font-weight: 400;
  font-size: 70px;
  text-shadow: 4px 5px #e6e6d8, 6px 7px #c6a39a;">Chinese MNIST</h3>
    <p style="text-align:center;">@koayhongvin</p>
</section>
    </div>

<ul class="list-group">
  <li class="list-group-item active">Notebook Content</li>
    
  <li class="list-group-item d-flex justify-content-between align-items-center" ><span class="badge badge-primary badge-pill">1</span> <a href="#1">
    Insights of Data
    </a>
  </li>
  <li class="list-group-item d-flex justify-content-between align-items-center"> <span class="badge badge-primary badge-pill">2</span> <a href="#2">
    VGG-like model
     </a>
  </li>
  <li class="list-group-item d-flex justify-content-between align-items-center"><span class="badge badge-primary badge-pill">3</span> <a href="#3">
    No-Dense Model
    </a>
  </li>
    <li class="list-group-item d-flex justify-content-between align-items-center"><span class="badge badge-primary badge-pill">4</span> <a href="#4">
    Simpler Model
    </a>
  </li>
    <li class="list-group-item d-flex justify-content-between align-items-center"><span class="badge badge-primary badge-pill">5</span> <a href="#5">
    Visualizing the intermediate layers (Simpler Model)
    </a>
  </li>
</ul>

<div>
<a href="https://khvmaths.medium.com/chinese-digit-mnist-1b46f51e8f75"><p style="text-align:center;">Medium Article:</p></a></div>

<h1 id="1">Good Practices: The insights of the data</h1>

**Check for distribution of data**

In [None]:
import pandas as pd
# Load the metadata table that ships with the dataset.
data_df = pd.read_csv(filepath_or_buffer='../input/chinese-mnist/chinese_mnist.csv')
# Class balance check: number of rows for every (value, character) pair.
data_df.groupby(by=["value", "character"]).size()

**Check for missing data**

In [None]:
# Number of null entries per column, largest first.
total = data_df.isnull().sum().sort_values(ascending=False)
# The same counts expressed as a percentage of the rows in each column.
percent = (
    data_df.isnull().sum()
    .div(data_df.isnull().count())
    .mul(100)
    .sort_values(ascending=False)
)
# Put both views next to each other in one table and display it.
missing_data = pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])
missing_data

**Well, no missing data, but how about missing images?**

In [None]:
import os
# os.listdir already returns a list, so no extra list() wrapper is needed.
# The listing gets its own name so it is not confused with the image_files()
# helper that builds file names further down in this notebook.
image_file_names = os.listdir("../input/chinese-mnist/data/data")
print("Number of image files: {}".format(len(image_file_names)))

**Add the image path and sizes to the dataframe!**

In [None]:
import skimage.io
import numpy as np

def image_files(x):
    """Build the image file name for one metadata row.

    Parameters
    ----------
    x : sequence or pandas.Series
        Row whose first three values, in order, fill the three slots of the
        ``input_<a>_<b>_<c>.jpg`` pattern. Any further values are ignored.

    Returns
    -------
    str
        The file name, without a directory part.
    """
    # Take the values by position. Integer keys on a label-indexed Series
    # (x[0]) are deprecated in pandas and are slated to be treated as labels,
    # so the row is unpacked instead of indexed.
    first, second, third = tuple(x)[:3]
    return f"input_{first}_{second}_{third}.jpg"
# One file name per metadata row, built by image_files() from each row.
data_df["file"] = data_df.apply(image_files, axis=1)

def image_sizes(file_name):
    """Read one image from the dataset folder and return its shape as a list."""
    image = skimage.io.imread("../input/chinese-mnist/data/data/" + file_name)
    return list(image.shape)

# One row of shape entries per image.
image_size = np.stack(data_df['file'].apply(image_sizes))
# An array's shape is ordered (rows, columns), i.e. height first and width
# second, so the columns are labelled 'h', 'w' rather than 'w', 'h'.
# NOTE(review): two column names assume every image is 2-D (single channel) - confirm.
image_size_df = pd.DataFrame(image_size, columns=['h', 'w'])
# Column-wise concat aligns on the index; both frames carry the default
# 0..n-1 index at this point.
data_df = pd.concat([data_df, image_size_df], axis=1, sort=False)

data_df.head()

**Split the dataset**

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing, then take 20% of the remainder for
# validation. Both splits are stratified on the "code" column and seeded.
train_df, test_df = train_test_split(
    data_df,
    test_size=0.2,
    random_state=42,
    stratify=data_df["code"].values,
)
train_df, val_df = train_test_split(
    train_df,
    test_size=0.2,
    random_state=42,
    stratify=train_df["code"].values,
)

# Report how many rows ended up in each subset.
for template, subset in (
    ("Train set: {}", train_df),
    ("Test set: {}", test_df),
    ("Validation set: {}", val_df),
):
    print(template.format(subset.shape[0]))

**Encode the categories**

In [None]:
import skimage.transform

def read_image(file_name):
    """Load one dataset image and resize it to shape ``(64, 64, 1)``.

    ``file_name`` is relative to the dataset image folder. The resized array
    is returned.
    """
    path = "../input/chinese-mnist/data/data/" + file_name
    return skimage.transform.resize(skimage.io.imread(path), (64, 64, 1), mode='reflect')

def categories_encoder(dataset, var='character'):
    """Turn a metadata frame into model inputs and one-hot targets.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must contain a ``'file'`` column with image file names and the
        column named by ``var``.
    var : str
        Name of the label column to one-hot encode.

    Returns
    -------
    tuple
        ``(X, y)`` where ``X`` stacks the images returned by ``read_image``
        along a new first axis and ``y`` is the dummy-encoded label frame.
    """
    images = [read_image(name) for name in dataset['file']]
    one_hot = pd.get_dummies(dataset[var], drop_first=False)
    return np.stack(images), one_hot

# Encode the three splits in the same order as before: train, validation, test.
(X_train, y_train), (X_val, y_val), (X_test, y_test) = (
    categories_encoder(split) for split in (train_df, val_df, test_df)
)

<h1 id="2">VGG-like Model</h1>

In [None]:
from keras import optimizers
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, MaxPool2D, Dropout, BatchNormalization,LeakyReLU
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping, ReduceLROnPlateau, LearningRateScheduler
from keras.utils import to_categorical

# VGG-style network: two blocks of (conv, conv, max-pool, dropout) followed
# by a dense classifier with one softmax output per label column.
model = Sequential([
    Conv2D(32, kernel_size=3, input_shape=(64, 64, 1), activation='relu', padding='same'),
    Conv2D(32, kernel_size=3, activation='relu', padding='same'),
    MaxPool2D(2),
    Dropout(0.25),

    Conv2D(64, kernel_size=3, activation='relu', padding='same'),
    Conv2D(64, kernel_size=3, activation='relu', padding='same'),
    MaxPool2D(2),
    Dropout(0.25),

    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(y_train.columns.size, activation='softmax'),
])
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

In [None]:
# Stop training once the monitored loss has not improved for 10 epochs.
# NOTE(review): monitor='loss' tracks the training loss; 'val_loss' may be the
# intended signal because validation data is supplied - confirm.
early_stopping = EarlyStopping(monitor='loss', patience=10, verbose=1)

# The callback only takes effect when it is handed to fit(), so pass it here
# in the same way the other training cells of this notebook do.
train_model  = model.fit(X_train, y_train,
                  batch_size=32,
                  epochs=10,
                  verbose=1,
                  validation_data=(X_val, y_val),
                  callbacks=[early_stopping])

In [None]:
# Loss and accuracy of the trained model on the held-out test split.
model.evaluate(x=X_test, y=y_test)

In [None]:
from sklearn import metrics
# Per-class report on the test split. Class indices are the arg-max over the
# one-hot label columns and over the predicted class scores.
print(metrics.classification_report(
    y_true=y_test.values.argmax(axis=1),
    y_pred=model.predict(X_test).argmax(axis=1),
    target_names=y_test.columns,
))

<h1 id="3">No-Dense Model</h1>

In [None]:
# Import from the same `keras` namespace as Sequential and the other layers
# used in this notebook, so the model is not assembled from two packages.
from keras.layers import GlobalAveragePooling2D, Activation

# Fully convolutional variant: one convolution per block (the VGG-like model
# uses two) and no Flatten/Dense head.
model=Sequential()
model.add(Conv2D(32, kernel_size=3, input_shape=(64, 64,1), activation='relu', padding='same'))
model.add(MaxPool2D(2))
model.add(Dropout(0.25))

model.add(Conv2D(64, kernel_size=3, activation='relu', padding='same'))
model.add(MaxPool2D(2))
model.add(Dropout(0.25))

# The last convolution emits one feature map per class; each map is averaged
# to a single score and softmax turns the scores into probabilities.
model.add(Conv2D(y_train.columns.size, kernel_size=3, activation='relu', padding='same'))
model.add(GlobalAveragePooling2D())
model.add(Activation('softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

In [None]:
# Halt training once the training loss has gone 10 epochs without improving.
early_stopping = EarlyStopping(monitor='loss', patience=10, verbose=1)

# Train for at most 50 epochs in batches of 32, with the validation split
# supplied for monitoring.
train_model = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=50,
    verbose=1,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
)

In [None]:
# Loss and accuracy of the trained model on the held-out test split.
model.evaluate(x=X_test, y=y_test)

In [None]:

# Per-class report on the test split. Class indices are the arg-max over the
# one-hot label columns and over the predicted class scores.
print(metrics.classification_report(
    y_true=y_test.values.argmax(axis=1),
    y_pred=model.predict(X_test).argmax(axis=1),
    target_names=y_test.columns,
))

<h1 id="4">Simpler Model</h1>

In [None]:
# Lighter variant of the VGG-like network: a single convolution per block
# instead of two, with the same dense classifier on top.
model = Sequential([
    Conv2D(32, kernel_size=3, input_shape=(64, 64, 1), activation='relu', padding='same'),
    MaxPool2D(2),
    Dropout(0.25),

    Conv2D(64, kernel_size=3, activation='relu', padding='same'),
    MaxPool2D(2),
    Dropout(0.25),

    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(y_train.columns.size, activation='softmax'),
])
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

In [None]:
# Halt training once the training loss has gone 10 epochs without improving.
early_stopping = EarlyStopping(monitor='loss', patience=10, verbose=1)

# Train for at most 50 epochs in batches of 32, with the validation split
# supplied for monitoring.
train_model = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=50,
    verbose=1,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping],
)

In [None]:
# Loss and accuracy of the trained model on the held-out test split.
model.evaluate(x=X_test, y=y_test)

In [None]:

# Per-class report on the test split. Class indices are the arg-max over the
# one-hot label columns and over the predicted class scores.
print(metrics.classification_report(
    y_true=y_test.values.argmax(axis=1),
    y_pred=model.predict(X_test).argmax(axis=1),
    target_names=y_test.columns,
))

<h1 id="5">Visualizing the Model</h1>

In [None]:
from keras import models
# plt is used for the figures below; import it here so the cell runs on a
# fresh kernel.
import matplotlib.pyplot as plt

# Model that returns the outputs of the first six layers of the current
# model, evaluated on a single test image.
layer_outputs = [layer.output for layer in model.layers][0:6]
activation_model = models.Model(inputs=model.input, outputs=layer_outputs)
activations = activation_model.predict(X_test[0:1])

# zip() below stops at the shorter sequence, so only the names of the six
# visualised layers are consumed.
layer_names = [layer.name for layer in model.layers]

images_per_row = 16

for layer_name, layer_activation in zip(layer_names, activations):
    # Each activation is indexed as (1, size, size, n_features).
    n_features = layer_activation.shape[-1]
    size = layer_activation.shape[1]
    # Number of grid rows needed to tile the channels, images_per_row per row.
    n_cols = n_features // images_per_row
    display_grid = np.zeros((size * n_cols, images_per_row * size))
    for col in range(n_cols):
        for row in range(images_per_row):
            channel_image = layer_activation[0,
                                             :, :,
                                             col * images_per_row + row]
            # Standardise the channel and map it onto 0..255 for display.
            channel_image -= channel_image.mean()
            channel_std = channel_image.std()
            # A constant channel has zero spread; skip the division so it
            # renders as flat mid-grey instead of turning into NaN.
            if channel_std > 0:
                channel_image /= channel_std
            channel_image *= 64
            channel_image += 128
            channel_image = np.clip(channel_image, 0, 255).astype('uint8')
            display_grid[col * size : (col + 1) * size,
                         row * size : (row + 1) * size] = channel_image
    # Scale the figure so every tile gets one unit of figure size.
    scale = 1. / size
    plt.figure(figsize=(scale * display_grid.shape[1],
                        scale * display_grid.shape[0]))
    plt.title(layer_name)
    plt.grid(False)
    plt.imshow(display_grid, aspect='auto', cmap='viridis')

<div class="jumbotron">
  <h1 class="display-4">Thank you for making it to the end!</h1>
  <p class="lead">Please upvote if you find this notebook helpful. Any comments are welcome. I'd love to get your feedback!</p>
  
</div>