# KMNIST Example with your own implementation (BONUS)

Now, in this part, you will work with Kuzushiji-MNIST data (https://github.com/rois-codh/kmnist) for character classification. 
The images contain one of the 10 characters in Kuzusjihi(cursive Japanese) Alphabet.
Use an appropriate loss function.

You should build a ConvNet architecture including all layers such as Conv2d, Maxpool, Dropout, and BatchNorm. You are free to design the layers as you like.

IMPORTANT: You are NOT allowed to use sklearn or any other implementations for the learning part . You are ALLOWED ONLY TO USE your own implementation from the above steps.

"KMNIST Dataset" (created by CODH), adapted from "Kuzushiji Dataset" (created by NIJL and others), doi:10.20676/00000341

In [1]:
from blg561.layer import layers_with_weights
from blg561.layer import layer
from blg561.layer.optimizers import SGDWithMomentum, VanillaSDGOptimizer
from blg561.layer.model import Model
from blg561.checks import grad_check,rel_error
import numpy as np
import scipy as sci
import os
from PIL import Image
import matplotlib.pyplot as plt
from tqdm import tqdm
import time
from sklearn.preprocessing import StandardScaler

%matplotlib inline
%load_ext autoreload
%autoreload 2
%reload_ext autoreload

In [2]:
# create your own dataloader
def load_kmnist():
    
    X_train = np.load("./kmnist/kmnist-train-imgs.npz")['arr_0']
    X_train = np.expand_dims(X_train, axis=1)
    X_test = np.load("./kmnist/kmnist-test-imgs.npz")['arr_0']
    X_test = np.expand_dims(X_test, axis=1)
    y_train = np.load("./kmnist/kmnist-train-labels.npz")['arr_0']
    y_test = np.load("./kmnist/kmnist-test-labels.npz")['arr_0']
    
    X_val = X_test[0:3000]
    X_test = X_test[3000:10000]
    y_val = y_test[0:3000]
    y_test = y_test[3000:10000]
    
    print(X_train.shape)
    
    return X_train[:100]/128-1, X_test[:100], X_val[:100], y_train[:100], y_test[:100], y_val[:100]

xtr, xte, xval, ytr, yte, yval = load_kmnist()

print(xtr)


(60000, 1, 28, 28)
[[[[-1.        -1.        -1.        ... -1.        -1.
    -1.       ]
   [-1.        -1.        -1.        ... -1.        -1.
    -1.       ]
   [-1.        -1.        -1.        ... -1.        -1.
    -1.       ]
   ...
   [-1.        -1.        -1.        ... -1.        -1.
    -1.       ]
   [-1.        -1.        -1.        ... -1.        -1.
    -1.       ]
   [-1.        -1.        -1.        ... -1.        -1.
    -1.       ]]]


 [[[-1.        -1.        -1.        ... -1.        -1.
    -1.       ]
   [-1.        -1.        -1.        ... -1.        -1.
    -1.       ]
   [-1.        -1.        -1.        ... -1.        -1.
    -1.       ]
   ...
   [-1.        -1.        -1.        ... -1.        -1.
    -1.       ]
   [-1.        -1.        -1.        ... -1.        -1.
    -1.       ]
   [-1.        -1.        -1.        ... -1.        -1.
    -1.       ]]]


 [[[-1.        -1.        -1.        ... -1.        -1.
    -1.       ]
   [-1.        -1.     

#### Create the model

In below, we provide an example model.

#### Train your model with the data and show the results as Loss Curves and Accuracy for Test in a Confusion Matrix (You can use scikit-learn's confusion matrix)

In [3]:
model = Model()
layers = [
          layers_with_weights.Conv2d(in_size=1, out_size=32, kernel_size=3, stride=1, padding=1),
          layer.BatchNorm(32),
          layer.ReLU(),
          layers_with_weights.Conv2d(in_size=32, out_size=32, kernel_size=3, stride=1, padding=1),
          layer.BatchNorm(32),
          layer.ReLU(), 
          layer.MaxPool2d(pool_height=2, pool_width=2, stride=1),
          layers_with_weights.Conv2d(in_size=32, out_size=64, kernel_size=3, stride=1, padding=1),
          layer.BatchNorm(64),
          layer.ReLU(),
          layer.MaxPool2d(pool_height=2, pool_width=2, stride=1),
          layer.Flatten(), 
          layers_with_weights.AffineLayer(43264, 128), 
          layer.BatchNorm(128),
          layer.ReLU(),
          layers_with_weights.AffineLayer(128,10),
          layer.Softmax()
        ]

model(layers) # Load layers to model object


In [4]:
from numpy import random

def xavier_init(model): ## execute xavier init on the model and return it
    for i in range(len(model.layers)): ## iterate over layers
        try:
            model.layers[i].W = np.random.randn(model.layers[i].W.shape) * np.sqrt(2.0 / model.layers[i].W.shape[-1]) ## xavier init
        except:
            continue
    return model

def get_mini_batches(X, y, batch_size):
    random_idxs = random.choice(X.shape[0], X.shape[0], replace=False)
    X_shuffled = X[random_idxs]
    y_shuffled = y[random_idxs]

    mini_batches = [(X_shuffled[i*batch_size:(i+1)*batch_size], y_shuffled[i*batch_size:(i+1)*batch_size]) for i in range(X.shape[0] // batch_size)]
    return mini_batches

def get_mb_len(X, y, batch_size):
    random_idxs = random.choice(X.shape[0], X.shape[0], replace=False)
    X_shuffled = X[random_idxs]
    y_shuffled = y[random_idxs]
    mini_batches = [(X_shuffled[i*batch_size:(i+1)*batch_size], y_shuffled[i*batch_size:(i+1)*batch_size]) for i in range(X.shape[0] // batch_size)]
    return len(mini_batches)

def whole_train_w_batch(model, x_train, y_train, x_val, y_val, x_test, y_test, lr=1e-2, reg=1e-3, epochs=200, verbose=False, optim="sgd", batch=500):

    model = xavier_init(model)
    predictions = np.ones(150)
    train_accs = []
    test_accs = []
    val_accs = []
    train_losses = []
    test_losses = []
    val_losses = []
    W_affine_layers = []
    regularization_strength = reg
    n_epochs = epochs
    print_every = 1
    test_every = 1
    mb_len = get_mb_len(x_train, y_train, batch)

    if (optim=="sgd"):
        optimizer = VanillaSDGOptimizer(model, lr=lr, regularization_str=regularization_strength)
    elif (optim=="sgdm"):
        optimizer = SGDWithMomentum(model,lr=lr, regularization_str=regularization_strength, mu=.5)

    for epoch in range(n_epochs):

        i = 0
        val_acc_per_epoch = 0
        tra_acc_per_epoch = 0
        tes_acc_per_epoch = 0

        for xybatch in tqdm(get_mini_batches(x_train, y_train, batch), "Epoch " + str(epoch), leave=False):
            xbatch = xybatch[0]
            ybatch = xybatch[1]
            softmax_out = model.forward(xbatch)
            predictions = np.argmax(softmax_out, axis=1)
            print(np.mean(predictions == ybatch))
            tra_acc_per_epoch += np.mean(predictions == ybatch)
            tra_loss = layer.loss(softmax_out, ybatch)
            train_losses.append(tra_loss)
            model.backward(ybatch)
            #print("bw tamam")
            optimizer.optimize()
            #print("optim tamam")

            if epoch % test_every == 5:
                softmax_out = model.forward(x_val)
                predictions = np.argmax(softmax_out, axis=1)
                val_loss = layer.loss(softmax_out, y_val)
                val_acc_per_epoch += np.mean(predictions == y_val)
                val_losses.append(val_loss)
                softmax_out = model.forward(x_test)
                predictions = np.argmax(softmax_out, axis=1)
                tes_loss = layer.loss(softmax_out, y_test)
                tes_acc_per_epoch += np.mean(predictions == y_test)
                test_losses.append(tes_loss)
                #print("test tamam")
                
        tra_acc_per_epoch /= mb_len
        tes_acc_per_epoch /= mb_len
        val_acc_per_epoch /= mb_len

        train_accs.append(tra_acc_per_epoch)
        test_accs.append(tes_acc_per_epoch)
        val_accs.append(val_acc_per_epoch)
        #print("loop tamam")
        if (epoch % print_every == 0) and verbose:
            print("Epoch: {}, Loss: {}, Accuracy: {}".format(epoch, tra_loss, tra_acc_per_epoch))
            #print("Epoch: {}, Val Loss: {}, Val Accuracy: {}".format(epoch, val_loss, val_acc_per_epoch))
            #print("Epoch: {}, Test Loss: {}, Test Accuracy: {}".format(epoch, tes_loss, tes_acc_per_epoch))
        time.sleep(0.5)
    return train_losses, test_losses, val_losses, train_accs, test_accs, val_accs, model


In [5]:
train_losses2, a, val_losses2, b, c, d, model = whole_train_w_batch(model, xtr, ytr, xval, yval, xte, yte, batch=8, epochs=15, optim="sgdm", verbose=True)

Epoch 0:   0%|          | 0/12 [00:00<?, ?it/s]

0.375


Epoch 0:   8%|▊         | 1/12 [00:09<01:43,  9.45s/it]

0.25


Epoch 0:  17%|█▋        | 2/12 [00:18<01:34,  9.44s/it]

0.0


Epoch 0:  25%|██▌       | 3/12 [00:28<01:25,  9.46s/it]

0.125


Epoch 0:  33%|███▎      | 4/12 [00:37<01:15,  9.45s/it]

0.25


Epoch 0:  42%|████▏     | 5/12 [00:47<01:06,  9.51s/it]

0.0


Epoch 0:  50%|█████     | 6/12 [00:56<00:56,  9.49s/it]

0.125


Epoch 0:  58%|█████▊    | 7/12 [01:06<00:47,  9.46s/it]

0.0


KeyboardInterrupt: 