# 

## How to make a digit recognizer <font color=red>from scratch without frameworks</font>: a NumPy-based solver.

### <font color="Blue">1. Preparation before enjoying MNIST</font>

In [None]:
# import libraries
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [None]:
# Follow Kaggle's usual pattern: list every file under the input directory,
# then load the two CSV files used by this notebook.
import os
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# train.csv is split into labels and pixels further below;
# test.csv (the "quiz" data) is only used for the final submission.
train_data = pd.read_csv("/kaggle/input/digit-recognizer/train.csv")
quiz_data = pd.read_csv("/kaggle/input/digit-recognizer/test.csv")

In [None]:
# X = pixel columns (every column after the first; 28*28=784 values per image),
# T = the first column, kept 2-D so it stays one label per row.
X = train_data.iloc[:, 1:].to_numpy()
T = train_data.iloc[:, [0]].to_numpy()

# Hold out 20% of the labelled rows as a local test set.
x_train, x_test, t_train, t_test = train_test_split(X, T, test_size=0.2)

# Flat pixel rows -> (N, channel=1, 28, 28) images.
x_train = x_train.reshape(-1, 1, 28, 28)
x_test = x_test.reshape(-1, 1, 28, 28)

# One-hot encode the targets: row k of the 10x10 identity matrix is the
# one-hot vector of digit k, so indexing by the integer labels does the job.
t_train = np.eye(10)[t_train.astype("int")].reshape(-1, 10)
t_test = np.eye(10)[t_test.astype("int")].reshape(-1, 10)

# Quiz data: every column is a pixel, so convert and reshape in one go.
quiz_x = quiz_data.to_numpy().reshape(-1, 1, 28, 28)

### <font color="Blue">2. Let's check sample images.</font>

In [None]:
# Let's check sample images 28*28=784
%matplotlib inline
for i in range(5):
    plt.imshow(x_train[i][0],cmap='Greys')
    plt.show()
    print("label: ", t_train[i])

### <font color="Blue">3. Define functions and classes: softmax, cross entropy, ReLU, etc.</font>

In [None]:
# Softmax with an overflow countermeasure (the maximum is subtracted before exp).
def softmax(x):
    """Return the softmax of ``x``.

    A 2-D input is normalised row by row (each row sums to 1).
    Any other input is normalised over all of its elements at once.
    Subtracting the maximum first keeps ``np.exp`` from overflowing and
    does not change the result.
    """
    if x.ndim != 2:
        exps = np.exp(x - np.max(x))
        return exps / np.sum(exps)

    # Row-wise version: keepdims lets the (N, 1) reductions broadcast back.
    row_max = np.max(x, axis=1, keepdims=True)
    exps = np.exp(x - row_max)
    return exps / np.sum(exps, axis=1, keepdims=True)

def cross_entropy_error(y, t):
    """Mean cross-entropy between predicted probabilities and targets.

    Parameters
    ----------
    y : ndarray
        Predicted probabilities, either one sample (1-D) or a batch (2-D).
    t : ndarray
        Targets, either one-hot (same number of elements as ``y``) or
        class indices.

    Returns
    -------
    float
        ``-sum(log(p_correct + 1e-8)) / batch_size``.
    """
    # Promote a single sample to a batch of one.
    if y.ndim == 1:
        y = y.reshape(1, y.size)
        t = t.reshape(1, t.size)

    # One-hot targets are recognised by size and converted to class indices.
    labels = t.argmax(axis=1) if t.size == y.size else t

    n_samples = y.shape[0]
    eps = 1e-8  # keeps log() finite when a probability is exactly 0
    picked = y[np.arange(n_samples), labels]
    return -np.sum(np.log(picked + eps)) / n_samples


In [None]:
# im2col: unfold every filter-sized window of a batch of images into one row
# of a 2-D matrix, so that a convolution becomes a single matrix product.
def im2col(input_data, filter_h, filter_w, stride=1, pad=0):
    """Unfold sliding windows of ``input_data`` into a 2-D matrix.

    Parameters
    ----------
    input_data : ndarray of shape (N, C, H, W)
    filter_h, filter_w : int
        Window height and width.
    stride : int
        Step between consecutive windows.
    pad : int
        Zero padding added on every side of the two spatial axes.

    Returns
    -------
    ndarray of shape (N * out_h * out_w, C * filter_h * filter_w)
        One row per window position, always float64 (built from ``np.zeros``).
    """
    n, channels, height, width = input_data.shape
    out_h = (height + 2 * pad - filter_h) // stride + 1
    out_w = (width + 2 * pad - filter_w) // stride + 1

    # Pad only the spatial axes; batch and channel axes stay untouched.
    padded = np.pad(input_data, [(0, 0), (0, 0), (pad, pad), (pad, pad)], 'constant')
    windows = np.zeros((n, channels, filter_h, filter_w, out_h, out_w))

    # For each offset (dy, dx) inside the filter, gather that element of
    # every window at once with a strided slice.
    for dy in range(filter_h):
        rows = slice(dy, dy + stride * out_h, stride)
        for dx in range(filter_w):
            cols = slice(dx, dx + stride * out_w, stride)
            windows[:, :, dy, dx, :, :] = padded[:, :, rows, cols]

    # (N, C, fh, fw, oh, ow) -> (N, oh, ow, C, fh, fw) -> one row per window.
    return windows.transpose(0, 4, 5, 1, 2, 3).reshape(n * out_h * out_w, -1)

In [None]:
# col2im: the backward counterpart of im2col. It folds window rows back into
# image-shaped arrays, summing the contributions of overlapping windows.
def col2im(col, input_shape, filter_h, filter_w, stride=1, pad=0):
    """Fold a window matrix back into an array of shape ``input_shape``.

    Parameters
    ----------
    col : ndarray of shape (N * out_h * out_w, C * filter_h * filter_w)
    input_shape : tuple
        Target shape ``(N, C, H, W)``.
    filter_h, filter_w, stride, pad : int
        The same window geometry that produced ``col``.

    Returns
    -------
    ndarray of shape (N, C, H, W)
        Positions covered by several windows receive the sum of all of them.
    """
    n, channels, height, width = input_shape
    out_h = (height + 2 * pad - filter_h) // stride + 1
    out_w = (width + 2 * pad - filter_w) // stride + 1

    # One row per window -> (N, C, fh, fw, oh, ow).
    windows = col.reshape(n, out_h, out_w, channels, filter_h, filter_w)
    windows = windows.transpose(0, 3, 4, 5, 1, 2)

    # The canvas includes the padding plus (stride - 1) spare rows/columns.
    canvas = np.zeros((n, channels,
                       height + 2 * pad + stride - 1,
                       width + 2 * pad + stride - 1))
    for dy in range(filter_h):
        rows = slice(dy, dy + stride * out_h, stride)
        for dx in range(filter_w):
            cols = slice(dx, dx + stride * out_w, stride)
            canvas[:, :, rows, cols] += windows[:, :, dy, dx, :, :]

    # Drop the padding border (and the spare margin) again.
    return canvas[:, :, pad:height + pad, pad:width + pad]

In [None]:
# ReLU activation layer
class Relu:
    """Element-wise rectified linear unit.

    ``forward`` remembers which inputs were <= 0 so that ``backward`` can
    block the gradient at exactly those positions.
    """

    def __init__(self):
        # Boolean mask of the inputs that were <= 0 in the last forward call.
        self.mask = None

    def forward(self, x):
        """Return a copy of ``x`` with every element <= 0 replaced by 0."""
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0

        return out

    def backward(self, dout):
        """Return the gradient with respect to the forward input.

        The gradient passes through where the input was positive and is 0
        elsewhere. A copy is modified so the caller's ``dout`` array is left
        untouched (the previous version zeroed entries of ``dout`` in place).
        """
        dx = dout.copy()
        dx[self.mask] = 0

        return dx


In [None]:
# Affine (fully connected) layer
class Affine:
    """Fully connected layer computing ``x . W + b``.

    Inputs with more than two axes are flattened to (batch, features) in
    ``forward`` and the gradient is reshaped back in ``backward``.
    """

    def __init__(self, input_size, output_size):
        # Small Gaussian weights (std 0.01) and zero biases.
        self.W = 0.01 * np.random.randn(input_size, output_size)
        self.b = np.zeros(output_size)
        # Cached by forward() for use in backward().
        self.x = None
        self.original_x_shape = None
        # Gradients filled in by backward().
        self.dW = None
        self.db = None

    def forward(self, x):
        """Flatten ``x`` to 2-D, cache it, and return ``x . W + b``."""
        self.original_x_shape = x.shape
        self.x = x.reshape(x.shape[0], -1)
        return np.dot(self.x, self.W) + self.b

    def backward(self, dout):
        """Store ``dW``/``db`` and return the gradient w.r.t. the input."""
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis=0)

        # Restore the shape the input had before it was flattened.
        grad_input = np.dot(dout, self.W.T)
        return grad_input.reshape(*self.original_x_shape)


In [None]:
# Softmax + cross-entropy loss layer
class SoftmaxWithLoss:
    """Final layer: softmax followed by the mean cross-entropy loss."""

    def __init__(self):
        self.loss = None  # loss value from the last forward call
        self.y = None     # softmax output from the last forward call
        self.t = None     # targets from the last forward call

    def forward(self, x, t):
        """Cache targets and softmax output, and return the loss.

        ``x`` holds the raw scores; ``t`` holds the targets, either one-hot
        or class indices (both forms are handled by ``cross_entropy_error``).
        """
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)

        return self.loss

    def backward(self, dout=1):
        """Return the gradient of the loss with respect to the scores.

        The gradient of softmax + cross-entropy is ``(y - t) / batch_size``.
        It is scaled by the upstream gradient ``dout`` (the previous version
        accepted ``dout`` but ignored it). With the default ``dout=1`` the
        result is unchanged.
        """
        batch_size = self.t.shape[0]

        # Targets given as one-hot vectors
        if self.t.size == self.y.size:
            dx = (self.y - self.t) / batch_size
        else:
            # Targets given as class indices: subtract 1 at the true class.
            dx = self.y.copy()
            dx[np.arange(batch_size), self.t] -= 1
            dx = dx / batch_size

        return dx * dout

In [None]:
# Convolution layer implemented with im2col / col2im
class Convolution:
    """2-D convolution expressed as one matrix product per forward pass."""

    def __init__(self, input_channel, output_channel, kernel_h=5, kernel_w=5, stride=1, pad=0):
        self.stride = stride
        self.pad = pad
        # Filters have shape (output_channel, input_channel, kernel_h, kernel_w).
        self.W = 0.01 * np.random.randn(output_channel, input_channel, kernel_h, kernel_w)
        self.b = np.zeros(output_channel)
        # Cached by forward() for use in backward().
        self.x = None
        self.col = None
        self.col_W = None
        # Gradients filled in by backward().
        self.dW = None
        self.db = None

    def forward(self, x):
        """Convolve ``x`` of shape (N, C, H, W); returns (N, filters, out_h, out_w)."""
        n_filters, _, k_h, k_w = self.W.shape
        batch, _, height, width = x.shape
        out_h = 1 + int((height + 2 * self.pad - k_h) / self.stride)
        out_w = 1 + int((width + 2 * self.pad - k_w) / self.stride)

        # One row per window, one column per filter.
        windows = im2col(x, k_h, k_w, self.stride, self.pad)
        filters = self.W.reshape(n_filters, -1).T
        flat = np.dot(windows, filters) + self.b

        self.x = x
        self.col = windows
        self.col_W = filters
        # (N*oh*ow, filters) -> (N, oh, ow, filters) -> (N, filters, oh, ow)
        return flat.reshape(batch, out_h, out_w, -1).transpose(0, 3, 1, 2)

    def backward(self, dout):
        """Store ``dW``/``db`` and return the gradient w.r.t. the input."""
        n_filters, channels, k_h, k_w = self.W.shape
        # Bring dout into the same (windows, filters) layout used in forward().
        flat_dout = dout.transpose(0, 2, 3, 1).reshape(-1, n_filters)

        self.db = np.sum(flat_dout, axis=0)
        weight_grad = np.dot(self.col.T, flat_dout)
        self.dW = weight_grad.transpose(1, 0).reshape(n_filters, channels, k_h, k_w)

        # Gradient per window, folded back into image shape.
        window_grad = np.dot(flat_dout, self.col_W.T)
        return col2im(window_grad, self.x.shape, k_h, k_w, self.stride, self.pad)

In [None]:
# Max-pooling layer
class Pooling:
    """Max pooling over ``pool_h`` x ``pool_w`` windows, built on im2col / col2im."""

    def __init__(self, pool_h, pool_w, stride=1, pad=0):
        self.pool_h = pool_h
        self.pool_w = pool_w
        self.stride = stride
        self.pad = pad

        # Cached by forward() for use in backward().
        self.x = None
        self.arg_max = None

    def forward(self, x):
        """Return the window-wise maximum of ``x`` with shape (N, C, out_h, out_w).

        The output size includes ``pad`` and uses the same formula as
        ``im2col``. The previous version left ``pad`` out, so any non-zero
        padding produced a shape mismatch in the reshape below.
        Note that im2col pads with zeros, so with ``pad > 0`` a border window
        whose real values are all negative yields 0.
        """
        Num, Channel, Height, Width = x.shape
        out_h = (Height + 2 * self.pad - self.pool_h) // self.stride + 1
        out_w = (Width + 2 * self.pad - self.pool_w) // self.stride + 1

        col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
        # One row per (sample, position, channel) window.
        col = col.reshape(-1, self.pool_h * self.pool_w)
        arg_max = np.argmax(col, axis=1)
        out = np.max(col, axis=1)
        out = out.reshape(Num, out_h, out_w, Channel).transpose(0, 3, 1, 2)

        self.x = x
        self.arg_max = arg_max
        return out

    def backward(self, dout):
        """Route each upstream gradient to the position that held the maximum."""
        dout = dout.transpose(0, 2, 3, 1)
        pool_size = self.pool_h * self.pool_w

        # All zeros except the argmax slot of every window.
        dmax = np.zeros((dout.size, pool_size))
        dmax[np.arange(self.arg_max.size), self.arg_max.flatten()] = dout.flatten()
        dmax = dmax.reshape(dout.shape + (pool_size,))

        dcol = dmax.reshape(dmax.shape[0] * dmax.shape[1] * dmax.shape[2], -1)
        dx = col2im(dcol, self.x.shape, self.pool_h, self.pool_w, self.stride, self.pad)
        return dx

In [None]:
# Simple convolutional network. x = input data, t = target labels
class SimpleConvNet:
    """Conv(5x5, 10) -> ReLU -> MaxPool(2x2, stride 2) -> Conv(5x5, 10) -> ReLU -> Affine -> softmax loss.

    Parameters
    ----------
    input_dim : tuple
        ``(channels, height, width)`` of one input image.
    output_size : int
        Number of classes produced by the final Affine layer.
    weight_init_std : float
        Accepted for backward compatibility but not applied: the layer
        classes draw their initial weights with a fixed std of 0.01.

    Raises
    ------
    ValueError
        If ``input_dim`` is too small for the conv/pool stack.
    """

    def __init__(self, input_dim=(1, 28, 28), output_size=10, weight_init_std=0.01):
        channels, height, width = input_dim
        n_filters, kernel, pool = 10, 5, 2

        # Follow the spatial size through the stack so the Affine layer gets
        # the right input width (10 * 8 * 8 = 640 for 28x28 images). The
        # previous version hard-coded 1 input channel, 640 features and 10
        # classes, ignoring input_dim and output_size.
        conv1_h, conv1_w = height - kernel + 1, width - kernel + 1
        pool_h, pool_w = (conv1_h - pool) // pool + 1, (conv1_w - pool) // pool + 1
        conv2_h, conv2_w = pool_h - kernel + 1, pool_w - kernel + 1
        if conv2_h <= 0 or conv2_w <= 0:
            raise ValueError(f"input_dim {input_dim} is too small for this network")
        affine_in = n_filters * conv2_h * conv2_w

        # Insertion order defines the forward order.
        self.layers = dict()
        self.layers['Conv1'] = Convolution(channels, n_filters, kernel, kernel)
        self.layers['Relu1'] = Relu()
        self.layers['Pool1'] = Pooling(pool_h=pool, pool_w=pool, stride=pool)
        self.layers['Conv2'] = Convolution(n_filters, n_filters, kernel, kernel)
        self.layers['Relu2'] = Relu()
        self.layers['Affine'] = Affine(affine_in, output_size)
        self.last_layer = SoftmaxWithLoss()

        # params refers to the very same arrays the layers own, so optimizers
        # must update them in place for the layers to see the change.
        self.params = {}
        self.params['W1'] = self.layers['Conv1'].W
        self.params['b1'] = self.layers['Conv1'].b
        self.params['W2'] = self.layers['Conv2'].W
        self.params['b2'] = self.layers['Conv2'].b
        self.params['W3'] = self.layers['Affine'].W
        self.params['b3'] = self.layers['Affine'].b

    def forward(self, x):
        """Run ``x`` through every layer except the loss; returns class scores."""
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self, x, t):
        """Return the softmax cross-entropy loss of the network on ``(x, t)``."""
        y = self.forward(x)
        return self.last_layer.forward(y, t)

    def backward(self, x, t):
        """Run a forward and a backward pass; return gradients keyed like ``params``."""
        # Forward pass first so every layer caches what its backward() needs.
        self.loss(x, t)
        dout = 1
        dout = self.last_layer.backward(dout)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        grads = {}
        grads['W1'], grads['b1'] = self.layers['Conv1'].dW, self.layers['Conv1'].db
        grads['W2'], grads['b2'] = self.layers['Conv2'].dW, self.layers['Conv2'].db
        grads['W3'], grads['b3'] = self.layers['Affine'].dW, self.layers['Affine'].db
        return grads
# Instantiate the network for 1-channel 28x28 images and 10 classes.
network = SimpleConvNet(
    input_dim=(1, 28, 28),
    output_size=10,
    weight_init_std=0.01,
)
# Stochastic gradient descent optimizer
class SGD:
    """Plain gradient descent: ``param -= lr * grad``."""

    def __init__(self, lr=0.01):
        self.lr = lr  # learning rate

    def update(self, params, grads):
        """Apply one descent step to every entry of ``params``.

        ``-=`` keeps NumPy arrays updated in place, so objects that hold
        references to the same arrays see the new values.
        """
        for key in params:
            params[key] -= self.lr * grads[key]
# Settings for the SGD run: mini-batch size, optimizer and accuracy history.
batch_size = 256
sgd = SGD(lr=0.008)
acc_list_SGD = []

In [None]:
class Adam:
    """Adam optimizer with the bias correction folded into the step size."""

    def __init__(self, lr, beta1, beta2):
        self.lr, self.beta1, self.beta2 = lr, beta1, beta2
        self.iter = 0   # number of update() calls so far
        self.m = None   # running mean of the gradients, per parameter
        self.v = None   # running mean of the squared gradients, per parameter

    def update(self, params, grads):
        """Apply one Adam step to every entry of ``params`` (in place)."""
        # Lazily create zero moment buffers shaped like the parameters.
        if self.m is None:
            self.m = {key: np.zeros_like(val) for key, val in params.items()}
            self.v = {key: np.zeros_like(val) for key, val in params.items()}

        self.iter += 1
        # Bias-corrected learning rate for this step.
        lr_t = self.lr * np.sqrt(1.0 - self.beta2**self.iter) / (1.0 - self.beta1**self.iter)

        for key in params:
            grad = grads[key]
            # Exponential moving averages written in incremental form.
            self.m[key] += (1 - self.beta1) * (grad - self.m[key])
            self.v[key] += (1 - self.beta2) * (grad**2 - self.v[key])

            # The 1e-7 guards against division by zero.
            params[key] -= lr_t * self.m[key] / (np.sqrt(self.v[key]) + 1e-7)
# Settings for the Adam run: mini-batch size, optimizer and accuracy history.
batch_size = 256
adam = Adam(lr=0.0009, beta1=0.9, beta2=0.999)
acc_list_adam = []

### <font color="Blue">4. Let's start training with Stochastic Gradient Descent. </font>

In [None]:
# Training with stochastic (mini-batch) gradient descent
for epoch in range(10):
    # Visit the training samples in a fresh random order every epoch.
    order = np.random.permutation(len(x_train))
    for start in range(0, len(order), batch_size):
        batch = order[start:start + batch_size]
        x = x_train[batch]
        t = t_train[batch]
        grads = network.backward(x, t)
        sgd.update(network.params, grads)

    # Accuracy on the held-out split after each epoch.
    y_test = network.forward(x_test)
    hits = y_test.argmax(axis=1) == t_test.argmax(axis=1)
    acc_list_SGD.append(hits.mean())
    print(f'epoch {epoch + 1} | accuracy {acc_list_SGD[-1]:.2%}')

### <font color="Blue">5. Let's start training with Adam. </font>

In [None]:
# Training with Adam
# NOTE(review): `network` is the same object that was trained with SGD, so
# this run starts from the SGD-trained weights rather than a fresh network.
for epoch in range(10):
    # Visit the training samples in a fresh random order every epoch.
    order = np.random.permutation(len(x_train))
    for start in range(0, len(order), batch_size):
        batch = order[start:start + batch_size]
        x = x_train[batch]
        t = t_train[batch]
        grads = network.backward(x, t)
        adam.update(network.params, grads)

    # Accuracy on the held-out split after each epoch.
    y_test = network.forward(x_test)
    hits = y_test.argmax(axis=1) == t_test.argmax(axis=1)
    acc_list_adam.append(hits.mean())
    print(f'epoch {epoch + 1} | accuracy {acc_list_adam[-1]:.2%}')

In [None]:
# Per-epoch accuracy on the held-out split for both optimizers.
# The legend previously read "Static Gradient Descent"; SGD stands for
# stochastic gradient descent.
plt.plot(acc_list_SGD, label='Stochastic Gradient Descent')
plt.plot(acc_list_adam, label='Adam')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend()

### <font color="Blue">6. Let's predict by using test data! </font>

#### (1)Test sample

In [None]:
# Pick one quiz image and display its single channel.
test_sample_num = 2900
plt.imshow(quiz_x[test_sample_num, 0], cmap='Greys')

In [None]:
# Run the forward pass once and reuse the scores (it used to be computed
# twice). Indexing with [[...]] keeps the batch axis, giving shape (1, 1, 28, 28).
sample_scores = network.forward(quiz_x[[test_sample_num]])
print(sample_scores)
test_answer = np.argmax(sample_scores)
print("------------------------------------------------------")
# NOTE(review): the message below is printed unconditionally. The quiz data
# carries no labels, so correctness has to be judged by eye from the image.
print("The prediction is",test_answer,".","That's correct!")
print("------------------------------------------------------")

### <font color="Blue">7. Let's make submission data! </font>

In [None]:
# Predict the whole quiz set in mini-batches. The previous loop ran one
# forward pass per image and hard-coded the number of images (28000).
predict_batch_size = 1000
list_predict = []
for start in range(0, len(quiz_x), predict_batch_size):
    scores = network.forward(quiz_x[start:start + predict_batch_size])
    # One predicted digit per row of scores.
    list_predict.extend(scores.argmax(axis=1))
Quiz_TL = pd.Series(list_predict, name="Label").astype("int32")

In [None]:
# 1-based image ids, one per prediction. This replaces a list comprehension
# that was used only for its append() side effect and a hard-coded length.
listIndex = list(range(1, len(Quiz_TL) + 1))
# NOTE(review): Kaggle's sample submission for this competition appears to
# spell the column "ImageId" - confirm that the "ImageID" header is accepted.
ImageID = pd.Series(listIndex, name="ImageID").astype("int32")

In [None]:
# Put the id column and the predicted labels side by side, then preview the end.
submission = pd.concat([ImageID, Quiz_TL], axis="columns")
submission.tail()

In [None]:
# Write the submission without the DataFrame index column.
output_name = "prediction_from_scratch.csv"
submission.to_csv(output_name, index=False)
print("prediction_from_scratch.csv was saved.")