In [1]:
from convolutional_network import *
from optimizer import SGD
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Number of images randomly drawn from train.csv for this run.
num_sample = 1000

# Seed NumPy's global generator so that the np.random.choice calls in this
# notebook (row subsampling, mini-batch draws in train) give the same result
# after every Restart & Run All. train_test_split is called without a
# random_state, so per the scikit-learn docs it also draws from this generator.
random_seed = 42
np.random.seed(random_seed)

In [3]:
# Read the full table; the 'label' column is the target, the rest are pixels.
df = pd.read_csv('train.csv')

# Divide by 255 (presumably 8-bit pixel intensities) and view every row as a
# 28x28 single-channel image.
x = (df.drop(columns='label').to_numpy() / 255.0).reshape(len(df), 28, 28, 1)

# Zero-pad two pixels on each side of height and width: 28x28 -> 32x32.
x = np.pad(
    x,
    pad_width=((0, 0), (2, 2), (2, 2), (0, 0)),
    mode='constant',
    constant_values=0,
)

# Keep num_sample distinct rows; random_indices is reused to pick the labels.
random_indices = np.random.choice(len(x), size=num_sample, replace=False)
x = x[random_indices]
x.shape

(1000, 32, 32, 1)

In [4]:
def one_hot_coding(y, num_classes=10):
    """Convert integer class labels into one-hot encoded rows.

    Parameters
    ----------
    y : pandas.Series or 1-D array-like of int
        Class labels. Each label selects the column that is set to 1.
    num_classes : int, default 10
        Width of every one-hot row (number of distinct classes).

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(len(y), num_classes)``; an empty input
        yields an array of shape ``(0, num_classes)``.

    Raises
    ------
    IndexError
        If a label lies outside ``[-num_classes, num_classes)``.
    TypeError
        If the labels are not of an integer (or boolean) dtype.
    """
    labels = np.asarray(y)
    if labels.size == 0:
        # Keep the column dimension so downstream shape logic still works.
        return np.zeros((0, num_classes), dtype=int)

    # Refuse float labels instead of silently truncating them.
    labels = labels.astype(np.intp, casting='same_kind')

    # Row k of the identity matrix is the one-hot vector for class k.
    return np.eye(num_classes, dtype=int)[labels]

In [5]:
# One-hot encode every label, then keep the rows that match the sampled images.
y = one_hot_coding(df['label'])[random_indices]
y.shape

(1000, 10)

### Splitting data

In [6]:
from sklearn.model_selection import train_test_split

# Keep 90% of the sampled images for training; the remainder is the test set.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=.9,
)

## Define LeNet-5 Model

In [7]:
# Model object for LeNet-5; the cells below attach its layers one by one with lenet5.add(...).
lenet5 = Convolutional_Neural_Network()

**LeNet-5 Architecture**

Input -> Convo(5, 5) -> Subsampling -> Convo(5, 5) -> Subsampling -> Convo(5, 5) -> Flatten -> Dense -> Dense (Output)

## Adding Layers

**Input_shape = (32, 32, 1)**

**Layer 0 : Convolutional Layer** 
- input shape         = (32, 32, 1)
- number of filters   = 6
- kernel size         = (5, 5)
- activation function = tanh
- stride              = 1
- padding             = valid\
***-> output shape    = (28, 28, 6)***

In [8]:
# Layer 0: 6 filters of 5x5, tanh, on the padded 32x32x1 input.
lenet5.add(
    Layers.Convo(
        input_shape=(1, 32, 32, 1),
        num_filter=6,
        kernel_size=(5, 5),
        stride=1,
        padding='valid',
        activation='tanh',
    )
)

genearate filters


**Layer 1 : Subsampling Layer (Average Pooling)** 
- input shape = (28, 28, 6)
- kernel size = (2, 2)
- stride = 2
- padding = valid\
***-> output shape = (14, 14, 6)***

In [9]:
# Layer 1: 2x2 average pooling with stride 2.
lenet5.add(
    Layers.AvgPool(
        kernel_size=(2, 2),
        stride=2,
        padding='valid',
    )
)

**Layer 2 : Convolutional Layer**
- input shape         = (14, 14, 6)
- number of filters   = 16
- kernel size         = (5, 5)
- activation function = tanh
- stride              = 1
- padding             = valid\
***-> output shape    = (10, 10, 16)***

In [10]:
# Layer 2: 16 filters of 5x5, tanh, on the 14x14x6 feature maps.
lenet5.add(
    Layers.Convo(
        input_shape=(1, 14, 14, 6),
        num_filter=16,
        kernel_size=(5, 5),
        stride=1,
        padding='valid',
        activation='tanh',
    )
)

genearate filters


**Layer 3 : Subsampling Layer (Average Pooling)** 
- input shape = (10, 10, 16)
- kernel size = (2, 2)
- stride = 2
- padding = valid\
***-> output shape = (5, 5, 16)***

In [11]:
# Layer 3: 2x2 average pooling with stride 2.
lenet5.add(
    Layers.AvgPool(
        kernel_size=(2, 2),
        stride=2,
        padding='valid',
    )
)

**Layer 4 : Convolutional Layer**
- input shape         = (5, 5, 16)
- number of filters   = 120
- kernel size         = (5, 5)
- activation function = tanh
- stride              = 1
- padding             = valid\
***-> output shape    = (1, 1, 120)***

In [12]:
# Layer 4: 120 filters of 5x5, tanh, on the 5x5x16 feature maps.
lenet5.add(
    Layers.Convo(
        input_shape=(1, 5, 5, 16),
        num_filter=120,
        kernel_size=(5, 5),
        stride=1,
        padding='valid',
        activation='tanh',
    )
)

genearate filters


**Layer 5 : Flatten Layer**
- input shape         = (1, 1, 120)\
***-> output shape    = (120, 1)***

In [13]:
# Layer 5: flatten the convolution output before the dense layers.
lenet5.add(
    Layers.Flatten()
)

**Layer 6 : Dense Layer**
- input shape         = (120, 1)
- dimension           = 84
- activation function = tanh
- train bias          = True\
***-> output shape    = (84, 1)***

In [14]:
# Layer 6: dense 120 -> 84 with tanh, trainable bias, Xavier-uniform option on.
lenet5.add(
    Layers.Dense(
        dim=(120, 84),
        activation='tanh',
        train_bias=True,
        xavier_uniform=True,
    )
)

generate weights


**Layer 7 : Dense Layer (Output)**
- input shape         = (84, 1)
- dimension           = 10
- activation function = Softmax
- train bias          = True\
***-> output shape    = (10, 1)***

In [15]:
# Layer 7 (output): dense 84 -> 10 with softmax, trainable bias, Xavier-uniform option on.
lenet5.add(
    Layers.Dense(
        dim=(84, 10),
        activation='softmax',
        train_bias=True,
        xavier_uniform=True,
    )
)

generate weights


_Total number of layers : 8_

## Training

In [16]:
def train(x_train, y_train, x_test, y_test, learning_rate = 0.01, epochs = 100, batch_size = 32, model = None, eval_every = None):
    """Train a network with per-sample SGD updates and print final metrics.

    Note that an "epoch" here is one randomly drawn group of ``batch_size``
    training samples, not a full pass over ``x_train``. The parameters are
    updated after every single sample of that group.

    Parameters
    ----------
    x_train, y_train : numpy.ndarray
        Training images and one-hot labels. Every sample is reshaped to
        ``(1, 32, 32, 1)`` and ``(10, 1)`` before being fed to the model.
    x_test, y_test : numpy.ndarray
        Held-out data, used only for the final printed metrics.
    learning_rate : float, default 0.01
        Passed to the ``SGD`` optimizer.
    epochs : int, default 100
        Number of sampling rounds.
    batch_size : int, default 32
        Samples drawn (without replacement) per round. Values larger than the
        number of training rows are clamped to that number.
    model : optional
        Network to train. Defaults to the notebook-level ``lenet5``.
    eval_every : int or None, default None
        When set, training accuracy (in percent) and cross-entropy loss are
        recorded on every round whose index is a multiple of this value.
        ``None`` records nothing, which keeps training as fast as before.

    Returns
    -------
    tuple of (list, list)
        ``(accuracy_points, loss_points)``; both are empty unless
        ``eval_every`` is given.

    Raises
    ------
    ValueError
        If ``eval_every`` is given but smaller than 1.
    """
    from tqdm import tqdm

    if eval_every is not None and eval_every < 1:
        raise ValueError("eval_every must be a positive integer or None")
    if model is None:
        # Backward-compatible default: the model built in the cells above.
        model = lenet5

    optim = SGD(model=model, learning_rate=learning_rate)
    accuracy_points = []
    loss_points = []

    num_train = x_train.shape[0]
    # Sampling without replacement cannot draw more rows than exist.
    draw_size = min(batch_size, num_train)

    for epoch in tqdm(range(epochs), desc='Epochs'):
        random_index = np.random.choice(num_train, size=draw_size, replace=False)
        for i in random_index:
            xi = x_train[i, :].reshape(1, 32, 32, 1)
            yi = y_train[i, :].reshape(10, 1)

            # forward pass
            model.forward_pass(xi)
            # backward pass
            model.backpropagation(yi)
            # update parameters
            optim.step()

        if eval_every is not None and epoch % eval_every == 0:
            # Opt-in: predicting on the whole training set every time is slow.
            y_pred_epoch = model.predict(x_train)
            accuracy_points.append(model.accuracy(y_train, y_pred_epoch) * 100)
            loss_points.append(model.cross_entropy_loss(y_train, y_pred_epoch))

    print('-----------------------------------------------------------')
    y_pred = model.predict(x_train)
    accuracy = model.accuracy(y_train, y_pred)
    print("\ntrain accuracy :", accuracy)
    loss = model.cross_entropy_loss(y_train, y_pred)
    print("train cross-entropy loss :", loss)
    print('-----------------------------------------------------------')
    y_pred_test = model.predict(x_test)
    accuracy_test = model.accuracy(y_test, y_pred_test)
    print("\ntest accuracy :", accuracy_test)
    loss_test = model.cross_entropy_loss(y_test, y_pred_test)
    print("test cross-entropy loss :", loss_test)
    print('-----------------------------------------------------------')

    return accuracy_points, loss_points

In [17]:
# 250 sampling rounds of 64 images each, learning rate 0.05.
epoch = 250
acc, loss = train(
    x_train, y_train,
    x_test, y_test,
    learning_rate=0.05,
    epochs=epoch,
    batch_size=64,
)

Epochs:   0%|          | 0/250 [00:00<?, ?it/s]

Epochs: 100%|██████████| 250/250 [29:11<00:00,  7.01s/it] 


-----------------------------------------------------------

train accuracy : 0.9366666666666666
train cross-entropy loss : 8.29768173238267
-----------------------------------------------------------

test accuracy : 0.83
test cross-entropy loss : 6.444873591553233
-----------------------------------------------------------


In [18]:
def plot_acc_loss(epoch, acc, loss, step=10):
    """Plot recorded accuracy and loss against the epoch they were taken at.

    Parameters
    ----------
    epoch : int
        Total number of training epochs; the x axis is ``range(0, epoch, step)``.
    acc, loss : sequence of float
        One value per recorded epoch, in recording order.
    step : int, default 10
        Interval (in epochs) at which the values were recorded.

    Raises
    ------
    ValueError
        If ``acc`` or ``loss`` does not hold exactly one value per x position.
    """
    recorded_epochs = list(range(0, epoch, step))
    expected = len(recorded_epochs)
    if len(acc) != expected or len(loss) != expected:
        # Fail with a readable message instead of matplotlib's shape error.
        raise ValueError(
            f"expected {expected} accuracy/loss points for epoch={epoch}, "
            f"step={step}; got {len(acc)} and {len(loss)}"
        )

    plt.plot(recorded_epochs, acc, label='Accuracy')
    plt.plot(recorded_epochs, loss, label='Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy/Loss')
    # Both curves share one axis, so label them.
    plt.legend()
    plt.show()

In [19]:
# plot_acc_loss(epoch, acc, loss)