<a href="https://colab.research.google.com/github/podo47/DL_HW2_Handcraft_LeNet5-Computational_Graph/blob/main/DL_HW2_Handcraft_Lenet5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# LeNet-5

## Mount to drive

In [None]:
# Mount Google Drive at /content/drive (Colab only); later cells read and write under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive') 

Mounted at /content/drive


## Import libraries

In [None]:
import os
import math
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
from PIL import Image
import cv2
from multiprocessing.pool import ThreadPool
import time
import pickle
from tqdm import tqdm

In [None]:
import warnings
# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides NumPy overflow / divide-by-zero RuntimeWarnings raised by the
# hand-written layers below; consider narrowing the filter while debugging.
warnings.filterwarnings('ignore')

## Part 1 : Data preprocessing

### Import data

In [None]:
# Each split list is a space-separated text file without a header row.
# Column 0 is used as the image path (later joined onto '/content/drive/MyDrive/' by read_image),
# column 1 as the label array.
# NOTE(review): the names `train` and `test` are re-bound to functions further down the notebook,
# so these DataFrames are only available until those cells run.
train = pd.read_csv('/content/drive/MyDrive/images/train.txt', sep=" ",header=None)
train_dir = np.array(train[0])
train_y = np.array(train[1])

valid = pd.read_csv('/content/drive/MyDrive/images/val.txt', sep=" ",header=None)
valid_dir = np.array(valid[0])
valid_y = np.array(valid[1])

test = pd.read_csv('/content/drive/MyDrive/images/test.txt', sep=" ",header=None)
test_dir = np.array(test[0])
test_y = np.array(test[1])

### Read images to array

In [None]:
def read_image(imgname):
    """Load one image from Drive and resize it to 28x28.

    Args:
        imgname: Image path relative to '/content/drive/MyDrive/'.

    Returns:
        The array produced by cv2.resize for a (28, 28) target size.

    Raises:
        FileNotFoundError: If OpenCV could not read the file.
    """
    path = '/content/drive/MyDrive/' + imgname
    img = cv2.imread(path)
    if img is None:
        # cv2.imread reports a missing or unreadable file by returning None instead of raising;
        # fail here with the offending path rather than with an opaque error inside cv2.resize.
        raise FileNotFoundError("Could not read image: " + path)
    return cv2.resize(img, (28, 28))

def read_images_to_array(data_dir):
    """Read every image listed in `data_dir` and stack the results into one array.

    Args:
        data_dir: Iterable of image paths accepted by read_image.

    Returns:
        np.ndarray built from the list of images, in the same order as `data_dir`.
    """
    # Two worker threads (ThreadPool uses threads, not processes) overlap the file reads.
    # The context manager tears the pool down even when one of the reads raises.
    with ThreadPool(processes=2) as pool:
        images = pool.map(read_image, data_dir)
    return np.array(images)

#### Save data (can skip)

In [None]:
# Optional: decode every listed image from Drive and build the three arrays from scratch.
# This can be skipped when rgb_dataset.npz already exists, because the "Load data" cell restores the same arrays.

X_train = read_images_to_array(train_dir)
X_valid = read_images_to_array(valid_dir)
X_test = read_images_to_array(test_dir)


In [None]:
# Cache the three decoded arrays in one .npz archive (keys: traindata / validdata / testdata)
# so later sessions can load them instead of re-reading every image.
np.savez("/content/drive/MyDrive/images/rgb_dataset.npz", traindata=X_train, validdata=X_valid, testdata=X_test)

#### Load data

In [None]:
# Restore the cached arrays written by the "Save data" cells.
# Only skip this cell if the "Save data" cells were run in this session; otherwise
# X_train / X_valid / X_test are undefined for the rest of the notebook.

with np.load("/content/drive/MyDrive/images/rgb_dataset.npz") as data:
    X_train = data["traindata"]
    X_valid = data["validdata"]
    X_test = data["testdata"]

In [None]:
# Sanity check: report the shape of each split before preprocessing.
print('X_train shape : ', X_train.shape)
print('X_valid shape : ',X_valid.shape)
print('X_test shape : ',X_test.shape)

X_train shape :  (63325, 28, 28, 3)
X_valid shape :  (450, 28, 28, 3)
X_test shape :  (450, 28, 28, 3)


### Mini-batch

In [None]:
def random_mini_batches(image, label, mini_batch_size = 256):
    """Shuffle a dataset and cut it into consecutive mini-batches.

    Args:
        image: 4-D array indexed by sample along axis 0.
        label: Array of labels aligned with `image` along axis 0.
        mini_batch_size: Number of samples per full batch.

    Returns:
        List of (image_batch, label_batch) tuples. Every batch holds
        `mini_batch_size` samples except possibly the last, which holds the remainder.
    """
    sample_count = image.shape[0]

    # A single permutation drives both arrays so each image keeps its own label.
    order = list(np.random.permutation(sample_count))
    images_shuffled = image[order, :, :, :]
    labels_shuffled = label[order]

    # Full-sized batches first.
    full_batches = math.floor(sample_count / mini_batch_size)
    batches = [
        (
            images_shuffled[index * mini_batch_size: (index + 1) * mini_batch_size, :, :, :],
            labels_shuffled[index * mini_batch_size: (index + 1) * mini_batch_size],
        )
        for index in range(full_batches)
    ]

    # Then the leftover samples, if the dataset size is not a multiple of the batch size.
    if sample_count % mini_batch_size != 0:
        tail_start = full_batches * mini_batch_size
        batches.append((
            images_shuffled[tail_start: sample_count, :, :, :],
            labels_shuffled[tail_start: sample_count],
        ))
    return batches

### Data preprocessing

#### Zero pad

In [None]:
def zero_pad(X, pad):
    """Zero-pad the two middle axes of a 4-D array.

    Args:
        X: 4-D array; axes 1 and 2 are padded, axes 0 and 3 are left untouched.
        pad: Number of zeros added before and after along axes 1 and 2.

    Returns:
        A new array with `2 * pad` extra entries along axes 1 and 2.
    """
    widths = ((0, 0), (pad, pad), (pad, pad), (0, 0))
    return np.pad(X, widths, mode = "constant", constant_values = 0)

#### Normalization

In [None]:
def normalise(image):
    """Rescale an array to [0, 1], then standardise it to zero mean and unit variance.

    All statistics (min, max, mean, std) are taken over the whole array passed in.
    The input is not modified: the shift is computed out of place, so the caller's
    array keeps its original values.

    Args:
        image: np.ndarray of pixel values.

    Returns:
        A new array holding the standardised values.
    """
    # Out-of-place subtraction: the previous `image -= image.min()` overwrote the caller's data.
    shifted = image - image.min()
    scaled = shifted / shifted.max()
    return (scaled - np.mean(scaled)) / np.std(scaled)

#### Load dataset

In [None]:
def load_dataset(X_dataset, label):
    """Preprocess one split and pair it with its labels.

    The images are zero-padded by 2 on each side of axes 1 and 2 and then normalised.

    Args:
        X_dataset: Image array for the split.
        label: Labels for the split; returned unchanged.

    Returns:
        Tuple (preprocessed_images, label).
    """
    padded = zero_pad(X_dataset, 2)
    prepared = normalise(padded)
    return prepared, label

## Part 2 : CNN Layer

#### Initialisation 

In [None]:
def initialise(kernel_shape, sigma = 0.01, bias_factor = 0.001):
    """Create the initial weight and bias arrays for a layer.

    Args:
        kernel_shape: Shape of the weight array; its last entry is the output size.
        sigma: Standard deviation of the zero-mean normal distribution for the weights.
        bias_factor: Constant value every bias entry starts with.

    Returns:
        Tuple (weight, bias). The bias has shape (1, 1, 1, out) when `kernel_shape`
        has four entries and (out,) otherwise.
    """
    out_size = kernel_shape[-1]
    if len(kernel_shape) == 4:
        bias_shape = (1, 1, 1, out_size)
    else:
        bias_shape = (out_size, )
    weight = np.random.normal(0, sigma, kernel_shape)
    bias = bias_factor * np.ones(bias_shape)
    return weight, bias

#### Softmax activation function

In [None]:
def softmax(X):
    """Row-wise softmax for the output layer.

    Args:
        X: 2-D array-like of scores, one row per sample.

    Returns:
        np.ndarray of the same shape whose rows are non-negative and sum to 1.
    """
    X = np.asarray(X)
    # Subtracting each row's maximum leaves the result mathematically unchanged but keeps
    # np.exp from overflowing to inf (and the ratio from becoming nan) on large scores.
    shifted = X - np.max(X, axis = 1, keepdims = True)
    exp_scores = np.exp(shifted)
    return exp_scores / np.sum(exp_scores, axis = 1, keepdims = True)

#### Convolution Layer

In [None]:
class Conv_Layer:
    """Convolution layer with a built-in gradient-descent update.

    The weight is indexed as (f, f, channel_input, channel_output); only the first
    entry of its shape is read as the kernel size, so the kernel is treated as square.
    forward_propagation caches its input, and back_propagation consumes that cache,
    updates weight and bias in place and returns the gradient for the previous layer.
    """

    def __init__(self, kernel_shape, stride = 1, pad = 0, sigma = 0.01, bias_factor = 0.001):
        """Create the layer parameters.

        Args:
            kernel_shape: Shape passed to initialise() to build weight and bias.
            stride: Step between neighbouring windows along height and width.
            pad: Zero padding applied on each side of height and width.
            sigma: Forwarded to initialise() for the weight distribution.
            bias_factor: Forwarded to initialise() for the starting bias value.
        """
        self.weight, self.bias = initialise(kernel_shape, sigma, bias_factor)
        self.stride = stride
        self.pad = pad
    
    def forward_propagation(self, input_map):
        """Convolve a batch with the layer's kernels.

        Args:
            input_map: 4-D array unpacked as (batch, height, width, channels).

        Returns:
            Array of shape (batch, height_output, width_output, channel_output).
        """
        # Cached for back_propagation, which needs the same input to form the weight gradient.
        self.input_map = input_map
        batch_size, height_input, width_input, _ = input_map.shape
        f, _, _, channel_output = self.weight.shape
        # Output size: (input + 2 * pad - f) / stride + 1, truncated to an integer.
        height_output = int((height_input + 2 * self.pad - f) / self.stride + 1)
        width_output = int((width_input + 2 * self.pad - f) / self.stride + 1)
        output_map = np.zeros((batch_size, height_output, width_output, channel_output))
        input_map_pad = zero_pad(input_map, self.pad)
        for height in range(height_output):
            for width in range(width_output):
                vertical_start, vertical_end = height * self.stride, height * self.stride + f
                horizontal_start, horizontal_end = width * self.stride, width * self.stride + f
                # Window of shape (batch, f, f, channels) for this output position.
                input_map_slice = input_map_pad[:, vertical_start: vertical_end, horizontal_start: horizontal_end, :]
                # Contract the window's (f, f, channels) axes against the kernel -> (batch, channel_output).
                output_map[:, height, width, :] = np.tensordot(input_map_slice, self.weight, axes = ([1, 2, 3], [0, 1, 2])) + self.bias
        return output_map
    
    def back_propagation(self, d_output_map, learning_rate):
        """Back-propagate through the layer and apply one gradient-descent step.

        Must be called after forward_propagation, because it reads the cached input.

        Args:
            d_output_map: Gradient w.r.t. the layer output, unpacked as
                (batch, height_output, width_output, channel_output).
            learning_rate: Step size for the weight and bias update.

        Returns:
            Gradient w.r.t. the layer input, with the same shape as the cached input.
        """
        # channel_output is read here and then re-read from d_output_map on the next line.
        f, _, _, channel_output = self.weight.shape
        _, height_output, width_output, channel_output = d_output_map.shape
        d_input_map = np.zeros(self.input_map.shape)
        d_weight = np.zeros(self.weight.shape)
        d_bias = np.zeros((1, 1, 1, channel_output))
        if self.pad != 0:
            input_map_pad = zero_pad(self.input_map, self.pad)
            d_input_map_pad = zero_pad(d_input_map, self.pad)
        else:
            # No padding: the "padded" names alias the unpadded arrays, so the
            # accumulation below writes straight into d_input_map.
            input_map_pad = self.input_map
            d_input_map_pad = d_input_map
        for height in range(height_output):
            for width in range(width_output):
                vertical_start, vertical_end = height * self.stride, height * self.stride + f
                horizontal_start, horizontal_end = width * self.stride, width * self.stride + f
                input_map_slice = input_map_pad[:, vertical_start: vertical_end, horizontal_start: horizontal_end, :]
                # weight (f, f, cin, cout) . d_out.T (cout, batch) -> (f, f, cin, batch),
                # transposed to (batch, f, f, cin) and accumulated into the window it came from.
                d_input_map_pad[:, vertical_start: vertical_end, horizontal_start: horizontal_end, :] += np.transpose(np.dot(self.weight, d_output_map[:, height, width, :].T), (3, 0, 1, 2))
                # window (f, f, cin, batch) . d_out (batch, cout) -> (f, f, cin, cout), summed over the batch.
                d_weight += np.dot(np.transpose(input_map_slice, (1, 2, 3, 0)), d_output_map[:, height, width, :])
                d_bias += np.sum(d_output_map[:, height, width, :], axis = 0)
        # Strip the padding border so the gradient matches the original input shape.
        d_input_map = d_input_map_pad if self.pad == 0 else d_input_map_pad[:, self.pad: -self.pad, self.pad: -self.pad, :]
        # In-place gradient-descent step on the parameters.
        self.weight -= learning_rate * d_weight
        self.bias -= learning_rate * d_bias
        # Drop the cached input once it has been used.
        self.input_map = None
        return d_input_map

#### Sigmoid Activation Layer

In [None]:
class Sigmoid_Layer:
    """Element-wise sigmoid activation that caches its output for the backward pass."""

    def forward_propagation(self, input_map):
        """Apply 1 / (1 + exp(-x)) element-wise and remember the result."""
        activated = 1 / (1 + np.exp(-input_map))
        self.output_map = activated
        return activated

    def back_propagation(self, d_output_map):
        """Scale the incoming gradient by s * (1 - s), where s is the cached output.

        The cache is cleared afterwards, so forward_propagation must run again
        before the next backward call.
        """
        cached = self.output_map
        local_slope = cached * (1 - cached)
        self.output_map = None
        return d_output_map * local_slope

#### Max-Pooling Layer

In [None]:
class MaxPool_Layer:
    """Max-pooling over windows of size f x f taken every `stride` steps."""

    def __init__(self, stride = 2, f = 2):
        self.stride = stride
        self.f = f

    def forward_propagation(self, input_map):
        """Pool a (batch, height, width, channel) array and cache the input.

        Returns:
            Array of shape (batch, height_output, width_output, channel) holding
            the maximum of every window.
        """
        self.input_map = input_map
        n_samples, in_h, in_w, n_channels = input_map.shape
        out_h = int(1 + (in_h - self.f) / self.stride)
        out_w = int(1 + (in_w - self.f) / self.stride)
        pooled = np.zeros((n_samples, out_h, out_w, n_channels))
        for row in range(out_h):
            top = row * self.stride
            bottom = top + self.f
            for col in range(out_w):
                left = col * self.stride
                right = left + self.f
                window = input_map[:, top: bottom, left: right, :]
                pooled[:, row, col, :] = window.max(axis = (1, 2))
        return pooled

    def back_propagation(self, d_output_map):
        """Route each output gradient back to the position(s) that held the window maximum.

        Every entry equal to its window's maximum receives the full gradient, so tied
        maxima each get a copy. The cached input is cleared afterwards.
        """
        _, out_h, out_w, _ = d_output_map.shape
        d_input_map = np.zeros(self.input_map.shape)
        for row in range(out_h):
            top = row * self.stride
            bottom = top + self.f
            for col in range(out_w):
                left = col * self.stride
                right = left + self.f
                window = self.input_map[:, top: bottom, left: right, :]
                # True wherever an entry equals the maximum of its (sample, channel) window.
                is_max = window == window.max(axis = (1, 2), keepdims = True)
                # (batch, channel) -> (batch, 1, 1, channel) so it broadcasts over the window.
                upstream = d_output_map[:, row, col, :][:, np.newaxis, np.newaxis, :]
                d_input_map[:, top: bottom, left: right, :] += upstream * is_max
        self.input_map = None
        return d_input_map

#### Fully Connected Layer

In [None]:
class FC_Layer:
    """Fully connected layer (input @ weight + bias) with a built-in gradient-descent update."""

    def __init__(self, weight_shape, sigma = 0.1, bias_factor = 0.01):
        weight, bias = initialise(weight_shape, sigma, bias_factor)
        self.weight = weight
        self.bias = bias

    def forward_propagation(self, input_array):
        """Return input_array @ weight + bias and cache the input for the backward pass."""
        self.input_array = input_array
        return input_array @ self.weight + self.bias

    def back_propagation(self, d_output_array, learning_rate):
        """Back-propagate, then update weight and bias in place.

        Args:
            d_output_array: Gradient w.r.t. the layer output, one row per sample.
            learning_rate: Step size for the parameter update.

        Returns:
            Gradient w.r.t. the layer input, computed with the weights from before the update.
        """
        cached_input = self.input_array
        # The input gradient must use the pre-update weights, so it is computed first.
        d_input_array = d_output_array @ self.weight.T
        grad_weight = cached_input.T @ d_output_array
        grad_bias = np.sum(d_output_array.T, axis = 1)
        self.weight -= learning_rate * grad_weight
        self.bias -= learning_rate * grad_bias
        self.input_array = None
        return d_input_array

#### Fully Connected Output Layer

In [None]:
class FC_Output_Layer:
    """Final fully connected layer combined with softmax, loss and accuracy computation."""

    def __init__(self, weight_shape, sigma = 0.1, bias_factor = 0.01):
        weight, bias = initialise(weight_shape, sigma, bias_factor)
        self.weight = weight
        self.bias = bias

    def forward_propagation(self, input_array, labels, mode):
        """Compute the class scores and return either the loss or the accuracy count.

        Args:
            input_array: Activations of the previous layer, one row per sample.
            labels: Integer class index for every sample.
            mode: "train" returns the summed negative log-likelihood of the true classes;
                "test" returns (number_of_correct_predictions, predictions).
                Any other value returns None.
        """
        self.input_array = input_array
        self.labels = labels
        scores = np.matmul(input_array, self.weight) + self.bias
        self.output_array = scores
        probabilities = softmax(scores)
        predictions = np.argmax(probabilities, axis = 1)
        if mode == "train":
            sample_rows = range(probabilities.shape[0])
            return np.sum(-np.log(probabilities[sample_rows, labels]))
        if mode == "test":
            return np.sum(labels == predictions), predictions

    def back_propagation(self, learning_rate):
        """Back-propagate the softmax cross-entropy loss and update the parameters.

        The score gradient is (softmax - one_hot(labels)) divided by the batch size.
        All cached values are cleared afterwards.

        Returns:
            Gradient w.r.t. the layer input, computed with the weights from before the update.
        """
        d_scores = softmax(self.output_array)
        sample_count = d_scores.shape[0]
        d_scores[range(sample_count), self.labels] -= 1
        d_scores = d_scores / sample_count
        d_input_array = np.matmul(d_scores, self.weight.T)
        grad_weight = np.matmul(self.input_array.T, d_scores)
        grad_bias = np.sum(d_scores.T, axis = 1)
        self.weight -= learning_rate * grad_weight
        self.bias -= learning_rate * grad_bias
        self.input_array = None
        self.labels = None
        self.output_array = None
        return d_input_array

## Part 3 : LeNet-5

In [19]:
class LeNet5:
    """LeNet-5 style network: three conv/sigmoid stages (two followed by max-pooling) and two FC layers."""

    def __init__(self):
        shapes = {"C1": (5, 5, 3, 6),
                  "C3": (5, 5, 6, 16),
                  "C5": (5, 5, 16, 120),
                  "F6": (120, 84),
                  "F7": (84, 50)}
        init = {"sigma": 0.1, "bias_factor": 0.01}

        # Layers are created in network order, which also fixes the order of the random weight draws.
        self.C1 = Conv_Layer(shapes["C1"], **init)
        self.Sigmoid1 = Sigmoid_Layer()
        self.S2 = MaxPool_Layer()
        self.C3 = Conv_Layer(shapes["C3"], **init)
        self.Sigmoid2 = Sigmoid_Layer()
        self.S4 = MaxPool_Layer()
        self.C5 = Conv_Layer(shapes["C5"], **init)
        self.Sigmoid3 = Sigmoid_Layer()
        self.F6 = FC_Layer(shapes["F6"], **init)
        self.Sigmoid4 = Sigmoid_Layer()
        self.F7 = FC_Output_Layer(shapes["F7"], **init)

    def forward_propagation(self, input_image, input_label, mode):
        """Run the whole network and return what the output layer returns for `mode`."""
        feature_stack = (self.C1, self.Sigmoid1, self.S2,
                         self.C3, self.Sigmoid2, self.S4,
                         self.C5, self.Sigmoid3)
        activation = input_image
        for layer in feature_stack:
            activation = layer.forward_propagation(activation)
        # Keep spatial position (0, 0) only, turning the 4-D map into one row per sample.
        activation = activation[:, 0, 0, :]
        activation = self.F6.forward_propagation(activation)
        activation = self.Sigmoid4.forward_propagation(activation)
        return self.F7.forward_propagation(activation, input_label, mode)

    def back_propagation(self, learning_rate):
        """Back-propagate from the output layer to C1, updating every trainable layer."""
        grad = self.F7.back_propagation(learning_rate)
        grad = self.Sigmoid4.back_propagation(grad)
        grad = self.F6.back_propagation(grad, learning_rate)
        # Restore the two singleton spatial axes dropped in the forward pass.
        grad = grad[:, np.newaxis, np.newaxis, :]
        grad = self.Sigmoid3.back_propagation(grad)
        grad = self.C5.back_propagation(grad, learning_rate)
        grad = self.S4.back_propagation(grad)
        grad = self.Sigmoid2.back_propagation(grad)
        grad = self.C3.back_propagation(grad, learning_rate)
        grad = self.S2.back_propagation(grad)
        grad = self.Sigmoid1.back_propagation(grad)
        self.C1.back_propagation(grad, learning_rate)

    def extract_model(self):
        """Return a fresh LeNet5 that carries this model's parameters and layer settings.

        The parameter arrays are shared with this model (assigned, not copied); the new
        instance simply has none of the per-layer caches set by forward propagation.
        """
        snapshot = LeNet5()
        for name in ("C1", "C3", "C5"):
            source, target = getattr(self, name), getattr(snapshot, name)
            target.weight = source.weight
            target.bias = source.bias
            target.stride = source.stride
            target.pad = source.pad
        for name in ("S2", "S4"):
            source, target = getattr(self, name), getattr(snapshot, name)
            target.stride = source.stride
            target.f = source.f
        for name in ("F6", "F7"):
            source, target = getattr(self, name), getattr(snapshot, name)
            target.weight = source.weight
            target.bias = source.bias
        return snapshot

## Part 4 : Train

In [20]:
def train(model, train_data, valid_data, epoches, learning_rate_list, batch_size, save_dir = "/content/drive/MyDrive/DL/HW2/LeNet5_model/"):
    """Train `model` with mini-batch gradient descent and checkpoint it after every epoch.

    Args:
        model: Network exposing forward_propagation, back_propagation and extract_model.
        train_data: Tuple (images, labels) used for training.
        valid_data: Tuple (images, labels) used for validation.
        epoches: Number of passes over the training data.
        learning_rate_list: Sequence with one learning rate per epoch.
        batch_size: Mini-batch size.
        save_dir: Directory that receives one "lenet5_data_<epoch>.pkl" checkpoint per epoch.

    Returns:
        Array of shape (2, epoches): row 0 is the training accuracy per epoch,
        row 1 the validation accuracy.

    Raises:
        ValueError: If `learning_rate_list` has fewer entries than `epoches`.
    """
    # Fail before any training starts instead of with an IndexError in a later epoch.
    if len(learning_rate_list) < epoches:
        raise ValueError("learning_rate_list has {} entries but {} epoches were requested".format(len(learning_rate_list), epoches))
    # Make sure the checkpoint directory exists so the first save cannot fail after a full epoch.
    os.makedirs(save_dir, exist_ok = True)

    start_time = time.time()
    acc_rate_list = []

    for epoch in range(0, epoches):
        print("---------- epoch", epoch + 1, "begin ----------")
        learning_rate = learning_rate_list[epoch]
        # print information
        print("learning rate: {}".format(learning_rate))
        print("batch size: {}".format(batch_size))
        # loop over each batch
        start_time_epoch = time.time()
        cost = 0
        mini_batches = random_mini_batches(train_data[0], train_data[1], batch_size)
        print("Training:")
        for batch_image, batch_label in tqdm(mini_batches):
            loss = model.forward_propagation(batch_image, batch_label, 'train')
            cost += loss
            model.back_propagation(learning_rate)
        print("Done, total cost of epoch {}: {}".format(epoch + 1, cost))

        # Each evaluation is a single forward pass over the whole split.
        acc_train, _ = model.forward_propagation(train_data[0], train_data[1], 'test')
        acc_valid, _ = model.forward_propagation(valid_data[0], valid_data[1], 'test')

        acc_rate_list.append([acc_train / len(train_data[1]), acc_valid / len(valid_data[1])])
        print("0/1 Accuracy of training set:", acc_train, "/", len(train_data[1]))
        print("0/1 Accuracy of valid set:", acc_valid, "/", len(valid_data[1]))
        print("Time used:", time.time() - start_time_epoch, "sec")
        print("---------- epoch", epoch + 1, "end ------------")
        # Pickle the model returned by extract_model(), not `model` itself.
        checkpoint_path = os.path.join(save_dir, "lenet5_data_" + str(epoch + 1) + ".pkl")
        with open(checkpoint_path, "wb") as output:
            pickle.dump(model.extract_model(), output, pickle.HIGHEST_PROTOCOL)
    # Transpose so that row 0 is the training curve and row 1 the validation curve.
    acc_rate_list = np.array(acc_rate_list).T
    print("Total time used:", time.time() - start_time, "sec")

    return acc_rate_list

## Part 5 : Test

In [21]:
def test(model_path, test_data):
    """Load a pickled model and evaluate it on a test split.

    Args:
        model_path: Path of the pickled model file.
        test_data: Sequence whose first two items are the images and the labels.

    Returns:
        Tuple (accuracy, predictions), where accuracy is the number of correct
        predictions divided by the number of predictions.
    """
    # NOTE: pickle.load can execute arbitrary code; only open checkpoints you created yourself.
    with open(model_path, "rb") as model_file:
        model = pickle.load(model_file)
    print("Testing with {}:".format(model_path))
    images = test_data[0]
    labels = test_data[1]
    correct, predictions = model.forward_propagation(images, labels, "test")
    accuracy = correct / len(predictions)
    print("Accuracy of test set:", accuracy)

    return accuracy, predictions

## Part 6 : Train Model and Draw Performance
To save time, you can skip this step: the pretrained model has already been stored in the "LeNet5_model" folder.

In [22]:
# Pad and normalise each split, pairing it with its labels.
# Each split is normalised with its own min / max / mean / std, since normalise()
# computes its statistics from the array it is given.
train_data = load_dataset(X_train, train_y)
valid_data = load_dataset(X_valid, valid_y)
test_data = load_dataset(X_test, test_y)

In [23]:
# Training configuration.
batch_size = 64
epoches = 25
# One learning rate per epoch: 0.1 for the first 5 epochs, then 0.05 for the remaining 20.
learning_rate_list = np.array([1e-1] * 5 + [5e-2] * 20 )
# NOTE(review): no NumPy random seed is set in the visible cells, so the initial weights
# (and the mini-batch shuffling) differ from run to run.
model = LeNet5()

In [None]:
# Training: runs every epoch and writes one checkpoint per epoch.
# acc_list[0] is the training accuracy per epoch, acc_list[1] the validation accuracy.
acc_list = train(model, train_data, valid_data, epoches, learning_rate_list, batch_size)

In [None]:
# Plot training vs. validation accuracy per epoch and save the figure next to the checkpoints.
x = np.arange(1, epoches + 1)
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.plot(x, acc_list[0])
plt.plot(x, acc_list[1])
plt.grid(True)
plt.legend(["Training data", "Validation data"], loc = "upper left")
# Save before show(): saving the figure only after it has been shown can produce a blank image.
plt.savefig("/content/drive/MyDrive/DL/HW2/LeNet5_model/LeNet5_accuracy.png")
plt.show()

**Save accuracy result (can skip)**

In [None]:
# Store both accuracy curves (keys: train_acc / valid_acc) so the results section
# can be run without retraining.
np.savez("/content/drive/MyDrive/DL/HW2/LeNet5_model/acc_list.npz", train_acc=acc_list[0], valid_acc=acc_list[1])

## Part 7 : Result

**Load train and valid accuracy**

In [None]:
# Reload the per-epoch accuracy curves saved after training.
with np.load("/content/drive/MyDrive/DL/HW2/LeNet5_model/acc_list.npz") as data:
    acc_train = data["train_acc"]
    acc_valid = data["valid_acc"]

In [None]:
def answer(test_y, test_y_pred, data_name, output_name, output_dir='/content/drive/MyDrive/DL/HW2/LeNet5_model/'):
    """Build an answer/prediction table and save it as '<output_name>_test_result.csv'.

    Args:
        test_y: True labels, one per sample.
        test_y_pred: Predicted labels, aligned with `test_y`.
        data_name: Row labels for the table (one per sample).
        output_name: Prefix of the CSV file name.
        output_dir: Directory the CSV is written to; created if it does not exist.

    Returns:
        pandas.DataFrame with columns 'Answer' and 'Prediction', indexed by `data_name`.
    """
    table = np.column_stack((test_y, test_y_pred))
    test_result = pd.DataFrame(table, index=data_name, columns=['Answer','Prediction'])
    # Create the target directory first so the CSV write cannot fail on a missing folder.
    os.makedirs(output_dir, exist_ok=True)
    test_result.to_csv(os.path.join(output_dir, output_name + '_test_result.csv'))
    return test_result

**Test**

In [None]:
# Evaluate the checkpoint from the epoch with the highest validation accuracy.
# argmax() is 0-based while checkpoint files are numbered from 1, hence the + 1.
LeNet5_test_acc, LeNet5_y_pred = test("/content/drive/MyDrive/DL/HW2/LeNet5_model/lenet5_data_" + str(acc_valid.argmax() + 1) + ".pkl", test_data)

Testing with /content/drive/MyDrive/DL/HW2/LeNet5_model/lenet5_data_24.pkl:
Accuracy of test set: 0.14444444444444443


In [None]:
# Test accuracy, rounded to 4 decimal places for display.
LeNet5_test_acc = round(LeNet5_test_acc,4)
LeNet5_test_acc

0.1444

In [None]:
# Write the per-image answer/prediction table to CSV and display it, using the image paths as row labels.
output_name = 'LeNet5'
LeNet5_answer = answer(test_y, LeNet5_y_pred, test_dir, output_name)
LeNet5_answer

Unnamed: 0,Answer,Prediction
images/n02111277/n02111277_9420.JPEG,0,48
images/n02111277/n02111277_9422.JPEG,0,32
images/n02111277/n02111277_9484.JPEG,0,41
images/n02111277/n02111277_951.JPEG,0,41
images/n02111277/n02111277_9518.JPEG,0,46
...,...,...
images/n02172182/n02172182_974.JPEG,49,44
images/n02172182/n02172182_9765.JPEG,49,34
images/n02172182/n02172182_9789.JPEG,49,49
images/n02172182/n02172182_98.JPEG,49,41
