## mount your google drive

from google.colab import drive
drive.mount('/content/drive')

In [1]:
# You need to modify this part to the directory where your code is located
#%cd "/content/drive/MyDrive/DL_Lab1/"

## Import packages


In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import model

In [3]:
#Fix the random seed
np.random.seed(0)

## Load the data and label

In [4]:
train_load = np.loadtxt('./data/fmnist-train.csv',delimiter=',',dtype="int")
test_load = np.loadtxt('./data/fmnist-test.csv',delimiter=',',dtype="int")

train_data=train_load[:,1:]
train_label=train_load[:,0]
test_data=test_load
print("shape of train_data: {}".format(train_data.shape))
print("shape of train_label: {}".format(train_label.shape))
print("shape of test_data: {}".format(test_data.shape))

shape of train_data: (60000, 784)
shape of train_label: (60000,)
shape of test_data: (10000, 784)


There are 60000 photos in flatten pixels form

## Show the training data

In [5]:
#uncomment if you want to show the training data
#plt.figure(figsize=(20, 20))
#for index in range(10):
#    image = train_data[index+1000].reshape(28,28)
#    plt.subplot(2, 5, index+1)
#    plt.imshow(image)
#plt.show()

In [6]:
train_image_num = train_data.shape[0]
test_image_num = test_data.shape[0]
train_data = train_data.astype(np.float32)
test_data = test_data.astype(np.float32)

print("train_image_num  is : {}".format(train_image_num))
print("test_image_num   is : {}".format(test_image_num))

train_image_num  is : 60000
test_image_num   is : 10000


## Validation image number

In [7]:
val_image_num=10000

## Convert the training labels to one hot vector

In [8]:
label_temp = np.zeros((train_image_num, 10), dtype = np.float32)
for i in range(train_image_num):
    label_temp[i][train_label[i]] = 1
train_label_onehot = np.copy(label_temp)
print("One-hot training labels shape:",train_label_onehot.shape)



One-hot training labels shape: (60000, 10)


## Hyperparameters

In [11]:
EPOCH = 20
Batch_size = 10000 # 10000 should be divisible by batch_size
Learning_rate = 0.01

## Training

In [12]:
import time
# please make sure you have place layer.py & network.py in 'model' folder
in_channels = 1
conv_out_channels = 8
kernel_size = 3
num_classes = 10
fc_hidden_size = 128

net = model.Network(in_channels, conv_out_channels, kernel_size, num_classes, fc_hidden_size)

train_batch_num = (train_image_num  -  val_image_num  )//Batch_size
val_batch_num = (val_image_num)//Batch_size

for epoch in range(1, EPOCH+1):
    train_hit = 0
    val_hit = 0
    total_train_loss = 0.0
    total_val_loss = 0.0
    start_time = time.time()
    for it in range(train_batch_num):
        print(train_label_onehot[it*Batch_size:(it+1)*Batch_size].shape)
        pred, train_loss = net.forward(train_data[it*Batch_size:(it+1)*Batch_size], train_label_onehot[it*Batch_size:(it+1)*Batch_size])
        print('predicted label shape:', pred.shape)
        pred_index = np.argmax(pred, axis=0)
        print('predicted index shape:', pred_index.shape)
        train_hit += (pred_index==train_label[it*Batch_size:(it+1)*Batch_size]).sum()
        total_train_loss += train_loss

        #Done running above code

        net.backward()
        net.update(Learning_rate)

    for titt in range(val_batch_num):
        tit=train_batch_num+titt
        pred, val_loss = net.forward(train_data[tit*Batch_size:(tit+1)*Batch_size], train_label_onehot[tit*Batch_size:(tit+1)*Batch_size])
        pred_index = np.argmax(pred, axis=0)
        val_hit += (pred_index==train_label[tit*Batch_size:(tit+1)*Batch_size]).sum()
        total_val_loss += val_loss
    end_time = time.time()
    epoch_time = end_time - start_time
    print('Task-1  | Epoch:%3d'%epoch, ' |Train Loss:%8.4f'%(total_train_loss/train_batch_num), ' |Train Acc:%3.4f'%(train_hit/(train_image_num-val_image_num)*100.0)
          , ' |Val Loss:%8.4f'%(total_val_loss/val_batch_num), ' |Val Acc:%3.4f'%(val_hit/val_image_num*100.0), ' |Epoch time:%5.2f'%(epoch_time),' sec')

(10000, 10)
finish running layer:  <model.layer.Reshape object at 0x107e825b0>
current output shape:  (1, 28, 28, 10000)
finish running layer:  <model.layer.ConvolutionLayer object at 0x107e82550>
current output shape:  (8, 28, 28, 10000)
finish running layer:  <model.layer.Activation1 object at 0x107e82520>
current output shape:  (8, 28, 28, 10000)
finish running layer:  <model.layer.Flatten object at 0x107e7e2b0>
current output shape:  (6272, 10000)
finish running layer:  <model.layer.FullyConnected object at 0x107e7e2e0>
current output shape:  (128, 10000)
finish running layer:  <model.layer.Activation1 object at 0x102f0a3d0>
current output shape:  (128, 10000)
finish running layer:  <model.layer.FullyConnected object at 0x103418b50>
current output shape:  (10, 10000)
predicted label shape: (10, 10000)
predicted index shape: (10000,)
Finish running layer:  <model.layer.FullyConnected object at 0x103418b50>
current grad shape:  (128, 10000)
Finish running layer:  <model.layer.Activat

## Dump for evaluation (upload your DL-test-predict.csv to kaggle )

In [13]:
test_pred_list = []
total_test = 0
for tit in range(test_image_num//Batch_size):
    pred, _ = net.forward(test_data[tit*Batch_size:(tit+1)*Batch_size], train_label_onehot[tit*Batch_size:(tit+1)*Batch_size])
    pred_index = np.argmax(pred, axis=0)
    test_pred_list += pred_index.tolist()
    total_test += Batch_size

print('Please make sure that total test images = 10000')
print(f'Total test images: {total_test} ')

print('Dump file...')
df = pd.DataFrame(test_pred_list, columns=["Category"])
df.to_csv('DL-test-predict_1.csv', index=True, index_label="Id")

finish running layer:  <model.layer.Reshape object at 0x107e825b0>
current output shape:  (1, 28, 28, 10000)
finish running layer:  <model.layer.ConvolutionLayer object at 0x107e82550>
current output shape:  (8, 28, 28, 10000)
finish running layer:  <model.layer.Activation1 object at 0x107e82520>
current output shape:  (8, 28, 28, 10000)
finish running layer:  <model.layer.Flatten object at 0x107e7e2b0>
current output shape:  (6272, 10000)
finish running layer:  <model.layer.FullyConnected object at 0x107e7e2e0>
current output shape:  (128, 10000)
finish running layer:  <model.layer.Activation1 object at 0x102f0a3d0>
current output shape:  (128, 10000)
finish running layer:  <model.layer.FullyConnected object at 0x103418b50>
current output shape:  (10, 10000)
Please make sure that total test images = 10000
Total test images: 10000 
Dump file...
