# Just an example.You can alter sample code anywhere.

## Mount your google drive

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# You need to modify this part to the directory where your code is located
%cd /content/drive/MyDrive/ColabNotebooks/DEEP_LEARNING/Lab01

/content/drive/MyDrive/ColabNotebooks/DEEP_LEARNING/Lab01


## Import packages

In [3]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import pandas as pd

In [4]:
#Fix the random seed
np.random.seed(0)

## Load the data and label

In [5]:
train_load = np.loadtxt('./data/fmnist-train.csv',delimiter=',',dtype="int")
train_data=train_load[:,1:]
train_label=train_load[:,0]
print("shape of train_data: {}".format(train_data.shape))
print("shape of train_label: {}".format(train_label.shape))

shape of train_data: (60000, 784)
shape of train_label: (60000,)


## Show the training data

In [6]:
# uncomment if you want to show the training data
#plt.figure(figsize=(20, 20))
#for index in range(10):
#    image = train_data[index+20000].reshape(28,28)
#    plt.subplot(2, 5, index+1)
#    plt.imshow(image)
#plt.show()

In [7]:
train_image_num = train_data.shape[0]
train_data = train_data.astype('float32') / 255.0

print("train_image_num  is : {}".format(train_image_num))

train_image_num  is : 60000


## Change numpy array to pytorch tensor

In [8]:
train_data_tensor = torch.from_numpy(train_data)
train_label_tensor = torch.from_numpy(train_label)

## Validation image number

In [9]:
val_image_num=10000

## Convert labels to one hot vector


In [10]:
label_temp = np.zeros((train_image_num, 10), dtype = np.float32)
for i in range(train_image_num):
    label_temp[i][train_label[i]] = 1
train_label_onehot = np.copy(label_temp)
train_label_onehot_tensor = torch.from_numpy(train_label_onehot)
print("One-hot training labels shape:",train_label_onehot.shape)


One-hot training labels shape: (60000, 10)


## Hyperparameters

In [11]:
EPOCH = 30
Batch_size = 50 # 10000 should be divisible by batch_size
Learning_rate = 0.03
WEIGHT_DECAY = 3e-4

## Define the models with pytorch

In [12]:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F


class SiLU(nn.Module):
    def forward(self, input_tensor):
        return F.silu(input_tensor)


class GlobalAveragePooling(nn.Module):
    def forward(self, input_tensor):
        return input_tensor.mean(dim = (2, 3))


class Block(nn.Module):
    def __init__(self, oc_size: int, ic_size: int, bn_momentum: float = 0.1):
        super().__init__()
        self.res_scale = 1.0

        self.bn1 = nn.BatchNorm2d(ic_size, eps = 1e-5, momentum = bn_momentum)
        self.act1 = SiLU()
        self.conv1 = nn.Conv2d(ic_size, oc_size, kernel_size = 3, stride = 2, padding = 1, bias = True)

        self.bn2 = nn.BatchNorm2d(oc_size, eps = 1e-5, momentum = bn_momentum)
        self.act2 = SiLU()
        self.conv2 = nn.Conv2d(oc_size, oc_size, kernel_size = 3, stride = 1, padding = 1, bias = True)

        self.conv_residual = nn.Conv2d(ic_size, oc_size, kernel_size = 1, stride = 2, padding = 0, bias = True)

        self.bn3 = nn.BatchNorm2d(oc_size, eps = 1e-5, momentum = bn_momentum)
        self.act3 = SiLU()
        self.conv3 = nn.Conv2d(oc_size, oc_size, kernel_size = 3, stride = 1, padding = 1, bias = True)

        self.bn4 = nn.BatchNorm2d(oc_size, eps = 1e-5, momentum = bn_momentum)
        self.act4 = SiLU()
        self.conv4 = nn.Conv2d(oc_size, oc_size, kernel_size = 3, stride = 1, padding = 1, bias = True)

        for conv_layer in [self.conv1, self.conv2, self.conv_residual, self.conv3, self.conv4]:
            nn.init.kaiming_normal_(conv_layer.weight, mode = "fan_in", nonlinearity = "linear")
            if conv_layer.bias is not None:
                nn.init.zeros_(conv_layer.bias)

        for bn_layer in [self.bn1, self.bn2, self.bn3, self.bn4]:
            nn.init.ones_(bn_layer.weight)
            nn.init.zeros_(bn_layer.bias)

        nn.init.zeros_(self.bn2.weight)
        nn.init.zeros_(self.bn4.weight)

    def forward(self, input_tensor):
        main_path = self.conv1(self.act1(self.bn1(input_tensor)))
        main_path = self.conv2(self.act2(self.bn2(main_path)))
        residual_path = self.conv_residual(input_tensor)
        combined_tensor = residual_path + self.res_scale * main_path

        main_path = self.conv3(self.act3(self.bn3(combined_tensor)))
        main_path = self.conv4(self.act4(self.bn4(main_path)))
        output_tensor = combined_tensor + self.res_scale * main_path
        return output_tensor


class Net(nn.Module):
    def __init__(self, config = (32, 64, 128), num_classes = 10):
        super().__init__()

        channels_stage0, channels_stage1, channels_stage2 = config
        bn_momentum = 0.1

        self.conv_stem = nn.Conv2d(1, channels_stage0, kernel_size = 3, stride = 1, padding = 1, bias = True)

        self.block1 = Block(oc_size = channels_stage0, ic_size = channels_stage0, bn_momentum = bn_momentum)
        self.block2 = Block(oc_size = channels_stage1, ic_size = channels_stage0, bn_momentum = bn_momentum)
        self.block3 = Block(oc_size = channels_stage2, ic_size = channels_stage1, bn_momentum = bn_momentum)

        self.gap = GlobalAveragePooling()
        self.fc = nn.Linear(channels_stage2, num_classes)

        nn.init.kaiming_normal_(self.conv_stem.weight, mode = "fan_in", nonlinearity = "linear")
        if self.conv_stem.bias is not None:
            nn.init.zeros_(self.conv_stem.bias)

        nn.init.kaiming_normal_(self.fc.weight, mode = "fan_in", nonlinearity = "linear")
        if self.fc.bias is not None:
            nn.init.zeros_(self.fc.bias)

    def forward(self, input_flat):
        batch_size, flat_dim = input_flat.shape
        image_side = int(math.isqrt(flat_dim))
        image_tensor = input_flat.view(batch_size, 1, image_side, image_side)

        if self.training:
            batch_size_aug, channels, height, width = image_tensor.shape

            mask_flip = torch.rand(batch_size_aug, device = image_tensor.device) < 0.5
            if mask_flip.any():
                image_tensor[mask_flip] = torch.flip(image_tensor[mask_flip], dims = [-1])

            mask_random_crop = torch.rand(batch_size_aug, device = image_tensor.device) < 0.5
            if mask_random_crop.any():
                padded_tensor = torch.zeros(
                    (mask_random_crop.sum(), channels, height + 4, width + 4),
                    dtype = image_tensor.dtype,
                    device = image_tensor.device,
                )
                padded_tensor[:, :, 2:-2, 2:-2] = image_tensor[mask_random_crop]

                rand_h_offsets = torch.randint(0, 5, (mask_random_crop.sum(),), device = image_tensor.device)
                rand_w_offsets = torch.randint(0, 5, (mask_random_crop.sum(),), device = image_tensor.device)
                for idx, (shift_h, shift_w) in enumerate(zip(rand_h_offsets.tolist(), rand_w_offsets.tolist())):
                    image_tensor[mask_random_crop][idx] = padded_tensor[idx, :, shift_h:shift_h + height, shift_w:shift_w + width]

        features = self.conv_stem(image_tensor)
        features = self.block1(features)
        features = self.block2(features)
        features = self.block3(features)
        pooled_features = self.gap(features)
        logits = self.fc(pooled_features)
        return logits


net = Net()


## Criterion and Optimizer

In [13]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr = Learning_rate, momentum = 0.9, weight_decay = WEIGHT_DECAY)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

## Training

In [14]:
import time
from tqdm import tqdm
train_batch_num = (train_image_num - val_image_num )//Batch_size
val_batch_num = (val_image_num)//Batch_size

net.train()

for epoch in range(1, EPOCH+1):
    train_hit = 0
    val_hit = 0
    total_train_loss = 0.0
    total_val_loss = 0.0
    start_time = time.time()

    net.train()
    for it in tqdm(range(train_batch_num), desc=f"Epoch {epoch} - Training"):
        optimizer.zero_grad()
        outputs = net(train_data_tensor[it*Batch_size:(it+1)*Batch_size])
        _, pred_index = torch.max(outputs, 1)
        train_hit += (pred_index == train_label_tensor[it*Batch_size:(it+1)*Batch_size]).sum().item()
        loss = criterion(outputs, train_label_tensor[it*Batch_size:(it+1)*Batch_size])
        loss.backward()
        optimizer.step()
        total_train_loss += loss.item()

    net.eval()
    with torch.no_grad():
      for titt in tqdm(range(val_batch_num), desc=f"Epoch {epoch} - Validation"):
        tit=train_batch_num+titt
        outputs = net(train_data_tensor[tit*Batch_size:(tit+1)*Batch_size])
        _, pred_index = torch.max(outputs, 1)
        val_hit += (pred_index == train_label_tensor[tit*Batch_size:(tit+1)*Batch_size]).sum().item()
        loss = criterion(outputs, train_label_tensor[tit*Batch_size:(tit+1)*Batch_size])
        total_val_loss += loss.item()
    scheduler.step()

    end_time = time.time()
    epoch_time = end_time - start_time
    print('Task-2  | Epoch:%3d'%epoch, ' |Train Loss:%8.4f'%(total_train_loss/train_batch_num), ' |Train Acc:%3.4f'%(train_hit/(train_image_num-val_image_num)*100.0)
          , ' |Val Loss:%8.4f'%(total_val_loss/val_batch_num), ' |Val Acc:%3.4f'%(val_hit/val_image_num*100.0), ' |Epoch time:%5.2f'%(epoch_time),' sec',
          ' |LR:', scheduler.get_last_lr()[0])

Epoch 1 - Training: 100%|██████████| 1000/1000 [04:07<00:00,  4.04it/s]
Epoch 1 - Validation: 100%|██████████| 200/200 [00:14<00:00, 13.84it/s]


Task-2  | Epoch:  1  |Train Loss:  0.7868  |Train Acc:70.8520  |Val Loss:  0.5040  |Val Acc:80.9800  |Epoch time:261.97  sec  |LR: 0.03


Epoch 2 - Training: 100%|██████████| 1000/1000 [04:03<00:00,  4.11it/s]
Epoch 2 - Validation: 100%|██████████| 200/200 [00:13<00:00, 14.76it/s]


Task-2  | Epoch:  2  |Train Loss:  0.4154  |Train Acc:84.9940  |Val Loss:  0.4170  |Val Acc:84.5400  |Epoch time:256.62  sec  |LR: 0.03


Epoch 3 - Training: 100%|██████████| 1000/1000 [04:02<00:00,  4.13it/s]
Epoch 3 - Validation: 100%|██████████| 200/200 [00:13<00:00, 15.11it/s]


Task-2  | Epoch:  3  |Train Loss:  0.3541  |Train Acc:87.1980  |Val Loss:  0.4442  |Val Acc:83.2200  |Epoch time:255.54  sec  |LR: 0.03


Epoch 4 - Training: 100%|██████████| 1000/1000 [04:01<00:00,  4.13it/s]
Epoch 4 - Validation: 100%|██████████| 200/200 [00:13<00:00, 15.06it/s]


Task-2  | Epoch:  4  |Train Loss:  0.3175  |Train Acc:88.4980  |Val Loss:  0.3839  |Val Acc:85.4800  |Epoch time:255.22  sec  |LR: 0.03


Epoch 5 - Training: 100%|██████████| 1000/1000 [04:05<00:00,  4.08it/s]
Epoch 5 - Validation: 100%|██████████| 200/200 [00:13<00:00, 14.62it/s]


Task-2  | Epoch:  5  |Train Loss:  0.2944  |Train Acc:89.2340  |Val Loss:  0.3650  |Val Acc:86.5000  |Epoch time:258.92  sec  |LR: 0.03


Epoch 6 - Training: 100%|██████████| 1000/1000 [04:00<00:00,  4.16it/s]
Epoch 6 - Validation: 100%|██████████| 200/200 [00:13<00:00, 15.05it/s]


Task-2  | Epoch:  6  |Train Loss:  0.2801  |Train Acc:89.6660  |Val Loss:  0.2988  |Val Acc:89.0400  |Epoch time:253.41  sec  |LR: 0.03


Epoch 7 - Training: 100%|██████████| 1000/1000 [03:59<00:00,  4.18it/s]
Epoch 7 - Validation: 100%|██████████| 200/200 [00:16<00:00, 12.15it/s]


Task-2  | Epoch:  7  |Train Loss:  0.2647  |Train Acc:90.4040  |Val Loss:  0.2911  |Val Acc:89.3100  |Epoch time:255.83  sec  |LR: 0.03


Epoch 8 - Training: 100%|██████████| 1000/1000 [04:03<00:00,  4.11it/s]
Epoch 8 - Validation: 100%|██████████| 200/200 [00:13<00:00, 14.72it/s]


Task-2  | Epoch:  8  |Train Loss:  0.2533  |Train Acc:90.8020  |Val Loss:  0.3071  |Val Acc:88.2400  |Epoch time:256.69  sec  |LR: 0.03


Epoch 9 - Training: 100%|██████████| 1000/1000 [03:58<00:00,  4.19it/s]
Epoch 9 - Validation: 100%|██████████| 200/200 [00:13<00:00, 15.04it/s]


Task-2  | Epoch:  9  |Train Loss:  0.2478  |Train Acc:90.8580  |Val Loss:  0.2797  |Val Acc:89.6900  |Epoch time:252.00  sec  |LR: 0.03


Epoch 10 - Training: 100%|██████████| 1000/1000 [03:57<00:00,  4.21it/s]
Epoch 10 - Validation: 100%|██████████| 200/200 [00:13<00:00, 15.38it/s]


Task-2  | Epoch: 10  |Train Loss:  0.2395  |Train Acc:91.1660  |Val Loss:  0.3808  |Val Acc:85.4500  |Epoch time:250.71  sec  |LR: 0.003


Epoch 11 - Training: 100%|██████████| 1000/1000 [04:01<00:00,  4.14it/s]
Epoch 11 - Validation: 100%|██████████| 200/200 [00:13<00:00, 14.59it/s]


Task-2  | Epoch: 11  |Train Loss:  0.1812  |Train Acc:93.2800  |Val Loss:  0.2346  |Val Acc:92.0200  |Epoch time:255.27  sec  |LR: 0.003


Epoch 12 - Training: 100%|██████████| 1000/1000 [04:00<00:00,  4.16it/s]
Epoch 12 - Validation: 100%|██████████| 200/200 [00:14<00:00, 14.08it/s]


Task-2  | Epoch: 12  |Train Loss:  0.1650  |Train Acc:93.9660  |Val Loss:  0.2318  |Val Acc:92.1200  |Epoch time:254.69  sec  |LR: 0.003


Epoch 13 - Training: 100%|██████████| 1000/1000 [03:58<00:00,  4.20it/s]
Epoch 13 - Validation: 100%|██████████| 200/200 [00:14<00:00, 14.00it/s]


Task-2  | Epoch: 13  |Train Loss:  0.1571  |Train Acc:94.2440  |Val Loss:  0.2323  |Val Acc:92.1200  |Epoch time:252.46  sec  |LR: 0.003


Epoch 14 - Training: 100%|██████████| 1000/1000 [03:58<00:00,  4.20it/s]
Epoch 14 - Validation: 100%|██████████| 200/200 [00:14<00:00, 13.93it/s]


Task-2  | Epoch: 14  |Train Loss:  0.1511  |Train Acc:94.4480  |Val Loss:  0.2267  |Val Acc:92.2600  |Epoch time:252.42  sec  |LR: 0.003


Epoch 15 - Training: 100%|██████████| 1000/1000 [04:00<00:00,  4.15it/s]
Epoch 15 - Validation: 100%|██████████| 200/200 [00:13<00:00, 15.01it/s]


Task-2  | Epoch: 15  |Train Loss:  0.1452  |Train Acc:94.7240  |Val Loss:  0.2349  |Val Acc:92.0600  |Epoch time:254.09  sec  |LR: 0.003


Epoch 16 - Training: 100%|██████████| 1000/1000 [04:00<00:00,  4.17it/s]
Epoch 16 - Validation: 100%|██████████| 200/200 [00:12<00:00, 15.41it/s]


Task-2  | Epoch: 16  |Train Loss:  0.1379  |Train Acc:95.0140  |Val Loss:  0.2353  |Val Acc:92.0500  |Epoch time:253.04  sec  |LR: 0.003


Epoch 17 - Training: 100%|██████████| 1000/1000 [03:58<00:00,  4.20it/s]
Epoch 17 - Validation: 100%|██████████| 200/200 [00:13<00:00, 14.87it/s]


Task-2  | Epoch: 17  |Train Loss:  0.1346  |Train Acc:95.2220  |Val Loss:  0.2286  |Val Acc:92.2700  |Epoch time:251.62  sec  |LR: 0.003


Epoch 18 - Training: 100%|██████████| 1000/1000 [03:58<00:00,  4.19it/s]
Epoch 18 - Validation: 100%|██████████| 200/200 [00:13<00:00, 14.98it/s]


Task-2  | Epoch: 18  |Train Loss:  0.1303  |Train Acc:95.2840  |Val Loss:  0.2449  |Val Acc:92.0400  |Epoch time:251.96  sec  |LR: 0.003


Epoch 19 - Training: 100%|██████████| 1000/1000 [03:58<00:00,  4.19it/s]
Epoch 19 - Validation: 100%|██████████| 200/200 [00:13<00:00, 14.56it/s]


Task-2  | Epoch: 19  |Train Loss:  0.1249  |Train Acc:95.5220  |Val Loss:  0.2507  |Val Acc:91.9200  |Epoch time:252.23  sec  |LR: 0.003


Epoch 20 - Training: 100%|██████████| 1000/1000 [03:58<00:00,  4.19it/s]
Epoch 20 - Validation: 100%|██████████| 200/200 [00:13<00:00, 14.77it/s]


Task-2  | Epoch: 20  |Train Loss:  0.1193  |Train Acc:95.7440  |Val Loss:  0.2447  |Val Acc:92.0700  |Epoch time:252.20  sec  |LR: 0.00030000000000000003


Epoch 21 - Training: 100%|██████████| 1000/1000 [04:01<00:00,  4.15it/s]
Epoch 21 - Validation: 100%|██████████| 200/200 [00:12<00:00, 15.42it/s]


Task-2  | Epoch: 21  |Train Loss:  0.1040  |Train Acc:96.3800  |Val Loss:  0.2335  |Val Acc:92.4600  |Epoch time:253.99  sec  |LR: 0.00030000000000000003


Epoch 22 - Training: 100%|██████████| 1000/1000 [03:59<00:00,  4.18it/s]
Epoch 22 - Validation: 100%|██████████| 200/200 [00:13<00:00, 14.34it/s]


Task-2  | Epoch: 22  |Train Loss:  0.0997  |Train Acc:96.6080  |Val Loss:  0.2327  |Val Acc:92.5500  |Epoch time:253.40  sec  |LR: 0.00030000000000000003


Epoch 23 - Training: 100%|██████████| 1000/1000 [03:59<00:00,  4.18it/s]
Epoch 23 - Validation: 100%|██████████| 200/200 [00:13<00:00, 15.30it/s]


Task-2  | Epoch: 23  |Train Loss:  0.0972  |Train Acc:96.7360  |Val Loss:  0.2341  |Val Acc:92.6200  |Epoch time:252.13  sec  |LR: 0.00030000000000000003


Epoch 24 - Training: 100%|██████████| 1000/1000 [03:57<00:00,  4.21it/s]
Epoch 24 - Validation: 100%|██████████| 200/200 [00:13<00:00, 15.02it/s]


Task-2  | Epoch: 24  |Train Loss:  0.0952  |Train Acc:96.7520  |Val Loss:  0.2359  |Val Acc:92.4500  |Epoch time:250.75  sec  |LR: 0.00030000000000000003


Epoch 25 - Training: 100%|██████████| 1000/1000 [03:56<00:00,  4.23it/s]
Epoch 25 - Validation: 100%|██████████| 200/200 [00:13<00:00, 15.12it/s]


Task-2  | Epoch: 25  |Train Loss:  0.0933  |Train Acc:96.7980  |Val Loss:  0.2369  |Val Acc:92.3700  |Epoch time:249.65  sec  |LR: 0.00030000000000000003


Epoch 26 - Training: 100%|██████████| 1000/1000 [03:56<00:00,  4.23it/s]
Epoch 26 - Validation: 100%|██████████| 200/200 [00:15<00:00, 12.88it/s]


Task-2  | Epoch: 26  |Train Loss:  0.0939  |Train Acc:96.8300  |Val Loss:  0.2383  |Val Acc:92.4500  |Epoch time:252.09  sec  |LR: 0.00030000000000000003


Epoch 27 - Training: 100%|██████████| 1000/1000 [03:57<00:00,  4.20it/s]
Epoch 27 - Validation: 100%|██████████| 200/200 [00:13<00:00, 15.18it/s]


Task-2  | Epoch: 27  |Train Loss:  0.0915  |Train Acc:96.9100  |Val Loss:  0.2387  |Val Acc:92.4300  |Epoch time:251.07  sec  |LR: 0.00030000000000000003


Epoch 28 - Training: 100%|██████████| 1000/1000 [03:53<00:00,  4.28it/s]
Epoch 28 - Validation: 100%|██████████| 200/200 [00:13<00:00, 15.15it/s]


Task-2  | Epoch: 28  |Train Loss:  0.0896  |Train Acc:96.9940  |Val Loss:  0.2417  |Val Acc:92.2900  |Epoch time:247.08  sec  |LR: 0.00030000000000000003


Epoch 29 - Training: 100%|██████████| 1000/1000 [03:57<00:00,  4.21it/s]
Epoch 29 - Validation: 100%|██████████| 200/200 [00:13<00:00, 15.25it/s]


Task-2  | Epoch: 29  |Train Loss:  0.0901  |Train Acc:97.0080  |Val Loss:  0.2434  |Val Acc:92.2900  |Epoch time:250.67  sec  |LR: 0.00030000000000000003


Epoch 30 - Training: 100%|██████████| 1000/1000 [04:00<00:00,  4.16it/s]
Epoch 30 - Validation: 100%|██████████| 200/200 [00:13<00:00, 14.89it/s]

Task-2  | Epoch: 30  |Train Loss:  0.0876  |Train Acc:97.1020  |Val Loss:  0.2442  |Val Acc:92.3500  |Epoch time:254.01  sec  |LR: 3.0000000000000004e-05



