# **Homework 3 - Convolutional Neural Network**

引用套件： https://pytorch.org/docs/stable/torchvision/models.html


## **goal**

 ----- strong baseline -----   0.79318

----- simple baseline -----   0.71727
      
可以改的地方 (超參數)：                                                         
        #   1. number of filters                                               
        #   2. convolution kernel (mask) size
        #   3. maxpool_size
        #   4. convolution 次數
        #   5. epoch 次數
        #   6. + dropout
        #   7. learning_rate
        #   8. batch_size
        #   9. 用不同的模型架構方法 (ex.resnet,....)

# 從雲端下載我們的資料庫 (food-11)，讀者可以自行下載至本機或自己的雲端

In [None]:
!gdown --id '19CzXudqN58R3D-1G8KeFWk8UDQwlb8is' --output food-11.zip # Download the dataset archive
!unzip food-11.zip # Extract the archive

In [2]:
# Import 需要的套件
import os
import numpy as np
import cv2
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision
import pandas as pd
from torch.utils.data import DataLoader, Dataset
import time
from google.colab import files
import random

In [7]:
# Fix every random seed so that runs are repeatable
def same_seeds(seed):
    """Seed all random number generators used by this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (plus every CUDA device when CUDA is available) with ``seed``, then sets
    ``torch.backends.cudnn.deterministic = True`` and
    ``torch.backends.cudnn.benchmark = False``.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)  # Python random module
    np.random.seed(seed)  # NumPy module
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)  # needed when using multiple GPUs
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


same_seeds(0)

# Read image
利用 OpenCV (cv2) 讀入照片並存放在 numpy array 中，這邊之後可以省略，我們可以直接拿取存好的 numpy 作為輸入，就不需要每次都花很多時間讀檔跟轉 numpy

In [3]:
def readfile(path, label):
    """Load every image in a directory into a uint8 NumPy array.

    Files are visited in sorted filename order, read with ``cv2.imread`` and
    resized to 128x128.

    Args:
        path: Directory that contains the image files.
        label: Boolean flag. When true, the class id is parsed from the part
            of each filename before the first ``_`` and returned as well.

    Returns:
        ``x`` with shape ``(n, 128, 128, 3)`` and dtype ``uint8`` when
        ``label`` is false; otherwise the tuple ``(x, y)`` where ``y`` has
        shape ``(n,)`` and dtype ``uint8``.

    Raises:
        ValueError: If a file in ``path`` cannot be decoded as an image.
    """
    image_dir = sorted(os.listdir(path))  # process the files in filename order
    x = np.zeros((len(image_dir), 128, 128, 3), dtype=np.uint8)
    y = np.zeros((len(image_dir)), dtype=np.uint8)
    for i, file in enumerate(image_dir):
        file_path = os.path.join(path, file)
        img = cv2.imread(file_path)
        if img is None:
            # imread reports failure by returning None; fail here with the
            # offending file name instead of an opaque error inside resize.
            raise ValueError("cannot read image file: {}".format(file_path))
        x[i, :, :] = cv2.resize(img, (128, 128))
        if label:
            y[i] = int(file.split("_")[0])
    if label:
        return x, y
    return x

In [None]:
# Read the training, validation and testing sets with readfile, and cache
# each of them as a .npy file so later runs can skip the slow image decoding.
# (Call files.download('<name>.npy') to copy a cached array to the local machine.)
workspace_dir = './food-11'
print("Reading data")

# Training set (with labels)
training_dir = os.path.join(workspace_dir, "training")
train_x, train_y = readfile(training_dir, True)
np.save('train_x.npy', train_x)
np.save('train_y.npy', train_y)
print("Size of training data = {}".format(len(train_x)))

# Validation set (with labels)
validation_dir = os.path.join(workspace_dir, "validation")
val_x, val_y = readfile(validation_dir, True)
np.save('val_x.npy', val_x)
np.save('val_y.npy', val_y)
print("Size of validation data = {}".format(len(val_x)))

# Testing set (no labels)
testing_dir = os.path.join(workspace_dir, "testing")
test_x = readfile(testing_dir, False)
np.save('test_x.npy', test_x)
print("Size of Testing data = {}".format(len(test_x)))

以上步驟可以省略：我們可以直接下載先前已經把 image 存好的 numpy array (.npy 檔) 並讀取

In [None]:
# Download the pre-computed image/label arrays instead of decoding the images again
!gdown --id '1-T3KJeY5XN94NHsdya0vH4xJX1TTQt14' --output train_x.npy 
!gdown --id '1-VRyaY86OXEsFGRJnk0A9MstsMB80V1b' --output train_y.npy 
!gdown --id '1-UK5Wjt5VSnbS4GxrTnpvDjMIdKk6znY' --output val_x.npy 
!gdown --id '1-jhA40ps45c6QfpqLK7h1QiLZeAVV4iA' --output val_y.npy 
!gdown --id '1-cKSghWRnFGnq2-tSPmnC8YwAwUfU1ik' --output test_x.npy 
# Load the cached arrays into memory
train_x = np.load('train_x.npy')
train_y = np.load('train_y.npy')
val_x = np.load('val_x.npy')
val_y = np.load('val_y.npy')
test_x = np.load('test_x.npy')

# Dataset
在 PyTorch 中，我們可以利用 torch.utils.data 的 Dataset 及 DataLoader 來"包裝" data，使後續的 training 及 testing 更為方便。

Dataset 需要 override (覆寫) 兩個函數：\_\_len\_\_ 及 \_\_getitem\_\_

\_\_len\_\_ 必須要回傳 dataset 的大小，而 \_\_getitem\_\_ 則定義了當程式利用 [ ] 取值時，dataset 應該要怎麼回傳資料。

實際上我們並不會直接使用到這兩個函數，但是使用 DataLoader 在 enumerate Dataset 時會使用到，沒有實做的話會在程式運行階段出現 error。


In [8]:
# Data augmentation is applied during training only.
# An image that has been rotated, resized, rescaled, flipped, or had its brightness/colour
# temperature changed is still recognisably the same picture to a human, but to the model it is a brand-new image.
# Data augmentation therefore modifies the existing images of the dataset to create more training samples and make up for a lack of data.

train_transform = transforms.Compose([
    transforms.ToPILImage(), # convert the array to a PIL image
    transforms.RandomHorizontalFlip(), # randomly flip the image horizontally
    transforms.RandomRotation(15), # randomly rotate the image by an angle within (-15, +15) degrees
    transforms.ToTensor(), # convert the image to a Tensor and scale the values to [0, 1] (data normalization); a Tensor is a multi-dimensional array
    # transforms.Normalize(mean = (0.5, 0.5, 0.5), std = (0.5, 0.5, 0.5)) # would rescale the values to [-1, 1]
])

# No data augmentation at test time
test_transform = transforms.Compose([
    transforms.ToPILImage(),                                    
    transforms.ToTensor(),
])

class ImgDataset(Dataset):
    """Dataset over an indexable collection of images with optional labels.

    Indexing returns ``(image, label)`` when labels were supplied and just
    ``image`` otherwise; ``transform`` (if given) is applied to the image on
    every access.
    """

    def __init__(self, x, y=None, transform=None):
        """Store the samples, convert the labels and remember the transform.

        Args:
            x: Indexable collection of images.
            y: Optional labels; converted to a ``torch.LongTensor`` when
                provided.
            transform: Optional callable applied to each image on access.
        """
        self.x = x
        # Labels are required to be a LongTensor; stay None for unlabelled data
        self.y = None if y is None else torch.LongTensor(y)
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.x)

    def __getitem__(self, index):
        """Return the (optionally transformed) sample at ``index``."""
        sample = self.x[index]
        if self.transform is not None:
            sample = self.transform(sample)
        if self.y is None:
            return sample
        return sample, self.y[index]

In [9]:
batch_size = 128

# Training data: augmented and reshuffled on every epoch
train_set = ImgDataset(train_x, train_y, train_transform)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)

# Validation data: no augmentation, fixed order
val_set = ImgDataset(val_x, val_y, test_transform)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)

# Model

best_model

In [10]:
# best_model
class Classifier(nn.Module):
    """Seven-stage CNN with a three-layer fully connected head (11 outputs).

    Expects input of shape [3, 128, 128] per image. The architecture
    (filter counts, kernel sizes, depth) can be changed freely.
    """

    @staticmethod
    def _conv_stage(in_channels, out_channels, kernel_size, padding, pool=True):
        """Return the layers of one stage: Conv2d -> BatchNorm2d -> ReLU [-> MaxPool2d]."""
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        # BatchNorm2d takes the number of input channels and normalises each channel
        layers = [
            nn.Conv2d(in_channels, out_channels, kernel_size, 1, padding),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        ]
        if pool:
            # torch.nn.MaxPool2d(kernel_size, stride, padding): halves height and width
            layers.append(nn.MaxPool2d(2, 2, 0))
        return layers

    def __init__(self):
        super().__init__()
        stage = self._conv_stage
        # nn.Sequential adds the modules in the order in which they are passed in.
        # Every convolution keeps the spatial size (stride 1, padding = kernel_size // 2),
        # e.g. 128 padded to 132, then 132 - 5 + 1 = 128; each pooling halves it.
        # The trailing comment is the output shape of the stage.
        self.cnn = nn.Sequential(
            *stage(3, 64, 5, 2, pool=False),  # [64, 128, 128] (no pooling)
            *stage(64, 128, 3, 1),            # [128, 64, 64]
            *stage(128, 256, 3, 1),           # [256, 32, 32]
            *stage(256, 512, 3, 1),           # [512, 16, 16]
            *stage(512, 512, 3, 1),           # [512, 8, 8]
            # ---- extra layers added on top of the baseline ----
            *stage(512, 512, 3, 1),           # [512, 4, 4]
            *stage(512, 512, 3, 1),           # [512, 2, 2]
        )
        # Classifier head applied to the flattened 512*2*2 feature vector
        self.fc = nn.Sequential(
            nn.Linear(512 * 2 * 2, 1024),
            nn.Dropout(0.5),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.Dropout(0.5),
            nn.ReLU(),
            nn.Linear(512, 11),
        )

    def forward(self, x):
        """Run the CNN, flatten each sample and return the 11 class scores."""
        features = self.cnn(x)
        flat = features.view(features.size(0), -1)
        return self.fc(flat)

half layer of best_model

In [None]:
# Variant of the model with roughly half as many layers
class Classifier(nn.Module):
    """Four-stage CNN with a three-layer fully connected head (11 outputs)."""

    @staticmethod
    def _conv_stage(in_channels, out_channels, kernel_size, padding):
        """Return the layers of one stage: Conv2d -> BatchNorm2d -> ReLU -> MaxPool2d."""
        return [
            nn.Conv2d(in_channels, out_channels, kernel_size, 1, padding),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),
        ]

    def __init__(self):
        super().__init__()
        stage = self._conv_stage
        # Every convolution keeps the spatial size and every pooling halves it.
        # Trailing comments: output shape of the stage, then parameter counts
        # (conv: (k*k*in + 1) * out, batch norm: 2 * out).
        self.cnn = nn.Sequential(
            *stage(3, 64, 5, 2),     # [64, 64, 64]   conv (5*5*3+1)*64 = 4,864; BN 128
            *stage(64, 128, 3, 1),   # [128, 32, 32]  conv (3*3*64+1)*128 = 73,856; BN 256
            *stage(128, 256, 3, 1),  # [256, 16, 16]  conv (3*3*128+1)*256 = 295,168; BN 512
            *stage(256, 512, 3, 1),  # [512, 8, 8]    conv (3*3*256+1)*512 = 1,180,160; BN 1,024
        )
        # Classifier head on the flattened 512*8*8 features (Dropout is not used here)
        self.fc = nn.Sequential(
            nn.Linear(512 * 8 * 8, 256),  # (512*8*8+1)*256 = 8,388,864
            nn.ReLU(),
            nn.Linear(256, 128),          # (256+1)*128 = 32,896
            nn.ReLU(),
            nn.Linear(128, 11),           # (128+1)*11 = 1,419
        )

    def forward(self, x):
        """Run the CNN, flatten each sample and return the 11 class scores."""
        features = self.cnn(x)
        flat = features.view(features.size(0), -1)
        return self.fc(flat)

DNN_Model

In [None]:
# DNN
# Fully connected network: no convolution is used
class Classifier(nn.Module):
    """Three-layer fully connected classifier on flattened 3x128x128 images."""

    def __init__(self):
        super().__init__()
        flat_features = 3 * 128 * 128
        # Each hidden layer: Linear -> Dropout -> BatchNorm1d -> ReLU
        self.fc = nn.Sequential(
            nn.Linear(flat_features, 256),
            nn.Dropout(0.5),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Linear(256, 64),
            nn.Dropout(0.5),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Linear(64, 11),
        )

    def forward(self, x):
        """Flatten each sample and return the 11 class scores."""
        return self.fc(x.view(x.size(0), -1))

看一下我們使用的 model 裡面的參數資料  
參考資料：https://www.brilliantcode.net/1646/convolutional-neural-networks-3-calculate-number-of-parameters/

In [None]:
from torchsummary import summary

# Build the model on the GPU and print a summary for one 3x128x128 input
model = Classifier().cuda()
summary(model, input_size=(3, 128, 128))

# Training

使用 training set 訓練，並使用 validation set 尋找好的參數

epochs 介紹：   
epochs 被定義為向前和向後傳播中所有批次的單次訓練疊代。這意味著1個周期是整個輸入數據的單次向前和向後傳遞。簡單說，epochs指的就是訓練過程中數據將被「輪」多少次，就這樣。

舉個例子

訓練集有1000個樣本，batchsize=10，那麼
訓練完整個樣本集需要：100次iteration，1次epoch。   
具體的計算公式為：   
一個 epoch 所需的 iteration 次數 N = 訓練樣本的數量 / batch_size

參考資料：https://kknews.cc/zh-tw/code/kban458.html

In [None]:
# Train on the training set and evaluate on the validation set after every epoch.
model = Classifier().cuda()
loss = nn.CrossEntropyLoss()  # classification task, so use cross-entropy loss
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # Adam optimizer
num_epoch = 150
best_val_acc = 0  # largest number of correctly classified validation samples so far

for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0
    val_acc = 0.0
    val_loss = 0.0

    model.train()  # make sure the model is in training mode (enables Dropout etc.)
    for images, labels in train_loader:
        optimizer.zero_grad()  # reset the gradients of the model parameters
        train_pred = model(images.cuda())  # calls the model's forward function
        # prediction and label must live on the same device
        batch_loss = loss(train_pred, labels.cuda())
        batch_loss.backward()  # back-propagate to get the gradient of every parameter
        optimizer.step()  # update the parameters with the gradients

        train_acc += np.sum(np.argmax(train_pred.detach().cpu().numpy(), axis=1) == labels.numpy())
        # batch_loss is the mean over the batch; weight it by the batch size so
        # that dividing by the dataset size below gives the per-sample mean loss.
        train_loss += batch_loss.item() * len(labels)

    model.eval()
    with torch.no_grad():
        for images, labels in val_loader:
            val_pred = model(images.cuda())
            batch_loss = loss(val_pred, labels.cuda())

            val_acc += np.sum(np.argmax(val_pred.cpu().numpy(), axis=1) == labels.numpy())
            val_loss += batch_loss.item() * len(labels)

    if val_acc > best_val_acc:
        best_val_acc = val_acc

    # Report the statistics of this epoch
    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f loss: %3.6f' % \
        (epoch + 1, num_epoch, time.time()-epoch_start_time, \
         train_acc/len(train_set), train_loss/len(train_set), val_acc/len(val_set), val_loss/len(val_set)))

print('best_val_acc =', best_val_acc/len(val_set))

### 繪製 confusion matrix 
參考資料：  
1. https://deeplizard.com/learn/video/0LhiS6yu2qQ   
2. https://mathpretty.com/10675.html   
3. https://honglung.pixnet.net/blog/post/214669413-%e6%b7%b7%e6%b7%86%e7%9f%a9%e9%99%a3

In [12]:
from sklearn.metrics import classification_report
import itertools
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Draw a confusion matrix as an annotated heat map on a new figure.

    cm        : 2-D array of counts, indexed as cm[true, predicted].
    classes   : tick labels, one per class.
    normalize : when true, each row is divided by its sum before plotting.
    title     : figure title.
    cmap      : colormap passed to imshow.
    """
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = cm.astype('float') / row_totals
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    plt.figure(figsize=(12, 8))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    # One tick per class on both axes
    ticks = np.arange(len(classes))
    plt.xticks(ticks, classes, rotation=45)
    plt.yticks(ticks, classes)

    # Write the value into every cell; use white text on the darker cells
    cell_format = '.2f' if normalize else 'd'
    half_max = cm.max() / 2.
    for row in range(cm.shape[0]):
        for col in range(cm.shape[1]):
            value = cm[row, col]
            text_color = "white" if value > half_max else "black"
            plt.text(col, row, format(value, cell_format),
                     horizontalalignment="center",
                     color=text_color)

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()

In [None]:
# Collect the model's predicted class for every validation sample
model.eval()
prediction = []
with torch.no_grad():
    for i, data in enumerate(val_loader):
        val_pred = model(data[0].cuda())
        val_label = np.argmax(val_pred.cpu().data.numpy(), axis=1)
        prediction.extend(val_label)

# Class names, used as the tick labels of the confusion matrix
target_names = [
    'Bread', 'Dairy product', 'Dessert', 'Egg', 'Fried food', 'Meat',
    'Noodles/Pasta', 'Rice', 'Seafood', 'Soup', 'Vegetable/Fruit',
]

plt.figure(1)
cm = confusion_matrix(val_y, prediction)
plot_confusion_matrix(cm, classes=target_names, normalize=True, title='confusion matrix')

plt.show()

得到好的參數後，我們使用 training set 和 validation set 共同訓練（資料量變多，模型效果較好）

In [None]:
batch_size = 128

# Stack the training and validation data into one larger training set
train_val_x = np.concatenate([train_x, val_x], axis=0)
train_val_y = np.concatenate([train_y, val_y], axis=0)

train_val_set = ImgDataset(train_val_x, train_val_y, train_transform)
train_val_loader = DataLoader(train_val_set, batch_size=batch_size, shuffle=True)

In [None]:
# model.train()
#   enables BatchNormalization (batch statistics) and Dropout
# model.eval()
#   disables Dropout and makes BatchNormalization use its running statistics
# After training, call model.eval() before running the model on test samples;
# otherwise feeding data through the model still changes its internal state
# even though no training is taking place.

# Note: the sample distributions of the training set and the testing set may differ.
model_best = Classifier().cuda()
loss = nn.CrossEntropyLoss()  # classification task, so use cross-entropy loss
optimizer = torch.optim.Adam(model_best.parameters(), lr=0.001)  # Adam optimizer
num_epoch = 150

for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_acc = 0.0
    train_loss = 0.0

    model_best.train()
    for images, labels in train_val_loader:
        optimizer.zero_grad()
        train_pred = model_best(images.cuda())
        batch_loss = loss(train_pred, labels.cuda())
        batch_loss.backward()
        optimizer.step()

        train_acc += np.sum(np.argmax(train_pred.detach().cpu().numpy(), axis=1) == labels.numpy())
        # batch_loss is the mean over the batch; weight it by the batch size so
        # that dividing by the dataset size below gives the per-sample mean loss.
        train_loss += batch_loss.item() * len(labels)

    # Report the statistics of this epoch
    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f' % \
      (epoch + 1, num_epoch, time.time()-epoch_start_time, \
      train_acc/len(train_val_set), train_loss/len(train_val_set)))

把 train 好的 model 參數 (state_dict，存成 .pkl 檔) 存到自己的路徑

In [None]:
# Switch to evaluation mode, then save the trained parameters
# (the state_dict only, not the whole network object) and download the file
model_best.eval()
model_path = 'train_model_test2_2.pkl'
torch.save(model_best.state_dict(), model_path)
files.download(model_path)

# Testing
利用剛剛 train 好的 model 進行 prediction

In [None]:
# The testing set has no labels; keep the sample order so predictions line up with the ids
test_set = ImgDataset(test_x, y=None, transform=test_transform)
test_loader = DataLoader(test_set, shuffle=False, batch_size=batch_size)

In [None]:
# Predict the class of every testing sample, batch by batch
model_best.eval()
prediction = []
with torch.no_grad():
    for i, data in enumerate(test_loader):
        test_pred = model_best(data.cuda())
        test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)
        prediction.extend(test_label)

In [None]:
# Write the predictions to a csv file: a header, then one "index,class" row per sample
with open("predict.csv", 'w') as f:
    rows = ['Id,Category\n']
    rows.extend('{},{}\n'.format(i, y) for i, y in enumerate(prediction))
    f.writelines(rows)

# Download the file to the local machine
files.download('predict.csv')