# **Homework 3 - Convolutional Neural Network (CNN)**

In [None]:
# 加载谷歌硬盘
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
# 添加此选项、不然gdown从Google Drive 中下载会报错
# !pip install --upgrade --no-cache-dir gdown

### 下载数据

In [1]:
# Download the dataset
# You may choose where to download the data.

# Google Drive
# !gdown --id '1awF7pZ9Dz7X1jn1_QAiKN-_v56veCEKy' --output food-11.zip

# Dropbox
# !wget https://www.dropbox.com/s/m9q6273jl3djall/food-11.zip -O food-11.zip


# MEGA
# !sudo apt install megatools
 #!megadl "https://mega.nz/#!zt1TTIhK!ZuMbg5ZjGWzWX1I6nEUbfjMZgCmAgeqJlwDkqdIryfg"

# Unzip the dataset.  This may take some time.
# !unzip -q food-11.zip

SyntaxError: invalid syntax (<ipython-input-1-8ed2f1d26979>, line 8)

### 数据集、数据加载器、图像转换

In [None]:
# Import necessary packages.
import numpy as np
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
from torch.utils.data import ConcatDataset, DataLoader, Subset
from torchvision.datasets import DatasetFolder
import matplotlib.pyplot as plt

# This is for the progress bar.
# 进度条使用的
from tqdm import tqdm


##### 数据转换

In [None]:
# Training-time preprocessing. Data augmentation transforms belong in this
# pipeline; they must be placed before ToTensor(), which has to stay last.
train_tfm = transforms.Compose(
    [
        # Bring every image to a fixed 128 x 128 (height x width) resolution.
        transforms.Resize(size=(128, 128)),
        # Convert the PIL image into a tensor.
        transforms.ToTensor(),
    ]
)

# Validation / test preprocessing: no augmentation, only the same resize
# followed by the conversion to a tensor.
test_tfm = transforms.Compose(
    [
        transforms.Resize(size=(128, 128)),
        transforms.ToTensor(),
    ]
)


#### 数据集、数据集加载器

In [None]:
# Batch size: a reasonably large batch gives a more stable gradient estimate.
batch_size = 128


def load_image(path):
    """Read the image file at ``path`` and return it as an RGB PIL image.

    The file is opened in a ``with`` block so the underlying handle is closed
    as soon as the pixel data has been decoded, and the result is converted to
    RGB so that every sample has the 3 channels the model's first convolution
    expects (grayscale / CMYK / palette files would otherwise break a batch).
    """
    with Image.open(path) as img:
        return img.convert('RGB')


# `loader` tells DatasetFolder how to read a single sample from its path.
train_set = DatasetFolder('food-11/training/labeled', loader=load_image, extensions='jpg', transform=train_tfm)
valid_set = DatasetFolder('food-11/validation', loader=load_image, extensions='jpg', transform=test_tfm)
unlabel_set = DatasetFolder('food-11/training/unlabeled', loader=load_image, extensions='jpg', transform=train_tfm)
test_set = DatasetFolder('food-11/testing', loader=load_image, extensions='jpg', transform=test_tfm)

# Data loaders. Only the training data is shuffled; evaluation order does not
# matter, and a fixed order keeps the per-batch averaged validation metrics
# identical from run to run.
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True)
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=False, num_workers=2, pin_memory=True)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

## 定义你的model
也可以选择其他经典模型,例如(需要先 `import torchvision`):
`model = torchvision.models.resnet18(pretrained=False)`

In [None]:
class DIYModel(nn.Module):
    """Small CNN classifier: three convolutional stages plus a fully-connected head.

    Input:  image batch of shape [batch_size, 3, 128, 128].
    Output: class logits of shape [batch_size, 11].
    """

    def __init__(self):
        super().__init__()

        # One (in_channels, out_channels, pool_size) triple per stage.
        # Each stage is Conv2d(3x3, stride 1, padding 1) -> BatchNorm2d -> ReLU
        # -> MaxPool2d. The convolution keeps the spatial size, so only the
        # pooling shrinks it: 128 -> 64 -> 32 -> 8.
        stage_specs = [(3, 64, 2), (64, 128, 2), (128, 256, 4)]
        conv_stack = []
        for in_ch, out_ch, pool in stage_specs:
            conv_stack += [
                nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=pool, stride=pool, padding=0),
            ]
        self.cnn_layers = nn.Sequential(*conv_stack)

        # The last stage emits 256 feature maps of size 8 x 8.
        flat_features = 256 * 8 * 8
        self.full_connect_layers = nn.Sequential(
            nn.Linear(flat_features, 256),
            nn.ReLU(),
            nn.Linear(256, 256),
            nn.ReLU(),
            # 11 output units, one per class.
            nn.Linear(256, 11),
        )

    def forward(self, x):
        """Map a [batch_size, 3, 128, 128] image batch to [batch_size, 11] logits."""
        # Convolutional feature extraction.
        feature_maps = self.cnn_layers(x)

        # Collapse everything except the batch dimension so the features can be
        # fed to the fully-connected layers.
        flat = feature_maps.flatten(start_dim=1)

        # Fully-connected head produces the final logits.
        return self.full_connect_layers(flat)

## 训练

In [None]:
# "cuda" only when GPUs are available.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Instantiate the model and move its parameters onto the selected device.
model = DIYModel().to(device)
model.device = device

# Cross-entropy loss for the classification task.
criterion = nn.CrossEntropyLoss()

# Adam optimizer over all model parameters.
optim = torch.optim.Adam(model.parameters(), lr=0.005)

# The number of training epochs.
n_epochs = 80

# Set to True to extend the training set with pseudo-labelled samples each epoch
# (semi-supervised learning).
do_semi = False

# Per-epoch mean training / validation loss, kept for the learning-curve plot.
loss_record = []
valid_record = []

# Epochs are numbered from 1 so the same counter serves both the log messages
# and the checkpoint condition, without mutating the loop variable.
for epoch in range(1, n_epochs + 1):
    if do_semi:
        # NOTE(review): get_pseudo_labels is not defined in the visible part of
        # this notebook; it must be defined before do_semi is enabled.
        pseudo_set = get_pseudo_labels(unlabel_set, model)
        # Construct a new dataset and a data loader for training.
        # This is used in semi-supervised learning only.
        concat_dataset = ConcatDataset([train_set, pseudo_set])
        train_loader = DataLoader(concat_dataset, batch_size=batch_size, shuffle=True, num_workers=8, pin_memory=True)

    # ---------- Training ----------

    # Training mode (affects layers such as BatchNorm).
    model.train()
    # Per-batch statistics for this epoch.
    train_loss = []
    train_accs = []
    for imgs, labels in tqdm(train_loader):
        # A batch consists of image data and the corresponding labels.
        imgs, labels = imgs.to(device), labels.to(device)
        logits = model(imgs)
        loss = criterion(logits, labels)

        # Gradients accumulate across backward() calls, so clear the previous ones first.
        optim.zero_grad()
        # Back-propagate to compute the gradients.
        loss.backward()

        # Clip the gradient norms for stable training.
        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

        # Update the parameters with the computed gradients.
        optim.step()

        # Batch accuracy: the predicted class is the index of the largest logit;
        # the mean of the 0/1 "prediction == label" vector is the fraction correct.
        acc = (logits.argmax(dim=-1) == labels).float().mean()
        # Store plain Python floats so no device tensors are kept alive and the
        # epoch averages print as numbers.
        train_loss.append(loss.item())
        train_accs.append(acc.item())

    tr_loss = sum(train_loss) / len(train_loss)
    tr_acc  = sum(train_accs) / len(train_accs)
    loss_record.append(tr_loss)
    print(" Trian | epoch: {}, loss: {}, acc: {}".format(epoch,tr_loss,tr_acc))

    # ---------- Validation ----------
    valid_loss = []
    valid_acc = []
    # Evaluation mode.
    model.eval()
    for imgs, labels in tqdm(valid_loader):
        imgs, labels = imgs.to(device), labels.to(device)
        # No gradients are needed for evaluation.
        with torch.no_grad():
            logits = model(imgs)
            loss = criterion(logits, labels)

        acc = (logits.argmax(dim=-1) == labels).float().mean()
        valid_loss.append(loss.item())
        valid_acc.append(acc.item())
    val_loss = sum(valid_loss) / len(valid_loss)
    val_acc = sum(valid_acc) / len(valid_acc)
    # Record the validation loss so the validation curve has data to plot.
    valid_record.append(val_loss)
    print(" valid | epoch: {}, loss: {}, acc: {}".format(epoch,val_loss,val_acc))

    # Save a checkpoint every 10 epochs and after the final epoch.
    # `or` is required here: the bitwise `|` binds tighter than `==` and would
    # turn the condition into a chained comparison that is never true.
    if epoch % 10 == 0 or epoch == n_epochs:
        # state_dict must be *called*; passing the bound method would pickle the
        # method object instead of the parameter dictionary.
        torch.save(model.state_dict(),'./model_hw3')
        print(" save model | epoch: {}, loss: {}, acc: {}".format(epoch,tr_loss,tr_acc))

# Learning curve: one point per epoch from loss_record / valid_record.
# The x axis is numbered from 1 to match the epoch numbers printed during
# training. A record list that is empty simply draws no line.
plt.figure(figsize=(6,4))
plt.plot(range(1, len(loss_record) + 1), loss_record, c='tab:red', label='train')
plt.plot(range(1, len(valid_record) + 1), valid_record, c='tab:cyan', label='valid')
plt.ylim(0.0, 5)
# The horizontal axis is the epoch index and the vertical axis is the loss
# (the original labels were attached to the wrong axes).
plt.xlabel('epoch')
plt.ylabel('cross-entropy loss')
plt.title('Learning curve of train')
plt.legend()
plt.show()