## import 

In [9]:
!pip install --upgrade numpy

from __future__ import print_function, division
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
import pandas as pd
from tqdm import tqdm

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


##  image pre-processing

In [10]:
# Per-split image preprocessing pipelines.
# Both splits normalize with the same per-channel mean / std values.
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

data_transforms = dict(
    # Training split: random augmentation, then tensor conversion and normalization.
    train=transforms.Compose([
        transforms.RandomResizedCrop(224),   # random crop, resized to 224
        transforms.RandomHorizontalFlip(),   # random horizontal flip
        transforms.ToTensor(),
        transforms.Normalize(_NORM_MEAN, _NORM_STD),
    ]),
    # Validation split: deterministic resize + center crop, then normalization.
    val=transforms.Compose([
        transforms.Resize(256),              # resize
        transforms.CenterCrop(224),          # center crop
        transforms.ToTensor(),
        transforms.Normalize(_NORM_MEAN, _NORM_STD),
    ]),
)

In [11]:
!pwd

/content/drive/MyDrive/Colab Notebooks/ai cup


## get filename

In [12]:
class ImageFolderWithPaths(datasets.ImageFolder):
    """``torchvision.datasets.ImageFolder`` variant whose items carry their file path.

    Each item is the tuple produced by ``ImageFolder.__getitem__`` with one
    extra trailing element: the path recorded in ``self.imgs`` for that index.
    """

    def __getitem__(self, index):
        """Return the parent's item tuple for ``index`` with the image path appended."""
        # DataLoader calls this method; the path is taken from the first field
        # of the ``self.imgs`` entry at the same index.
        return super().__getitem__(index) + (self.imgs[index][0],)

In [13]:
!pwd


/content/drive/MyDrive/Colab Notebooks/ai cup


## load data

In [14]:
  # 資料集載入 =======================================================================
data_dir = './dataset/training'

# One dataset per split. Each split directory is expected to contain one
# sub-folder per class; items are (image, class_index, path) tuples.
image_datasets = {
    x: ImageFolderWithPaths(os.path.join(data_dir, x), data_transforms[x])
    for x in ['train', 'val']
}

# Class indices are derived independently from each split's folder names, so
# the two splits only share a label space when their class folders match.
if image_datasets['val'].classes != image_datasets['train'].classes:
    print('WARNING: train/val class folders differ; validation labels will '
          'not line up with the training label indices.')

# Handy inspection snippets:
#   image_datasets['train'].imgs[i][0]  -> file path of the i-th image
#   image_datasets['train'].imgs[i][1]  -> class index of the i-th image
#   image_datasets['train'].classes     -> class (folder) names

dataloaders = {
    x: torch.utils.data.DataLoader(
        image_datasets[x],
        batch_size=8,             # samples per batch
        # Reshuffle only the training data every epoch; validation keeps the
        # dataset order so evaluation and the exported CSV are reproducible.
        shuffle=(x == 'train'),
        num_workers=2,            # worker subprocesses used for loading
    )
    for x in ['train', 'val']
}

# Number of images in the training and validation sets.
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
print(dataset_sizes)

# Class names, indexed by label.
class_names = image_datasets['train'].classes
print(class_names)

# Fetch one training batch and print the class names of (up to) its first
# five samples as a sanity check.
inputs, classes, paths = next(iter(dataloaders['train']))
for idx in classes[:5].tolist():
    print(class_names[idx])

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)



{'train': 1752, 'val': 438}
['0', '1', '10', '100', '101', '102', '103', '104', '105', '106', '107', '108', '109', '11', '110', '111', '112', '113', '114', '115', '116', '117', '118', '119', '12', '120', '121', '122', '123', '124', '125', '126', '127', '128', '129', '13', '130', '131', '132', '133', '134', '135', '136', '137', '138', '139', '14', '140', '141', '142', '143', '144', '145', '146', '147', '148', '149', '15', '150', '151', '152', '153', '154', '155', '156', '157', '158', '159', '16', '160', '161', '162', '163', '164', '165', '166', '167', '168', '169', '17', '170', '171', '172', '173', '174', '175', '176', '177', '178', '179', '18', '180', '181', '182', '183', '184', '185', '186', '187', '188', '189', '19', '190', '191', '192', '193', '194', '195', '196', '197', '198', '199', '2', '20', '200', '201', '202', '203', '204', '205', '206', '207', '208', '209', '21', '210', '211', '212', '213', '214', '215', '216', '217', '218', '22', '23', '24', '25', '26', '27', '28', '29', '3'

## look at the data

In [15]:
def tensor2img(inp):
    """Convert a normalized, channel-first image tensor into a displayable array.

    Undoes the per-channel normalization (``x * std + mean``, using the same
    mean / std values as ``data_transforms``) and clips the result to [0, 1].

    Args:
        inp: torch tensor with the 3 color channels on the first axis.
            It may require grad or live on a GPU; it is detached and copied
            to the CPU before conversion.

    Returns:
        numpy array with the channel axis moved last, values in [0, 1].
    """
    # detach()/cpu() are no-ops for plain CPU tensors, but without them
    # ``.numpy()`` raises for tensors that require grad or live on the GPU.
    pixels = inp.detach().cpu().numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    return np.clip(std * pixels + mean, 0, 1)

In [16]:
# Column store for the prediction table: train_model() assigns the
# 'filename' and 'category' lists and writes them out as a CSV via
# pd.DataFrame(col).to_csv(...).
col = {'filename': [], 'category': []}

"\ncol['filename'].append('apple')\ncol['filename'].append('banana')\n\ncol['category'].append(1)\ncol['category'].append(2)\n\ncol_df = pd.DataFrame(col)\ncol_df.to_csv('label.csv', index=False)\n"

## train model

In [17]:
# 將多張圖片拼成一張 grid 圖
#out = torchvision.utils.make_grid(inputs)

# 顯示圖片
#img = tensor2img(out)
#plt.imshow(img)
#plt.title([class_names[x] for x in classes])

In [18]:
# 訓練模型用函數
def train_model(model, criterion, optimizer, scheduler, num_epochs=25,
                model_path="model.pt", csv_path="label.csv"):
    """Train ``model``, keep the weights of the best validation epoch, and export results.

    Every epoch runs a 'train' phase followed by a 'val' phase. During 'val'
    the predicted class name of every image is collected together with the
    image's file name. The epoch with the highest validation accuracy
    determines both the weights that are restored at the end and the
    predictions that are written to the CSV file.

    Relies on module-level names: ``dataloaders`` and ``dataset_sizes`` (keyed
    by 'train' / 'val'), ``device``, ``class_names`` and ``col``. Batches are
    expected to be ``(inputs, labels, paths)`` triples. ``col`` is updated in
    place with the exported 'filename' / 'category' columns.

    Args:
        model: network to train; modified in place.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimizer stepped once per training batch.
        scheduler: learning-rate scheduler stepped once per epoch, after the
            training phase.
        num_epochs: number of epochs to run.
        model_path: file the best ``state_dict`` is saved to.
        csv_path: file the best epoch's validation predictions are saved to.

    Returns:
        ``model`` with the best validation weights loaded.
    """
    since = time.time()  # start time, for the elapsed-time report

    # Best-so-far bookkeeping.
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    best_filenames = []
    best_categories = []

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch trains first and validates afterwards.
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0
            # Predictions of this phase; only filled while validating.
            filenames = []
            categories = []

            for inputs, labels, paths in tqdm(dataloaders[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)

                if is_train:
                    optimizer.zero_grad()

                # Track gradients only while training.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                # The criterion value is weighted by batch size so the epoch
                # loss below is a per-sample figure.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels).item()

                if not is_train:
                    filenames.extend(os.path.basename(p) for p in paths)
                    categories.extend(class_names[i] for i in preds.tolist())

            if is_train:
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # Remember the best validation epoch (weights and predictions).
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                best_filenames = filenames
                best_categories = categories

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

    print('Best val Acc: {:.4f}'.format(best_acc))

    # Restore and persist the best weights.
    model.load_state_dict(best_model_wts)
    torch.save(model.state_dict(), model_path)

    # Export the best epoch's validation predictions.
    col['filename'] = best_filenames
    col['category'] = best_categories
    col_df = pd.DataFrame(col)
    col_df.to_csv(csv_path, index=False)
    return model

## fine-tuning model

In [19]:
# Load ResNet18 with pretrained weights.
model_ft = models.resnet18(pretrained=True)

# Number of input features of ResNet18's final fully connected layer.
num_ftrs = model_ft.fc.in_features

# Replace the final layer with a linear classifier that has one output per
# class name, so the head always matches the dataset instead of a
# hard-coded class count.
model_ft.fc = nn.Linear(num_ftrs, len(class_names))

# Move the model to the selected device (GPU or CPU).
model_ft = model_ft.to(device)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


  0%|          | 0.00/44.7M [00:00<?, ?B/s]

In [20]:
# Loss function: cross entropy.
criterion = nn.CrossEntropyLoss()

# Optimizer: SGD with momentum over all parameters of model_ft.
optimizer_ft = optim.SGD(
    params=model_ft.parameters(),
    lr=1e-3,
    momentum=0.9,
)

# Scheduler: multiply the learning rate by 0.1 after every 7 scheduler steps
# (train_model steps the scheduler once per epoch).
exp_lr_scheduler = lr_scheduler.StepLR(
    optimizer=optimizer_ft,
    step_size=7,
    gamma=0.1,
)

## start training

In [21]:
# Train the model; train_model returns it after loading the best weights it recorded.
model_ft = train_model(
    model=model_ft,
    criterion=criterion,
    optimizer=optimizer_ft,
    scheduler=exp_lr_scheduler,
    num_epochs=25,
)

Epoch 0/24
----------


100%|██████████| 219/219 [03:10<00:00,  1.15it/s]


train Loss: 5.2869 Acc: 0.0245


100%|██████████| 55/55 [00:50<00:00,  1.09it/s]


val Loss: 5.5446 Acc: 0.0160
Epoch 1/24
----------


100%|██████████| 219/219 [00:15<00:00, 14.19it/s]


train Loss: 4.2298 Acc: 0.1227


100%|██████████| 55/55 [00:04<00:00, 13.67it/s]


val Loss: 6.4781 Acc: 0.0046
Epoch 2/24
----------


100%|██████████| 219/219 [00:15<00:00, 14.13it/s]


train Loss: 3.3674 Acc: 0.2637


100%|██████████| 55/55 [00:03<00:00, 13.85it/s]


val Loss: 7.0223 Acc: 0.0046
Epoch 3/24
----------


100%|██████████| 219/219 [00:15<00:00, 13.90it/s]


train Loss: 2.7860 Acc: 0.3858


100%|██████████| 55/55 [00:03<00:00, 13.86it/s]


val Loss: 8.4289 Acc: 0.0046
Epoch 4/24
----------


100%|██████████| 219/219 [00:15<00:00, 14.03it/s]


train Loss: 2.3671 Acc: 0.4720


100%|██████████| 55/55 [00:04<00:00, 13.13it/s]


val Loss: 9.4240 Acc: 0.0046
Epoch 5/24
----------


100%|██████████| 219/219 [00:15<00:00, 13.69it/s]


train Loss: 2.0171 Acc: 0.5474


100%|██████████| 55/55 [00:04<00:00, 13.20it/s]


val Loss: 9.5993 Acc: 0.0046
Epoch 6/24
----------


100%|██████████| 219/219 [00:16<00:00, 13.57it/s]


train Loss: 1.7670 Acc: 0.5987


100%|██████████| 55/55 [00:03<00:00, 13.90it/s]


val Loss: 10.4095 Acc: 0.0046
Epoch 7/24
----------


100%|██████████| 219/219 [00:15<00:00, 13.99it/s]


train Loss: 1.4923 Acc: 0.7066


100%|██████████| 55/55 [00:03<00:00, 13.95it/s]


val Loss: 10.3350 Acc: 0.0046
Epoch 8/24
----------


100%|██████████| 219/219 [00:15<00:00, 13.98it/s]


train Loss: 1.4213 Acc: 0.7534


100%|██████████| 55/55 [00:04<00:00, 13.72it/s]


val Loss: 10.6266 Acc: 0.0046
Epoch 9/24
----------


100%|██████████| 219/219 [00:15<00:00, 14.11it/s]


train Loss: 1.3684 Acc: 0.7705


100%|██████████| 55/55 [00:03<00:00, 13.92it/s]


val Loss: 10.4097 Acc: 0.0046
Epoch 10/24
----------


100%|██████████| 219/219 [00:15<00:00, 14.09it/s]


train Loss: 1.3609 Acc: 0.7763


100%|██████████| 55/55 [00:03<00:00, 13.86it/s]


val Loss: 10.2980 Acc: 0.0046
Epoch 11/24
----------


100%|██████████| 219/219 [00:15<00:00, 13.91it/s]


train Loss: 1.3023 Acc: 0.7945


100%|██████████| 55/55 [00:03<00:00, 14.14it/s]


val Loss: 10.7033 Acc: 0.0046
Epoch 12/24
----------


100%|██████████| 219/219 [00:15<00:00, 14.05it/s]


train Loss: 1.2788 Acc: 0.7962


100%|██████████| 55/55 [00:03<00:00, 14.13it/s]


val Loss: 10.3316 Acc: 0.0046
Epoch 13/24
----------


100%|██████████| 219/219 [00:15<00:00, 14.25it/s]


train Loss: 1.2587 Acc: 0.7991


100%|██████████| 55/55 [00:03<00:00, 14.20it/s]


val Loss: 10.4664 Acc: 0.0046
Epoch 14/24
----------


100%|██████████| 219/219 [00:15<00:00, 14.16it/s]


train Loss: 1.2570 Acc: 0.7991


100%|██████████| 55/55 [00:03<00:00, 13.81it/s]


val Loss: 10.5836 Acc: 0.0046
Epoch 15/24
----------


100%|██████████| 219/219 [00:15<00:00, 14.22it/s]


train Loss: 1.2593 Acc: 0.7985


100%|██████████| 55/55 [00:03<00:00, 13.92it/s]


val Loss: 10.5875 Acc: 0.0046
Epoch 16/24
----------


100%|██████████| 219/219 [00:15<00:00, 14.17it/s]


train Loss: 1.2073 Acc: 0.8196


100%|██████████| 55/55 [00:03<00:00, 13.95it/s]


val Loss: 10.5941 Acc: 0.0046
Epoch 17/24
----------


100%|██████████| 219/219 [00:15<00:00, 14.20it/s]


train Loss: 1.2378 Acc: 0.8019


100%|██████████| 55/55 [00:03<00:00, 13.81it/s]


val Loss: 10.7829 Acc: 0.0046
Epoch 18/24
----------


100%|██████████| 219/219 [00:15<00:00, 14.34it/s]


train Loss: 1.2613 Acc: 0.7991


100%|██████████| 55/55 [00:03<00:00, 14.05it/s]


val Loss: 10.7790 Acc: 0.0046
Epoch 19/24
----------


100%|██████████| 219/219 [00:15<00:00, 14.24it/s]


train Loss: 1.2604 Acc: 0.8002


100%|██████████| 55/55 [00:03<00:00, 14.02it/s]


val Loss: 10.6122 Acc: 0.0046
Epoch 20/24
----------


100%|██████████| 219/219 [00:15<00:00, 14.10it/s]


train Loss: 1.2114 Acc: 0.8048


100%|██████████| 55/55 [00:03<00:00, 14.04it/s]


val Loss: 10.5318 Acc: 0.0046
Epoch 21/24
----------


100%|██████████| 219/219 [00:15<00:00, 14.07it/s]


train Loss: 1.2254 Acc: 0.8099


100%|██████████| 55/55 [00:03<00:00, 13.99it/s]


val Loss: 10.7398 Acc: 0.0046
Epoch 22/24
----------


100%|██████████| 219/219 [00:15<00:00, 14.09it/s]


train Loss: 1.2369 Acc: 0.8128


100%|██████████| 55/55 [00:03<00:00, 13.77it/s]


val Loss: 10.7320 Acc: 0.0046
Epoch 23/24
----------


100%|██████████| 219/219 [00:15<00:00, 13.96it/s]


train Loss: 1.2179 Acc: 0.8174


100%|██████████| 55/55 [00:04<00:00, 13.71it/s]


val Loss: 10.5623 Acc: 0.0046
Epoch 24/24
----------


100%|██████████| 219/219 [00:15<00:00, 14.03it/s]


train Loss: 1.2353 Acc: 0.8134


100%|██████████| 55/55 [00:03<00:00, 13.84it/s]


val Loss: 10.7257 Acc: 0.0046
Training complete in 11m 51s
Best val Acc: 0.015982


## predict model

In [22]:
# 使用模型進行預測，並顯示結果
def visualize_model(model, num_images=6):
    """Plot validation images in a two-column grid, titled with the model's predictions.

    Reads the module-level ``dataloaders['val']``, ``device`` and
    ``class_names`` and converts tensors for display with ``tensor2img``.
    The model is switched to eval mode for the duration of the call and its
    previous train/eval mode is restored afterwards, even if plotting fails.

    Args:
        model: network used to predict the class of each image.
        num_images: maximum number of images to draw; nothing is drawn when
            it is not positive.
    """
    if num_images <= 0:
        return

    was_training = model.training  # remember the mode so it can be restored
    model.eval()

    # Round up so an odd ``num_images`` still has a grid cell for its last image.
    rows = (num_images + 1) // 2
    images_so_far = 0
    plt.figure()

    try:
        with torch.no_grad():
            for batch in dataloaders['val']:
                # Only the images are needed. Indexing (rather than unpacking
                # a fixed number of fields) works whether or not the dataset
                # appends extra items such as the file path.
                inputs = batch[0].to(device)

                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)

                for j in range(inputs.size(0)):
                    images_so_far += 1
                    ax = plt.subplot(rows, 2, images_so_far)
                    ax.axis('off')
                    ax.set_title('predicted: {}'.format(class_names[preds[j].item()]))

                    # Convert the tensor back to a displayable image.
                    ax.imshow(tensor2img(inputs[j].cpu()))

                    if images_so_far == num_images:
                        return
    finally:
        model.train(mode=was_training)  # restore the previous mode

In [23]:
# 以模型進行預測
#visualize_model(model_ft)