### Load data

In [None]:
import os
from PIL import Image
import torch
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader

class CUB_ImageFolder(Dataset):
    """
    Dataset serving (image, label) pairs from a directory that holds the index files
    'images.txt', 'image_class_labels.txt' and 'train_test_split.txt' next to an 'images' folder.
    """
    def __init__(self, path: str, transform: transforms.Compose, train: bool=True) -> None:
        """
        Initialize an ImageFolder like the one provided in `torchvision.datasets.ImageFolder`.
        
        Args:
        - path: The path of the root directory of the dataset.
        - transform: The transform applied to every loaded image; a falsy value skips it.
        - train: Boolean, return the train dataset if True else test dataset, default is True. 
        """
        super().__init__()
        self.root = path
        self.transform = transform
        self.train = train
        self.images = []      # relative image paths, in the order they appear in 'images.txt'
        self.labels = []      # class labels (kept as strings), in file order
        self.train_idx = []   # image ids flagged with 1 in 'train_test_split.txt'
        self.test_idx = []    # all remaining image ids
        
        self._load_dataset()
        self._get_train_test()
        
        # ids of the split this instance serves
        self.idx = self.train_idx if self.train else self.test_idx

    @staticmethod
    def _read_rows(file_path):
        """
        Yield the whitespace-separated fields of every non-blank line of `file_path`.
        Blank lines (e.g. a trailing newline at the end of the file) are skipped.
        """
        with open(file_path) as f:
            for line in f:
                fields = line.split()
                if fields:
                    yield fields
        
    def _load_dataset(self):
        """
        Load the image paths and corresponding labels from 'images.txt'
        and 'image_class_labels.txt'. The second column of each file is kept.
        """
        # load image paths
        images_file = os.path.join(self.root, 'images.txt')
        self.images.extend(fields[1] for fields in self._read_rows(images_file))
        # load image labels
        labels_file = os.path.join(self.root, 'image_class_labels.txt')
        self.labels.extend(fields[1] for fields in self._read_rows(labels_file))
        
    def _get_train_test(self):
        """
        Get the indices of the training and testing dataset from 'train_test_split.txt'.
        Each line holds two integers: an image id and a flag that is 1 for training images.
        """
        split_file = os.path.join(self.root, 'train_test_split.txt')
        for fields in self._read_rows(split_file):
            idx, is_train = map(int, fields)
            if is_train == 1:
                self.train_idx.append(idx)
            else:
                self.test_idx.append(idx)
                
    def __len__(self):
        """Return the number of images in the selected split."""
        return len(self.idx) 

    def __getitem__(self, index):
        """
        Return the (image, label) pair at position `index` of the selected split.
        The label is returned as a zero-based integer tensor.
        """
        # ids from the split file are treated as 1-based positions into the loaded lists
        image_id = self.idx[index] - 1
        image_path, image_label = self.images[image_id], self.labels[image_id]
        # get the raw image; the context manager releases the file handle once the
        # pixel data has been copied into the converted image
        with Image.open(os.path.join(self.root, 'images', image_path)) as raw_image:
            image_matrix = raw_image.convert('RGB')
        if self.transform:
            image_matrix = self.transform(image_matrix)
        # convert the returned label into a tensor; subtract one so that the labels
        # start from 0, following Python's indexing convention
        image_label = torch.tensor(int(image_label) - 1)
        return image_matrix, image_label


def preprocess_data(batch_size: int=128, data_dir: str='data/CUB_200_2011') -> tuple[DataLoader, DataLoader]:
    """
    Preprocess the CUB-200-2011 dataset and return the train and test 'DataLoader'.
    
    Args:
    - batch_size: The number of samples in one batch, default is 128.
    - data_dir: Root directory of the dataset, default is 'data/CUB_200_2011'.
    
    Return:
    - (train_loader, test_loader): The training loader reshuffles its samples on every
    pass; the test loader keeps a fixed order.
    """
    
    # resize every image to 224x224, convert it to a tensor and normalize each channel
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    # load the dataset and extract the train/test Dataloader
    train_dataset = CUB_ImageFolder(data_dir, transform=transform)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    
    # evaluation metrics do not depend on sample order, so no shuffling is needed here
    test_dataset = CUB_ImageFolder(data_dir, transform=transform, train=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    
    return train_loader, test_loader


### Define model

In [None]:
import torch
import torch.nn as nn
import torchvision.models as models

class CUB_ResNet_18(nn.Module):
    def __init__(self, num_classes: int=200, pretrain: bool=True):
        """
        Build a ResNet-18 whose final fully connected layer is replaced by a fresh
        linear layer of shape (`in_features`, `num_classes`) to fit the target dataset.
        
        Args:
        - num_classes: Number of classes (labels), default is 200.
        - pretrain: Boolean, whether the ResNet-18 parameters are loaded from the
        "ResNet18_Weights.IMAGENET1K_V1" weights or left at their initial values.
        Default is True.
        """
        super().__init__()
        # choose the weights to load; None requests no pretrained weights
        weights = "ResNet18_Weights.IMAGENET1K_V1" if pretrain else None
        backbone = models.resnet18(weights=weights)
        # replace the output layer, keeping its input width
        head_inputs = backbone.fc.in_features
        backbone.fc = nn.Linear(head_inputs, num_classes)
        self.resnet18 = backbone
        
    def forward(self, x: torch.Tensor):
        """
        Forward pass: feed `x` through the wrapped ResNet-18 and return its output.
        """
        logits = self.resnet18(x)
        return logits

### Define solver

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
# seed PyTorch's random number generator with a fixed value
torch.manual_seed(509)

from tqdm import tqdm

def get_data_model_criterion(pretrain: bool=True) -> tuple:
    """
    Assemble the pieces needed for a training run.
    
    Args:
    - pretrain: Boolean, forwarded to `CUB_ResNet_18`. Default is True.
    
    Return:
    - (train_loader, test_loader, model, criterion): The two DataLoaders from
    `preprocess_data`, the model and a cross-entropy loss.
    """
    # data first, then the network, then the loss
    train_loader, test_loader = preprocess_data()
    return train_loader, test_loader, CUB_ResNet_18(pretrain=pretrain), nn.CrossEntropyLoss()


def _train_one_epoch(model, loader, criterion, optimizer, device) -> float:
    """Run one optimisation pass over `loader` and return the sample-weighted mean loss."""
    model.train()
    running_loss = 0.0
    samples = 0
    for inputs, labels in tqdm(loader):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        samples += inputs.size(0)
        # assumes `criterion` averages over the batch (the nn.CrossEntropyLoss default),
        # so the batch loss is weighted by the batch size before summing
        running_loss += loss.item() * inputs.size(0)
    return running_loss / samples


def _evaluate(model, loader, criterion, device) -> tuple[float, float]:
    """Evaluate `model` on `loader` without gradients and return (mean loss, accuracy)."""
    model.eval()
    correct = 0
    total = 0
    running_loss = 0.0
    with torch.no_grad():
        for inputs, labels in tqdm(loader):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            # the predicted class is the index of the largest output
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            running_loss += criterion(outputs, labels).item() * inputs.size(0)
            correct += (predicted == labels).sum().item()
    return running_loss / total, correct / total


def train_resnet_with_cub(num_epochs: list[int], fine_tuning_lr: float=0.0001, output_lr: float=0.001, pretrain: bool=True, **kwargs) -> list[float]:
    """
    Train the modified ResNet-18 model using the CUB-200-2011 dataset and return the best
    accuracies. Training runs once for `max(num_epochs)` epochs; the best validation accuracy
    seen so far is recorded each time an epoch count listed in `num_epochs` is reached.
    
    Args:
    - num_epochs: A non-empty list of numbers of training epochs.
    - fine_tuning_lr: Learning rate of the parameters outside the output layer, default is 0.0001.
    - output_lr: Learning rate of the parameters inside the output layer, default is 0.001.
    - pretrain: Boolean, whether the ResNet-18 model is pretrained or not. Default is True.
    - kwargs: Optional hyper-parameters. Only 'momentum' (SGD momentum, default 0.9) is read.
    
    Return:
    - store_best_acc: A list aligned with `num_epochs`; entry i is the best validation
    accuracy observed within the first `num_epochs[i]` epochs.
    
    Raise:
    - ValueError: If `num_epochs` is empty.
    """
    if not num_epochs:
        raise ValueError("num_epochs must contain at least one epoch count")
    max_num_epoch = max(num_epochs)
    
    # get the dataset, model and loss criterion
    train_loader, test_loader, model, criterion = get_data_model_criterion(pretrain)
    
    # move the model to CUDA (GPU) when available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    
    # get the parameters of the model except the last layer
    former_params = [p for name, p in model.resnet18.named_parameters() if 'fc' not in name]
    
    # pop the hyper-parameters from the kwargs dict
    momentum = kwargs.pop('momentum', 0.9)
        
    # define optimizer: the backbone gets its own learning rate, the output layer
    # falls back to the optimizer-wide default `output_lr`
    optimizer = optim.SGD([
                {'params': former_params, 'lr': fine_tuning_lr},
                {'params': model.resnet18.fc.parameters()}
            ], lr=output_lr, momentum=momentum
        )
    
    # init the tensorboard; SummaryWriter ignores `comment` once a log_dir is given, so the
    # hyper-parameters go into the directory name to keep every run in its own folder
    tensorboard_name = "Fine_Tuning_With_Pretrain" if pretrain else "Fine_Tuning_Random_Initialize"
    log_dir = os.path.join(tensorboard_name, "{}-{}-{}".format(max_num_epoch, fine_tuning_lr, output_lr))
    writer = SummaryWriter(log_dir)
        
    # best accuracy so far, and its snapshot for every requested epoch count
    best_acc = 0.0
    store_best_acc = [0 for _ in num_epochs]
    
    try:
        # iterate
        for epoch in range(max_num_epoch):
            # train
            epoch_loss = _train_one_epoch(model, train_loader, criterion, optimizer, device)
            print("[Epoch {:>2} / {:>2}], Training loss is {:>8.6f}".format(epoch + 1, max_num_epoch, epoch_loss))
            writer.add_scalar('Train/Loss', epoch_loss, epoch)

            # test
            epoch_loss, accuracy = _evaluate(model, test_loader, criterion, device)
            writer.add_scalar('Validation/Loss', epoch_loss, epoch)
            writer.add_scalar('Validation/Accuracy', accuracy, epoch)
            print("[Epoch {:>2} / {:>2}], Validation loss is {:>8.6f}, Validation accuracy is {:>8.6f}".format(
                epoch + 1, max_num_epoch, epoch_loss, accuracy
            ))
            best_acc = max(best_acc, accuracy)
            
            # record the snapshot for every entry that asks for this epoch count; matching by
            # value keeps the result correct for unsorted or repeated entries as well
            for position, target_epoch in enumerate(num_epochs):
                if target_epoch == epoch + 1:
                    store_best_acc[position] = best_acc
    finally:
        # close the tensorboard even if training is interrupted
        writer.close()
    
    return store_best_acc

### Google Colab Setup

In [None]:
from google.colab import drive

# mount Google Drive at "/content/drive" (requires the google.colab package)
drive.mount("/content/drive")

In [6]:
import os
import sys

# TODO: Set this to the folder, relative to the root of "My Drive", that holds the project files
# Example: for files stored under "My Drive/WI2022/A5", use "WI2022/A5"
GOOGLE_DRIVE_PATH_AFTER_MYDRIVE = "CUB-200-2011"

# NOTE(review): this path is relative, so it is resolved against the current working directory,
# while the drive is mounted at "/content/drive" -- confirm the notebook runs from "/content"
GOOGLE_DRIVE_PATH = os.path.join("drive", "My Drive", GOOGLE_DRIVE_PATH_AFTER_MYDRIVE)
# list the folder contents; this raises FileNotFoundError when the path does not exist
print(os.listdir(GOOGLE_DRIVE_PATH))


# Add the folder to sys.path so that .py files inside it can be imported.

sys.path.append(GOOGLE_DRIVE_PATH) 

FileNotFoundError: [WinError 3] 系统找不到指定的路径。: 'drive\\My Drive\\CUB-200-2011'

### Set the environment variable

In [1]:
# NOTE(review): presumably switches off TensorFlow's oneDNN custom operations (TensorFlow may be
# loaded through the tensorboard import) -- confirm; `os` must be imported before this cell runs
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'

### Load functions

In [4]:
from itertools import product

### Hyper-parameters

In [3]:
# hyper-parameter grid: epoch budgets and the two learning-rate axes
num_epochs = [10, 15, 20]
fine_tuning_lrs = [0.0001, 0.0005, 0.01]
output_lrs = [0.01, 0.02, 0.04, 0.06]

# every (fine-tuning lr, output lr) pair, with the fine-tuning lr varying slowest
configurations = [
    (backbone_lr, head_lr)
    for backbone_lr in fine_tuning_lrs
    for head_lr in output_lrs
]

### Train

In [None]:
best_accs = []

# Train with the pretrained model, one run per configuration. The results file is opened up
# front and flushed after every configuration, so the accuracies of finished runs are kept
# even if a later run fails.
with open('best_accuracy.txt', 'w') as f:
    for fine_tuning_lr, output_lr in configurations:
        curr_best_acc = train_resnet_with_cub(num_epochs, fine_tuning_lr=fine_tuning_lr, output_lr=output_lr)
        best_accs.extend(curr_best_acc)
        # one line per (fine-tuning lr, output lr, epoch budget) triple
        for num_epoch, accuracy in zip(num_epochs, curr_best_acc):
            config = (fine_tuning_lr, output_lr, num_epoch)
            f.write(f"Configuration: {config}, Accuracy: {accuracy}\n")
        f.flush()