# HW3 Image Classification
## We strongly recommend that you run with [Kaggle](https://www.kaggle.com/t/86ca241732c04da99aca6490080bae73) for this homework

If you have any questions, please contact the TAs via TA hours, NTU COOL, or email to mlta-2023-spring@googlegroups.com

# Check GPU Type

In [None]:
!nvidia-smi

# Get Data
Notes: if the links are dead, you can download the data directly from Kaggle and upload it to the workspace, or you can use the Kaggle API to directly download the data into colab.


In [None]:
# Download Link
# Link 1 (Dropbox): https://www.dropbox.com/s/up5q1gthsz3v0dq/food-11.zip?dl=0
# Link 2 (Google Drive): https://drive.google.com/file/d/1tbGNwk1yGoCBdu4Gi_Cia7EJ9OhubYD9/view?usp=share_link
# Link 3: Kaggle Competition.

# (1) dropbox link
# !wget -O food11.zip https://www.dropbox.com/s/up5q1gthsz3v0dq/food-11.zip?dl=0

# (2) google drive link
# !pip install gdown --upgrade
# !gdown --id '1tbGNwk1yGoCBdu4Gi_Cia7EJ9OhubYD9' --output food11.zip

In [None]:
# ! unzip food11.zip

# Import Packages

In [None]:
_exp_name = "vgg13"

In [None]:
# Import necessary packages.
import numpy as np
import pandas as pd
import torch
import os
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
from torchvision.datasets import DatasetFolder, VisionDataset
# This is for the progress bar.
from tqdm.auto import tqdm
import random

In [None]:
myseed = 9453  # set a random seed for reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(myseed)
torch.manual_seed(myseed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(myseed)

# Transforms
Torchvision provides lots of useful utilities for image preprocessing, data *wrapping* as well as data augmentation.

Please refer to PyTorch official website for details about different transforms.

In [None]:
# Normally, We don't need augmentations in testing and validation.
# All we need here is to resize the PIL image and transform it into Tensor.
test_tfm = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation((-90,90),interpolation=transforms.InterpolationMode.BILINEAR),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225), inplace=False)
])

# However, it is also possible to use augmentation in the testing phase.
# You may use train_tfm to produce a variety of images and then test using ensemble methods
train_tfm = transforms.Compose([
    # Resize the image into a fixed shape (height = width = 128)
    transforms.Resize((128, 128)),
    # You may add some transforms here.
    transforms.RandomRotation((-90,90),interpolation=transforms.InterpolationMode.BILINEAR),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomGrayscale(p=0.2),
    transforms.RandomInvert(p=0.2),
    transforms.RandomPosterize(bits=2, p=0.2),
    transforms.RandomSolarize(threshold=200, p=0.2),
    # ToTensor() should be the last one of the transforms.
    transforms.ToTensor(),
    transforms.RandomErasing(p=0.3, scale=(0.02, 0.33), ratio=(0.3, 3.3), value='random', inplace=False),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225), inplace=False)
])


# Datasets
The data is labelled by the name, so we load images and label while calling '__getitem__'

In [None]:
class FoodDataset(Dataset):

    def __init__(self,path,tfm=test_tfm,files = None):
        super(FoodDataset).__init__()
        self.path = path
        self.files = sorted([path+"/"+x for x in os.listdir(path) if x.endswith(".jpg")])
        if files != None:
            self.files = files
            
        self.transform = tfm
  
    def __len__(self):
        return len(self.files)
  
    def __getitem__(self,idx):
        fname = self.files[idx]
        im = Image.open(fname)
        im = self.transform(im)
        
        try:
            label = int(fname.split("/")[-1].split("_")[0])
        except:
            label = -1 # test has no label
            
        return im,label

# Model

In [None]:
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        # torch.nn.MaxPool2d(kernel_size, stride, padding)
        # input 維度 [3, 128, 128]
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 64, 3, 1, 1),  # [64, 128, 128]
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [64, 64, 64]

            nn.Conv2d(64, 128, 3, 1, 1), # [128, 64, 64]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [128, 32, 32]

            nn.Conv2d(128, 256, 3, 1, 1), # [256, 32, 32]
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [256, 16, 16]

            nn.Conv2d(256, 512, 3, 1, 1), # [512, 16, 16]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [512, 8, 8]
            
            nn.Conv2d(512, 512, 3, 1, 1), # [512, 8, 8]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [512, 4, 4]
        )
        self.fc = nn.Sequential(
            nn.Linear(512*4*4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 11)
        )

    def forward(self, x):
        out = self.cnn(x)
        out = out.view(out.size()[0], -1)
        return self.fc(out)

# Configurations

In [36]:
import torchvision.models as models
import timm
from timm.scheduler.cosine_lr import CosineLRScheduler
# "cuda" only when GPUs are available.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Initialize a model, and put it on the device specified.
# vitb16 = timm.create_model('vit_base_patch16_224', pretrained=False, img_size=128, num_classes=11)
# vitb16 = models.vit_h_14(weights=False)
# vitb16.heads.head = nn.Linear(vitb16.heads.head.in_features,11)
# model = vitb16.to(device)
VGG13 = models.vgg13(weights=False, num_classes=11)
model = VGG13.to(device)

# The number of batch size.
batch_size = 64

# The number of training epochs.
n_epochs = 400

# If no improvement in 'patience' epochs, early stop.
patience = 100

# For the classification task, we use cross-entropy as the measurement of performance.
criterion = nn.CrossEntropyLoss()

# Initialize optimizer, you may fine-tune some hyperparameters such as learning rate on your own.
optimizer = torch.optim.Adam(model.parameters(), lr=3e-4, weight_decay=1e-4)
# scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=20, eta_min=5e-7,verbose=True)
scheduler = CosineLRScheduler(optimizer, t_initial=20, cycle_decay=0.9, cycle_limit=20, lr_min=3e-7)



# Dataloader

In [None]:
# Construct train and valid datasets.
# The argument "loader" tells how torchvision reads the data.
train_set = FoodDataset("./train", tfm=train_tfm)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
valid_set = FoodDataset("./valid", tfm=test_tfm)
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)

# Start Training

In [26]:
# Initialize trackers, these are not parameters and should not be changed
stale = 0
best_acc = 0

for epoch in range(n_epochs):
    print("Learning rate: " + str(scheduler.get_epoch_values(epoch)[0]))
    # ---------- Training ----------
    # Make sure the model is in train mode before training.
    model.train()

    # These are used to record information in training.
    train_loss = []
    train_accs = []

    for batch in tqdm(train_loader):

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        #imgs = imgs.half()

        # Forward the data. (Make sure data and model are on the same device.)
        logits = model(imgs.to(device))

        # Calculate the cross-entropy loss.
        # We don't need to apply softmax before computing cross-entropy as it is done automatically.
        loss = criterion(logits, labels.to(device))

        # Gradients stored in the parameters in the previous step should be cleared out first.
        optimizer.zero_grad()

        # Compute the gradients for parameters.
        loss.backward()

        # Clip the gradient norms for stable training.
        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

        # Update the parameters with computed gradients.
        optimizer.step()

        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        train_loss.append(loss.item())
        train_accs.append(acc)
        
    train_loss = sum(train_loss) / len(train_loss)
    train_acc = sum(train_accs) / len(train_accs)

    # Print the information.
    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")

    # ---------- Validation ----------
    # Make sure the model is in eval mode so that some modules like dropout are disabled and work normally.
    model.eval()

    # These are used to record information in validation.
    valid_loss = []
    valid_accs = []

    # Iterate the validation set by batches.
    for batch in tqdm(valid_loader):

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        #imgs = imgs.half()

        # We don't need gradient in validation.
        # Using torch.no_grad() accelerates the forward process.
        with torch.no_grad():
            logits = model(imgs.to(device))

        # We can still compute the loss (but not the gradient).
        loss = criterion(logits, labels.to(device))

        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        valid_loss.append(loss.item())
        valid_accs.append(acc)
        #break

    # The average loss and accuracy for entire validation set is the average of the recorded values.
    valid_loss = sum(valid_loss) / len(valid_loss)
    valid_acc = sum(valid_accs) / len(valid_accs)

    # Print the information.
    print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")


    # update logs
    if valid_acc > best_acc:
        with open(f"./{_exp_name}_log.txt","a"):
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f} -> best")
    else:
        with open(f"./{_exp_name}_log.txt","a"):
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")


    # save models
    if valid_acc > best_acc:
        print(f"Best model found at epoch {epoch}, saving model")
        torch.save(model.state_dict(), f"{_exp_name}_best.ckpt") # only save best to prevent output memory exceed error
        best_acc = valid_acc
        stale = 0
    else:
        stale += 1
        if stale > patience:
            print(f"No improvment {patience} consecutive epochs, early stopping")
            break

    scheduler.step(epoch)

100%|██████████| 157/157 [01:27<00:00,  1.79it/s]


[ Train | 286/400 ] loss = 0.33200, acc = 0.88615


100%|██████████| 57/57 [00:18<00:00,  3.14it/s]


[ Valid | 286/400 ] loss = 0.93627, acc = 0.77937
[ Valid | 286/400 ] loss = 0.93627, acc = 0.77937
Learning rate: 5.4546982731770346e-05


100%|██████████| 157/157 [01:27<00:00,  1.79it/s]


[ Train | 287/400 ] loss = 0.33821, acc = 0.88565


100%|██████████| 57/57 [00:18<00:00,  3.11it/s]


[ Valid | 287/400 ] loss = 0.92370, acc = 0.77316
[ Valid | 287/400 ] loss = 0.92370, acc = 0.77316
Learning rate: 4.9975859766079045e-05


100%|██████████| 157/157 [01:27<00:00,  1.80it/s]


[ Train | 288/400 ] loss = 0.32056, acc = 0.88724


100%|██████████| 57/57 [00:18<00:00,  3.15it/s]


[ Valid | 288/400 ] loss = 0.89988, acc = 0.78316
[ Valid | 288/400 ] loss = 0.89988, acc = 0.78316
Learning rate: 4.502281260134256e-05


100%|██████████| 157/157 [01:27<00:00,  1.80it/s]


[ Train | 289/400 ] loss = 0.30923, acc = 0.89232


100%|██████████| 57/57 [00:18<00:00,  3.15it/s]


[ Valid | 289/400 ] loss = 0.95291, acc = 0.77686
[ Valid | 289/400 ] loss = 0.95291, acc = 0.77686
Learning rate: 3.9809801696977804e-05


100%|██████████| 157/157 [01:27<00:00,  1.80it/s]


[ Train | 290/400 ] loss = 0.30419, acc = 0.89689


100%|██████████| 57/57 [00:18<00:00,  3.14it/s]


[ Valid | 290/400 ] loss = 0.93722, acc = 0.77706
[ Valid | 290/400 ] loss = 0.93722, acc = 0.77706
Learning rate: 3.4465188682441514e-05


100%|██████████| 157/157 [01:27<00:00,  1.80it/s]


[ Train | 291/400 ] loss = 0.29884, acc = 0.89898


100%|██████████| 57/57 [00:18<00:00,  3.16it/s]


[ Valid | 291/400 ] loss = 0.97808, acc = 0.77768
[ Valid | 291/400 ] loss = 0.97808, acc = 0.77768
Learning rate: 2.9120575667905234e-05


100%|██████████| 157/157 [01:27<00:00,  1.80it/s]


[ Train | 292/400 ] loss = 0.28431, acc = 0.90307


100%|██████████| 57/57 [00:18<00:00,  3.15it/s]


[ Valid | 292/400 ] loss = 0.94758, acc = 0.78367
[ Valid | 292/400 ] loss = 0.94758, acc = 0.78367
Learning rate: 2.390756476354047e-05


100%|██████████| 157/157 [01:27<00:00,  1.80it/s]


[ Train | 293/400 ] loss = 0.29051, acc = 0.89889


100%|██████████| 57/57 [00:18<00:00,  3.16it/s]


[ Valid | 293/400 ] loss = 0.98810, acc = 0.77567
[ Valid | 293/400 ] loss = 0.98810, acc = 0.77567
Learning rate: 1.8954517598803985e-05


100%|██████████| 157/157 [01:26<00:00,  1.82it/s]


[ Train | 294/400 ] loss = 0.26983, acc = 0.90774


100%|██████████| 57/57 [00:18<00:00,  3.16it/s]


[ Valid | 294/400 ] loss = 0.99073, acc = 0.78670
[ Valid | 294/400 ] loss = 0.99073, acc = 0.78670
Learning rate: 1.4383394633112685e-05


100%|██████████| 157/157 [01:26<00:00,  1.82it/s]


[ Train | 295/400 ] loss = 0.25563, acc = 0.91421


100%|██████████| 57/57 [00:18<00:00,  3.17it/s]


[ Valid | 295/400 ] loss = 0.98754, acc = 0.78136
[ Valid | 295/400 ] loss = 0.98754, acc = 0.78136
Learning rate: 1.0306752084569234e-05


100%|██████████| 157/157 [01:27<00:00,  1.80it/s]


[ Train | 296/400 ] loss = 0.26658, acc = 0.90864


100%|██████████| 57/57 [00:18<00:00,  3.15it/s]


[ Valid | 296/400 ] loss = 0.99668, acc = 0.78730
[ Valid | 296/400 ] loss = 0.99668, acc = 0.78730
Learning rate: 6.824970422319713e-06


100%|██████████| 157/157 [01:27<00:00,  1.80it/s]


[ Train | 297/400 ] loss = 0.25212, acc = 0.91332


100%|██████████| 57/57 [00:18<00:00,  3.16it/s]


[ Valid | 297/400 ] loss = 0.99185, acc = 0.79004
[ Valid | 297/400 ] loss = 0.99185, acc = 0.79004
Learning rate: 4.023782666259539e-06


100%|██████████| 157/157 [01:26<00:00,  1.81it/s]


[ Train | 298/400 ] loss = 0.25244, acc = 0.91501


100%|██████████| 57/57 [00:17<00:00,  3.19it/s]


[ Valid | 298/400 ] loss = 1.00011, acc = 0.78666
[ Valid | 298/400 ] loss = 1.00011, acc = 0.78666
Learning rate: 1.9721633555520814e-06


100%|██████████| 157/157 [01:26<00:00,  1.82it/s]


[ Train | 299/400 ] loss = 0.24381, acc = 0.91620


100%|██████████| 57/57 [00:17<00:00,  3.20it/s]


[ Valid | 299/400 ] loss = 0.94618, acc = 0.79209
[ Valid | 299/400 ] loss = 0.94618, acc = 0.79209 -> best
Best model found at epoch 298, saving model
Learning rate: 7.206301665610775e-07


100%|██████████| 157/157 [01:26<00:00,  1.81it/s]


[ Train | 300/400 ] loss = 0.24641, acc = 0.91610


100%|██████████| 57/57 [00:17<00:00,  3.18it/s]


[ Valid | 300/400 ] loss = 0.96942, acc = 0.79299
[ Valid | 300/400 ] loss = 0.96942, acc = 0.79299 -> best
Best model found at epoch 299, saving model
Learning rate: 6.176733962839471e-05


100%|██████████| 157/157 [01:26<00:00,  1.82it/s]


[ Train | 301/400 ] loss = 0.25824, acc = 0.91083


100%|██████████| 57/57 [00:17<00:00,  3.18it/s]


[ Valid | 301/400 ] loss = 1.00361, acc = 0.78862
[ Valid | 301/400 ] loss = 1.00361, acc = 0.78862
Learning rate: 6.138895715338082e-05


100%|██████████| 157/157 [01:26<00:00,  1.81it/s]


[ Train | 302/400 ] loss = 0.29109, acc = 0.90167


100%|██████████| 57/57 [00:17<00:00,  3.19it/s]


[ Valid | 302/400 ] loss = 0.99267, acc = 0.78063
[ Valid | 302/400 ] loss = 0.99267, acc = 0.78063
Learning rate: 6.026312676065342e-05


100%|██████████| 157/157 [01:26<00:00,  1.81it/s]


[ Train | 303/400 ] loss = 0.32038, acc = 0.88744


100%|██████████| 57/57 [00:17<00:00,  3.17it/s]


[ Valid | 303/400 ] loss = 1.01467, acc = 0.76749
[ Valid | 303/400 ] loss = 1.01467, acc = 0.76749
Learning rate: 5.84175701308983e-05


100%|██████████| 157/157 [01:26<00:00,  1.81it/s]


[ Train | 304/400 ] loss = 0.31271, acc = 0.89351


100%|██████████| 57/57 [00:17<00:00,  3.17it/s]


[ Valid | 304/400 ] loss = 0.97438, acc = 0.77654
[ Valid | 304/400 ] loss = 0.97438, acc = 0.77654
Learning rate: 5.589773099339136e-05


100%|██████████| 157/157 [01:26<00:00,  1.81it/s]


[ Train | 305/400 ] loss = 0.30649, acc = 0.89431


100%|██████████| 57/57 [00:17<00:00,  3.18it/s]


[ Valid | 305/400 ] loss = 1.04042, acc = 0.76699
[ Valid | 305/400 ] loss = 1.04042, acc = 0.76699
Learning rate: 5.276565615056461e-05


100%|██████████| 157/157 [01:26<00:00,  1.81it/s]


[ Train | 306/400 ] loss = 0.30864, acc = 0.89441


100%|██████████| 57/57 [00:18<00:00,  3.16it/s]


[ Valid | 306/400 ] loss = 0.97922, acc = 0.77901
[ Valid | 306/400 ] loss = 0.97922, acc = 0.77901
Learning rate: 4.909846767980892e-05


100%|██████████| 157/157 [01:26<00:00,  1.80it/s]


[ Train | 307/400 ] loss = 0.31351, acc = 0.89152


100%|██████████| 57/57 [00:18<00:00,  3.16it/s]


[ Valid | 307/400 ] loss = 0.96431, acc = 0.77533
[ Valid | 307/400 ] loss = 0.96431, acc = 0.77533
Learning rate: 4.498646393197504e-05


100%|██████████| 157/157 [01:26<00:00,  1.81it/s]


[ Train | 308/400 ] loss = 0.29161, acc = 0.89988


100%|██████████| 57/57 [00:18<00:00,  3.12it/s]


[ Valid | 308/400 ] loss = 0.92470, acc = 0.78526
[ Valid | 308/400 ] loss = 0.92470, acc = 0.78526
Learning rate: 4.053089608629268e-05


100%|██████████| 157/157 [01:26<00:00,  1.81it/s]


[ Train | 309/400 ] loss = 0.29203, acc = 0.90008


100%|██████████| 57/57 [00:18<00:00,  3.16it/s]


[ Valid | 309/400 ] loss = 1.05882, acc = 0.76991
[ Valid | 309/400 ] loss = 1.05882, acc = 0.76991
Learning rate: 3.584147501030442e-05


100%|██████████| 157/157 [01:26<00:00,  1.81it/s]


[ Train | 310/400 ] loss = 0.28843, acc = 0.90297


100%|██████████| 57/57 [00:17<00:00,  3.17it/s]


[ Valid | 310/400 ] loss = 0.99119, acc = 0.78095
[ Valid | 310/400 ] loss = 0.99119, acc = 0.78095
Learning rate: 3.103366981419736e-05


100%|██████████| 157/157 [01:26<00:00,  1.81it/s]


[ Train | 311/400 ] loss = 0.27520, acc = 0.90386


100%|██████████| 57/57 [00:17<00:00,  3.18it/s]


[ Valid | 311/400 ] loss = 0.96148, acc = 0.78456
[ Valid | 311/400 ] loss = 0.96148, acc = 0.78456
Learning rate: 2.622586461809031e-05


100%|██████████| 157/157 [01:26<00:00,  1.81it/s]


[ Train | 312/400 ] loss = 0.27279, acc = 0.90356


100%|██████████| 57/57 [00:17<00:00,  3.17it/s]


[ Valid | 312/400 ] loss = 1.02144, acc = 0.78291
[ Valid | 312/400 ] loss = 1.02144, acc = 0.78291
Learning rate: 2.1536443542102045e-05


100%|██████████| 157/157 [01:26<00:00,  1.81it/s]


[ Train | 313/400 ] loss = 0.25305, acc = 0.91451


100%|██████████| 57/57 [00:18<00:00,  3.16it/s]


[ Valid | 313/400 ] loss = 0.98927, acc = 0.78421
[ Valid | 313/400 ] loss = 0.98927, acc = 0.78421
Learning rate: 1.7080875696419677e-05


100%|██████████| 157/157 [01:26<00:00,  1.81it/s]


[ Train | 314/400 ] loss = 0.26635, acc = 0.91033


100%|██████████| 57/57 [00:17<00:00,  3.19it/s]


[ Valid | 314/400 ] loss = 0.95820, acc = 0.79292
[ Valid | 314/400 ] loss = 0.95820, acc = 0.79292
Learning rate: 1.2968871948585802e-05


100%|██████████| 157/157 [01:26<00:00,  1.81it/s]


[ Train | 315/400 ] loss = 0.26042, acc = 0.90983


100%|██████████| 57/57 [00:18<00:00,  3.16it/s]


[ Valid | 315/400 ] loss = 0.94206, acc = 0.79289
[ Valid | 315/400 ] loss = 0.94206, acc = 0.79289
Learning rate: 9.301683477830108e-06


100%|██████████| 157/157 [01:26<00:00,  1.81it/s]


[ Train | 316/400 ] loss = 0.24899, acc = 0.91511


100%|██████████| 57/57 [00:18<00:00,  3.14it/s]


[ Valid | 316/400 ] loss = 0.95889, acc = 0.79187
[ Valid | 316/400 ] loss = 0.95889, acc = 0.79187
Learning rate: 6.169608635003365e-06


100%|██████████| 157/157 [01:26<00:00,  1.82it/s]


[ Train | 317/400 ] loss = 0.23530, acc = 0.92197


100%|██████████| 57/57 [00:18<00:00,  3.16it/s]


[ Valid | 317/400 ] loss = 0.99511, acc = 0.78942
[ Valid | 317/400 ] loss = 0.99511, acc = 0.78942
Learning rate: 3.649769497496411e-06


100%|██████████| 157/157 [01:24<00:00,  1.87it/s]


[ Train | 318/400 ] loss = 0.22970, acc = 0.92466


100%|██████████| 57/57 [00:17<00:00,  3.26it/s]


[ Valid | 318/400 ] loss = 0.96230, acc = 0.78572
[ Valid | 318/400 ] loss = 0.96230, acc = 0.78572
Learning rate: 1.8042128677413002e-06


100%|██████████| 157/157 [01:23<00:00,  1.88it/s]


[ Train | 319/400 ] loss = 0.22383, acc = 0.92347


100%|██████████| 57/57 [00:17<00:00,  3.25it/s]


[ Valid | 319/400 ] loss = 1.01008, acc = 0.78760
[ Valid | 319/400 ] loss = 1.01008, acc = 0.78760
Learning rate: 6.783824750138967e-07


100%|██████████| 157/157 [01:22<00:00,  1.90it/s]


[ Train | 320/400 ] loss = 0.22395, acc = 0.92586


100%|██████████| 57/57 [00:17<00:00,  3.27it/s]


[ Valid | 320/400 ] loss = 1.01696, acc = 0.78876
[ Valid | 320/400 ] loss = 1.01696, acc = 0.78876
Learning rate: 5.559060566555524e-05


100%|██████████| 157/157 [01:22<00:00,  1.90it/s]


[ Train | 321/400 ] loss = 0.22425, acc = 0.92337


100%|██████████| 57/57 [00:17<00:00,  3.27it/s]


[ Valid | 321/400 ] loss = 1.01606, acc = 0.78869
[ Valid | 321/400 ] loss = 1.01606, acc = 0.78869
Learning rate: 5.5250246112933806e-05


100%|██████████| 157/157 [01:22<00:00,  1.90it/s]


[ Train | 322/400 ] loss = 0.29032, acc = 0.89829


100%|██████████| 57/57 [00:17<00:00,  3.26it/s]


[ Valid | 322/400 ] loss = 0.97779, acc = 0.78230
[ Valid | 322/400 ] loss = 0.97779, acc = 0.78230
Learning rate: 5.423754823684365e-05


100%|██████████| 157/157 [01:23<00:00,  1.89it/s]


[ Train | 323/400 ] loss = 0.30051, acc = 0.89480


100%|██████████| 57/57 [00:17<00:00,  3.25it/s]


[ Valid | 323/400 ] loss = 1.01458, acc = 0.77238
[ Valid | 323/400 ] loss = 1.01458, acc = 0.77238
Learning rate: 5.257744801994565e-05


100%|██████████| 157/157 [01:22<00:00,  1.90it/s]


[ Train | 324/400 ] loss = 0.29379, acc = 0.90018


100%|██████████| 57/57 [00:17<00:00,  3.25it/s]


[ Valid | 324/400 ] loss = 0.94851, acc = 0.78289
[ Valid | 324/400 ] loss = 0.94851, acc = 0.78289
Learning rate: 5.031082263913659e-05


100%|██████████| 157/157 [01:22<00:00,  1.90it/s]


[ Train | 325/400 ] loss = 0.29208, acc = 0.90316


100%|██████████| 57/57 [00:17<00:00,  3.27it/s]


[ Valid | 325/400 ] loss = 1.05151, acc = 0.77446
[ Valid | 325/400 ] loss = 1.05151, acc = 0.77446
Learning rate: 4.7493483933790346e-05


100%|██████████| 157/157 [01:22<00:00,  1.90it/s]


[ Train | 326/400 ] loss = 0.27641, acc = 0.90376


100%|██████████| 57/57 [00:17<00:00,  3.27it/s]


[ Valid | 326/400 ] loss = 1.01607, acc = 0.78172
[ Valid | 326/400 ] loss = 1.01607, acc = 0.78172
Learning rate: 4.419480413304364e-05


100%|██████████| 157/157 [01:22<00:00,  1.90it/s]


[ Train | 327/400 ] loss = 0.28523, acc = 0.89988


100%|██████████| 57/57 [00:17<00:00,  3.26it/s]


[ Valid | 327/400 ] loss = 0.99818, acc = 0.78508
[ Valid | 327/400 ] loss = 0.99818, acc = 0.78508
Learning rate: 4.049600768128144e-05


100%|██████████| 157/157 [01:22<00:00,  1.90it/s]


[ Train | 328/400 ] loss = 0.28019, acc = 0.90446


100%|██████████| 57/57 [00:17<00:00,  3.27it/s]


[ Valid | 328/400 ] loss = 0.98454, acc = 0.77987
[ Valid | 328/400 ] loss = 0.98454, acc = 0.77987
Learning rate: 3.648817122274778e-05


100%|██████████| 157/157 [01:22<00:00,  1.90it/s]


[ Train | 329/400 ] loss = 0.25783, acc = 0.91103


100%|██████████| 57/57 [00:17<00:00,  3.26it/s]


[ Valid | 329/400 ] loss = 1.02473, acc = 0.78926
[ Valid | 329/400 ] loss = 1.02473, acc = 0.78926
Learning rate: 3.2269980992298366e-05


100%|██████████| 157/157 [01:22<00:00,  1.90it/s]


[ Train | 330/400 ] loss = 0.26707, acc = 0.91013


100%|██████████| 57/57 [00:17<00:00,  3.26it/s]


[ Valid | 330/400 ] loss = 1.01115, acc = 0.77489
[ Valid | 330/400 ] loss = 1.01115, acc = 0.77489
Learning rate: 2.794530283277762e-05


100%|██████████| 157/157 [01:22<00:00,  1.90it/s]


[ Train | 331/400 ] loss = 0.26671, acc = 0.91013


100%|██████████| 57/57 [00:17<00:00,  3.27it/s]


[ Valid | 331/400 ] loss = 1.03853, acc = 0.79134
[ Valid | 331/400 ] loss = 1.03853, acc = 0.79134
Learning rate: 2.3620624673256882e-05


100%|██████████| 157/157 [01:22<00:00,  1.90it/s]


[ Train | 332/400 ] loss = 0.24790, acc = 0.91451


100%|██████████| 57/57 [00:17<00:00,  3.30it/s]


[ Valid | 332/400 ] loss = 1.04728, acc = 0.77069
[ Valid | 332/400 ] loss = 1.04728, acc = 0.77069
Learning rate: 1.940243444280746e-05


100%|██████████| 157/157 [01:22<00:00,  1.91it/s]


[ Train | 333/400 ] loss = 0.24363, acc = 0.91760


100%|██████████| 57/57 [00:17<00:00,  3.26it/s]


[ Valid | 333/400 ] loss = 1.03681, acc = 0.78431
[ Valid | 333/400 ] loss = 1.03681, acc = 0.78431
Learning rate: 1.53945979842738e-05


100%|██████████| 157/157 [01:22<00:00,  1.90it/s]


[ Train | 334/400 ] loss = 0.23596, acc = 0.91909


100%|██████████| 57/57 [00:17<00:00,  3.27it/s]


[ Valid | 334/400 ] loss = 0.99617, acc = 0.79443
[ Valid | 334/400 ] loss = 0.99617, acc = 0.79443 -> best
Best model found at epoch 333, saving model
Learning rate: 1.1695801532511608e-05


100%|██████████| 157/157 [01:22<00:00,  1.90it/s]


[ Train | 335/400 ] loss = 0.24643, acc = 0.91570


100%|██████████| 57/57 [00:17<00:00,  3.29it/s]


[ Valid | 335/400 ] loss = 1.02299, acc = 0.78129
[ Valid | 335/400 ] loss = 1.02299, acc = 0.78129
Learning rate: 8.397121731764896e-06


100%|██████████| 157/157 [01:22<00:00,  1.90it/s]


[ Train | 336/400 ] loss = 0.22786, acc = 0.92446


100%|██████████| 57/57 [00:17<00:00,  3.27it/s]


[ Valid | 336/400 ] loss = 1.04678, acc = 0.79491
[ Valid | 336/400 ] loss = 1.04678, acc = 0.79491 -> best
Best model found at epoch 335, saving model
Learning rate: 5.579783026418652e-06


100%|██████████| 157/157 [01:22<00:00,  1.90it/s]


[ Train | 337/400 ] loss = 0.23309, acc = 0.91998


100%|██████████| 57/57 [00:17<00:00,  3.26it/s]


[ Valid | 337/400 ] loss = 0.98926, acc = 0.79555
[ Valid | 337/400 ] loss = 0.98926, acc = 0.79555 -> best
Best model found at epoch 336, saving model
Learning rate: 3.313157645609595e-06


100%|██████████| 157/157 [01:22<00:00,  1.90it/s]


[ Train | 338/400 ] loss = 0.22620, acc = 0.92317


100%|██████████| 57/57 [00:17<00:00,  3.28it/s]


[ Valid | 338/400 ] loss = 1.04829, acc = 0.79516
[ Valid | 338/400 ] loss = 1.04829, acc = 0.79516
Learning rate: 1.6530574287115975e-06


100%|██████████| 157/157 [01:22<00:00,  1.90it/s]


[ Train | 339/400 ] loss = 0.22262, acc = 0.92416


100%|██████████| 57/57 [00:17<00:00,  3.25it/s]


[ Valid | 339/400 ] loss = 1.01106, acc = 0.78876
[ Valid | 339/400 ] loss = 1.01106, acc = 0.78876
Learning rate: 6.40359552621434e-07


100%|██████████| 157/157 [01:22<00:00,  1.90it/s]


[ Train | 340/400 ] loss = 0.23781, acc = 0.91859


100%|██████████| 57/57 [00:17<00:00,  3.28it/s]


[ Valid | 340/400 ] loss = 1.03507, acc = 0.79102
[ Valid | 340/400 ] loss = 1.03507, acc = 0.79102
Learning rate: 5.003154509899973e-05


100%|██████████| 157/157 [01:22<00:00,  1.90it/s]


[ Train | 341/400 ] loss = 0.21842, acc = 0.92605


100%|██████████| 57/57 [00:17<00:00,  3.26it/s]


[ Valid | 341/400 ] loss = 1.01541, acc = 0.79534
[ Valid | 341/400 ] loss = 1.01541, acc = 0.79534
Learning rate: 4.972540617653151e-05


100%|██████████| 157/157 [01:22<00:00,  1.89it/s]


[ Train | 342/400 ] loss = 0.27558, acc = 0.90864


100%|██████████| 57/57 [00:17<00:00,  3.25it/s]


[ Valid | 342/400 ] loss = 1.02120, acc = 0.77186
[ Valid | 342/400 ] loss = 1.02120, acc = 0.77186
Learning rate: 4.881452756541487e-05


100%|██████████| 157/157 [01:22<00:00,  1.90it/s]


[ Train | 343/400 ] loss = 0.28582, acc = 0.90147


100%|██████████| 57/57 [00:17<00:00,  3.26it/s]


[ Valid | 343/400 ] loss = 1.02573, acc = 0.77551
[ Valid | 343/400 ] loss = 1.02573, acc = 0.77551
Learning rate: 4.7321338120088267e-05


100%|██████████| 157/157 [01:22<00:00,  1.90it/s]


[ Train | 344/400 ] loss = 0.27068, acc = 0.90705


100%|██████████| 57/57 [00:17<00:00,  3.27it/s]


[ Valid | 344/400 ] loss = 1.05569, acc = 0.78435
[ Valid | 344/400 ] loss = 1.05569, acc = 0.78435
Learning rate: 4.5282605120307314e-05


100%|██████████| 157/157 [01:22<00:00,  1.90it/s]


[ Train | 345/400 ] loss = 0.27860, acc = 0.90854


100%|██████████| 57/57 [00:17<00:00,  3.26it/s]


[ Valid | 345/400 ] loss = 0.99857, acc = 0.78506
[ Valid | 345/400 ] loss = 0.99857, acc = 0.78506
Learning rate: 4.274852893869352e-05


100%|██████████| 157/157 [01:22<00:00,  1.91it/s]


[ Train | 346/400 ] loss = 0.26396, acc = 0.90864


100%|██████████| 57/57 [00:17<00:00,  3.26it/s]


[ Valid | 346/400 ] loss = 0.97015, acc = 0.77960
[ Valid | 346/400 ] loss = 0.97015, acc = 0.77960
Learning rate: 3.9781506940954896e-05


100%|██████████| 157/157 [01:22<00:00,  1.91it/s]


[ Train | 347/400 ] loss = 0.25965, acc = 0.91043


100%|██████████| 57/57 [00:17<00:00,  3.29it/s]


[ Valid | 347/400 ] loss = 1.07317, acc = 0.78077
[ Valid | 347/400 ] loss = 1.07317, acc = 0.78077
Learning rate: 3.645459705565721e-05


100%|██████████| 157/157 [01:22<00:00,  1.91it/s]


[ Train | 348/400 ] loss = 0.25337, acc = 0.91133


100%|██████████| 57/57 [00:17<00:00,  3.29it/s]


[ Valid | 348/400 ] loss = 1.06337, acc = 0.77985
[ Valid | 348/400 ] loss = 1.06337, acc = 0.77985
Learning rate: 3.284971884555738e-05


100%|██████████| 157/157 [01:22<00:00,  1.90it/s]


[ Train | 349/400 ] loss = 0.26180, acc = 0.91162


100%|██████████| 57/57 [00:17<00:00,  3.25it/s]


[ Valid | 349/400 ] loss = 1.05321, acc = 0.78195
[ Valid | 349/400 ] loss = 1.05321, acc = 0.78195
Learning rate: 2.9055636376092934e-05


100%|██████████| 157/157 [01:22<00:00,  1.90it/s]


[ Train | 350/400 ] loss = 0.25982, acc = 0.91043


100%|██████████| 57/57 [00:17<00:00,  3.27it/s]


[ Valid | 350/400 ] loss = 1.07549, acc = 0.78204
[ Valid | 350/400 ] loss = 1.07549, acc = 0.78204
Learning rate: 2.5165772549499864e-05


100%|██████████| 157/157 [01:22<00:00,  1.90it/s]


[ Train | 351/400 ] loss = 0.26023, acc = 0.91451


100%|██████████| 57/57 [00:17<00:00,  3.26it/s]


[ Valid | 351/400 ] loss = 1.02676, acc = 0.78376
[ Valid | 351/400 ] loss = 1.02676, acc = 0.78376
Learning rate: 2.12759087229068e-05


100%|██████████| 157/157 [01:22<00:00,  1.90it/s]


[ Train | 352/400 ] loss = 0.25053, acc = 0.91541


100%|██████████| 57/57 [00:17<00:00,  3.28it/s]


[ Valid | 352/400 ] loss = 1.05042, acc = 0.78090
[ Valid | 352/400 ] loss = 1.05042, acc = 0.78090
Learning rate: 1.7481826253442345e-05


100%|██████████| 157/157 [01:22<00:00,  1.90it/s]


[ Train | 353/400 ] loss = 0.23510, acc = 0.92118


100%|██████████| 57/57 [00:17<00:00,  3.28it/s]


[ Valid | 353/400 ] loss = 0.98983, acc = 0.78872
[ Valid | 353/400 ] loss = 0.98983, acc = 0.78872
Learning rate: 1.3876948043342517e-05


100%|██████████| 157/157 [01:22<00:00,  1.90it/s]


[ Train | 354/400 ] loss = 0.21602, acc = 0.92795


100%|██████████| 57/57 [00:18<00:00,  3.16it/s]


[ Valid | 354/400 ] loss = 1.03486, acc = 0.79054
[ Valid | 354/400 ] loss = 1.03486, acc = 0.79054
Learning rate: 1.0550038158044836e-05


100%|██████████| 157/157 [01:26<00:00,  1.82it/s]


[ Train | 355/400 ] loss = 0.23442, acc = 0.92436


100%|██████████| 57/57 [00:17<00:00,  3.18it/s]


[ Valid | 355/400 ] loss = 1.06254, acc = 0.78447
[ Valid | 355/400 ] loss = 1.06254, acc = 0.78447
Learning rate: 7.583016160306204e-06


100%|██████████| 157/157 [01:26<00:00,  1.82it/s]


[ Train | 356/400 ] loss = 0.21996, acc = 0.92596


100%|██████████| 57/57 [00:18<00:00,  3.16it/s]


[ Valid | 356/400 ] loss = 1.00137, acc = 0.79395
[ Valid | 356/400 ] loss = 1.00137, acc = 0.79395
Learning rate: 5.048939978692412e-06


100%|██████████| 157/157 [01:26<00:00,  1.82it/s]


[ Train | 357/400 ] loss = 0.20977, acc = 0.93133


100%|██████████| 57/57 [00:18<00:00,  3.17it/s]


[ Valid | 357/400 ] loss = 1.02473, acc = 0.79118
[ Valid | 357/400 ] loss = 1.02473, acc = 0.79118
Learning rate: 3.0102069789114618e-06


100%|██████████| 157/157 [01:27<00:00,  1.80it/s]


[ Train | 358/400 ] loss = 0.20831, acc = 0.93113


100%|██████████| 57/57 [00:18<00:00,  3.15it/s]


[ Valid | 358/400 ] loss = 1.02748, acc = 0.79139
[ Valid | 358/400 ] loss = 1.02748, acc = 0.79139
Learning rate: 1.5170175335848653e-06


100%|██████████| 157/157 [01:26<00:00,  1.81it/s]


[ Train | 359/400 ] loss = 0.20905, acc = 0.93193


100%|██████████| 57/57 [00:18<00:00,  3.17it/s]


[ Valid | 359/400 ] loss = 1.02389, acc = 0.79052
[ Valid | 359/400 ] loss = 1.02389, acc = 0.79052
Learning rate: 6.061389224682177e-07


100%|██████████| 157/157 [01:27<00:00,  1.80it/s]


[ Train | 360/400 ] loss = 0.22493, acc = 0.92446


100%|██████████| 57/57 [00:17<00:00,  3.20it/s]


[ Valid | 360/400 ] loss = 1.01989, acc = 0.79429
[ Valid | 360/400 ] loss = 1.01989, acc = 0.79429
Learning rate: 4.502839058909975e-05


100%|██████████| 157/157 [01:26<00:00,  1.82it/s]


[ Train | 361/400 ] loss = 0.20994, acc = 0.92834


100%|██████████| 57/57 [00:17<00:00,  3.18it/s]


[ Valid | 361/400 ] loss = 1.05950, acc = 0.78698
[ Valid | 361/400 ] loss = 1.05950, acc = 0.78698
Learning rate: 4.475305023376943e-05


100%|██████████| 157/157 [01:26<00:00,  1.82it/s]


[ Train | 362/400 ] loss = 0.24950, acc = 0.91551


100%|██████████| 57/57 [00:17<00:00,  3.17it/s]


[ Valid | 362/400 ] loss = 1.05730, acc = 0.78408
[ Valid | 362/400 ] loss = 1.05730, acc = 0.78408
Learning rate: 4.393380896112895e-05


100%|██████████| 157/157 [01:26<00:00,  1.82it/s]


[ Train | 363/400 ] loss = 0.26911, acc = 0.91123


100%|██████████| 57/57 [00:17<00:00,  3.19it/s]


[ Valid | 363/400 ] loss = 1.12157, acc = 0.77622
[ Valid | 363/400 ] loss = 1.12157, acc = 0.77622
Learning rate: 4.259083921021661e-05


100%|██████████| 157/157 [01:26<00:00,  1.82it/s]


[ Train | 364/400 ] loss = 0.25304, acc = 0.91441


100%|██████████| 57/57 [00:18<00:00,  3.17it/s]


[ Valid | 364/400 ] loss = 1.04256, acc = 0.77144
[ Valid | 364/400 ] loss = 1.04256, acc = 0.77144
Learning rate: 4.075720935336096e-05


100%|██████████| 157/157 [01:25<00:00,  1.83it/s]


[ Train | 365/400 ] loss = 0.26673, acc = 0.91073


100%|██████████| 57/57 [00:17<00:00,  3.17it/s]


[ Valid | 365/400 ] loss = 0.94612, acc = 0.78545
[ Valid | 365/400 ] loss = 0.94612, acc = 0.78545
Learning rate: 3.847806944310637e-05


100%|██████████| 157/157 [01:26<00:00,  1.82it/s]


[ Train | 366/400 ] loss = 0.26013, acc = 0.91123


100%|██████████| 57/57 [00:18<00:00,  3.16it/s]


[ Valid | 366/400 ] loss = 0.99542, acc = 0.78924
[ Valid | 366/400 ] loss = 0.99542, acc = 0.78924
Learning rate: 3.580953946807501e-05


100%|██████████| 157/157 [01:26<00:00,  1.82it/s]


[ Train | 367/400 ] loss = 0.25511, acc = 0.91342


100%|██████████| 57/57 [00:17<00:00,  3.18it/s]


[ Valid | 367/400 ] loss = 1.03619, acc = 0.79198
[ Valid | 367/400 ] loss = 1.03619, acc = 0.79198
Learning rate: 3.2817327492595394e-05


100%|██████████| 157/157 [01:26<00:00,  1.82it/s]


[ Train | 368/400 ] loss = 0.24765, acc = 0.91710


100%|██████████| 57/57 [00:18<00:00,  3.17it/s]


[ Valid | 368/400 ] loss = 1.00245, acc = 0.78803
[ Valid | 368/400 ] loss = 1.00245, acc = 0.78803
Learning rate: 2.957511170608602e-05


100%|██████████| 157/157 [01:26<00:00,  1.82it/s]


[ Train | 369/400 ] loss = 0.24177, acc = 0.91312


100%|██████████| 57/57 [00:18<00:00,  3.16it/s]


[ Valid | 369/400 ] loss = 1.11014, acc = 0.78116
[ Valid | 369/400 ] loss = 1.11014, acc = 0.78116
Learning rate: 2.6162726221508034e-05


100%|██████████| 157/157 [01:26<00:00,  1.82it/s]


[ Train | 370/400 ] loss = 0.24353, acc = 0.91312


100%|██████████| 57/57 [00:17<00:00,  3.21it/s]


[ Valid | 370/400 ] loss = 1.06561, acc = 0.78855
[ Valid | 370/400 ] loss = 1.06561, acc = 0.78855
Learning rate: 2.2664195294549875e-05


100%|██████████| 157/157 [01:25<00:00,  1.83it/s]


[ Train | 371/400 ] loss = 0.23707, acc = 0.91899


100%|██████████| 57/57 [00:17<00:00,  3.20it/s]


[ Valid | 371/400 ] loss = 1.04482, acc = 0.79114
[ Valid | 371/400 ] loss = 1.04482, acc = 0.79114
Learning rate: 1.9165664367591723e-05


100%|██████████| 157/157 [01:25<00:00,  1.83it/s]


[ Train | 372/400 ] loss = 0.23353, acc = 0.91929


100%|██████████| 57/57 [00:17<00:00,  3.22it/s]


[ Valid | 372/400 ] loss = 1.02008, acc = 0.79086
[ Valid | 372/400 ] loss = 1.02008, acc = 0.79086
Learning rate: 1.575327888301373e-05


100%|██████████| 157/157 [01:25<00:00,  1.84it/s]


[ Train | 373/400 ] loss = 0.22172, acc = 0.92526


100%|██████████| 57/57 [00:17<00:00,  3.21it/s]


[ Valid | 373/400 ] loss = 1.04306, acc = 0.79411
[ Valid | 373/400 ] loss = 1.04306, acc = 0.79411
Learning rate: 1.2511063096504358e-05


100%|██████████| 157/157 [01:25<00:00,  1.83it/s]


[ Train | 374/400 ] loss = 0.21670, acc = 0.92805


100%|██████████| 57/57 [00:17<00:00,  3.19it/s]


[ Valid | 374/400 ] loss = 1.08310, acc = 0.79024
[ Valid | 374/400 ] loss = 1.08310, acc = 0.79024
Learning rate: 9.518851121024739e-06


100%|██████████| 157/157 [01:25<00:00,  1.83it/s]


[ Train | 375/400 ] loss = 0.21774, acc = 0.92476


100%|██████████| 57/57 [00:17<00:00,  3.19it/s]


[ Valid | 375/400 ] loss = 1.07562, acc = 0.78952
[ Valid | 375/400 ] loss = 1.07562, acc = 0.78952
Learning rate: 6.850321145993382e-06


100%|██████████| 157/157 [01:25<00:00,  1.83it/s]


[ Train | 376/400 ] loss = 0.21836, acc = 0.92715


100%|██████████| 57/57 [00:17<00:00,  3.21it/s]


[ Valid | 376/400 ] loss = 1.07641, acc = 0.78696
[ Valid | 376/400 ] loss = 1.07641, acc = 0.78696
Learning rate: 4.571181235738794e-06


100%|██████████| 157/157 [01:25<00:00,  1.84it/s]


[ Train | 377/400 ] loss = 0.21405, acc = 0.92755


100%|██████████| 57/57 [00:17<00:00,  3.20it/s]


[ Valid | 377/400 ] loss = 1.05243, acc = 0.79191
[ Valid | 377/400 ] loss = 1.05243, acc = 0.79191
Learning rate: 2.737551378883141e-06


100%|██████████| 157/157 [01:26<00:00,  1.81it/s]


[ Train | 378/400 ] loss = 0.21636, acc = 0.92735


100%|██████████| 57/57 [00:18<00:00,  3.16it/s]


[ Valid | 378/400 ] loss = 1.07000, acc = 0.78762
[ Valid | 378/400 ] loss = 1.07000, acc = 0.78762
Learning rate: 1.3945816279708058e-06


100%|██████████| 157/157 [01:26<00:00,  1.81it/s]


[ Train | 379/400 ] loss = 0.20896, acc = 0.92994


100%|██████████| 57/57 [00:17<00:00,  3.18it/s]


[ Valid | 379/400 ] loss = 1.04171, acc = 0.79637
[ Valid | 379/400 ] loss = 1.04171, acc = 0.79637 -> best
Best model found at epoch 378, saving model
Learning rate: 5.75340355330323e-07


100%|██████████| 157/157 [01:26<00:00,  1.81it/s]


[ Train | 380/400 ] loss = 0.21193, acc = 0.93004


100%|██████████| 57/57 [00:18<00:00,  3.15it/s]


[ Valid | 380/400 ] loss = 1.01160, acc = 0.80213
[ Valid | 380/400 ] loss = 1.01160, acc = 0.80213 -> best
Best model found at epoch 379, saving model
Learning rate: 4.052555153018978e-05


100%|██████████| 157/157 [01:26<00:00,  1.81it/s]


[ Train | 381/400 ] loss = 0.20203, acc = 0.93063


100%|██████████| 57/57 [00:18<00:00,  3.15it/s]


[ Valid | 381/400 ] loss = 1.04800, acc = 0.79077
[ Valid | 381/400 ] loss = 1.04800, acc = 0.79077
Learning rate: 4.027792988528356e-05


100%|██████████| 157/157 [01:27<00:00,  1.80it/s]


[ Train | 382/400 ] loss = 0.25209, acc = 0.91481


100%|██████████| 57/57 [00:18<00:00,  3.15it/s]


[ Valid | 382/400 ] loss = 1.04030, acc = 0.78531
[ Valid | 382/400 ] loss = 1.04030, acc = 0.78531
Learning rate: 3.954116221727163e-05


100%|██████████| 157/157 [01:26<00:00,  1.81it/s]


[ Train | 383/400 ] loss = 0.24426, acc = 0.91750


100%|██████████| 57/57 [00:18<00:00,  3.16it/s]


[ Valid | 383/400 ] loss = 1.11269, acc = 0.78227
[ Valid | 383/400 ] loss = 1.11269, acc = 0.78227
Learning rate: 3.8333390191332126e-05


100%|██████████| 157/157 [01:27<00:00,  1.80it/s]


[ Train | 384/400 ] loss = 0.25745, acc = 0.91312


100%|██████████| 57/57 [00:17<00:00,  3.17it/s]


[ Valid | 384/400 ] loss = 1.00106, acc = 0.78696
[ Valid | 384/400 ] loss = 1.00106, acc = 0.78696
Learning rate: 3.668435316310924e-05


100%|██████████| 157/157 [01:27<00:00,  1.80it/s]


[ Train | 385/400 ] loss = 0.25030, acc = 0.91352


100%|██████████| 57/57 [00:18<00:00,  3.16it/s]


[ Valid | 385/400 ] loss = 1.03457, acc = 0.77825
[ Valid | 385/400 ] loss = 1.03457, acc = 0.77825
Learning rate: 3.4634655897077937e-05


100%|██████████| 157/157 [01:26<00:00,  1.80it/s]


[ Train | 386/400 ] loss = 0.23936, acc = 0.91829


100%|██████████| 57/57 [00:18<00:00,  3.14it/s]


[ Valid | 386/400 ] loss = 1.06957, acc = 0.78042
[ Valid | 386/400 ] loss = 1.06957, acc = 0.78042
Learning rate: 3.223476874248313e-05


100%|██████████| 157/157 [01:26<00:00,  1.81it/s]


[ Train | 387/400 ] loss = 0.24622, acc = 0.91580


100%|██████████| 57/57 [00:18<00:00,  3.16it/s]


[ Valid | 387/400 ] loss = 1.03791, acc = 0.78670
[ Valid | 387/400 ] loss = 1.03791, acc = 0.78670
Learning rate: 2.9543784885839765e-05


100%|██████████| 157/157 [01:26<00:00,  1.81it/s]


[ Train | 388/400 ] loss = 0.23267, acc = 0.92297


100%|██████████| 57/57 [00:18<00:00,  3.15it/s]


[ Valid | 388/400 ] loss = 1.05835, acc = 0.78858
[ Valid | 388/400 ] loss = 1.05835, acc = 0.78858
Learning rate: 2.6627965280561796e-05


100%|██████████| 157/157 [01:24<00:00,  1.86it/s]


[ Train | 389/400 ] loss = 0.23425, acc = 0.91760


100%|██████████| 57/57 [00:17<00:00,  3.29it/s]


[ Valid | 389/400 ] loss = 1.06615, acc = 0.78426
[ Valid | 389/400 ] loss = 1.06615, acc = 0.78426
Learning rate: 2.355910708238163e-05


100%|██████████| 157/157 [01:22<00:00,  1.90it/s]


[ Train | 390/400 ] loss = 0.22914, acc = 0.92227


100%|██████████| 57/57 [00:17<00:00,  3.26it/s]


[ Valid | 390/400 ] loss = 1.02990, acc = 0.78645
[ Valid | 390/400 ] loss = 1.02990, acc = 0.78645
Learning rate: 2.041277576509489e-05


100%|██████████| 157/157 [01:22<00:00,  1.90it/s]


[ Train | 391/400 ] loss = 0.22981, acc = 0.92367


100%|██████████| 57/57 [00:18<00:00,  3.16it/s]


[ Valid | 391/400 ] loss = 1.07443, acc = 0.79118
[ Valid | 391/400 ] loss = 1.07443, acc = 0.79118
Learning rate: 1.7266444447808157e-05


100%|██████████| 157/157 [01:26<00:00,  1.82it/s]


[ Train | 392/400 ] loss = 0.21587, acc = 0.92615


100%|██████████| 57/57 [00:18<00:00,  3.15it/s]


[ Valid | 392/400 ] loss = 1.08412, acc = 0.78453
[ Valid | 392/400 ] loss = 1.08412, acc = 0.78453
Learning rate: 1.4197586249627985e-05


100%|██████████| 157/157 [01:26<00:00,  1.82it/s]


[ Train | 393/400 ] loss = 0.23416, acc = 0.91849


100%|██████████| 57/57 [00:18<00:00,  3.15it/s]


[ Valid | 393/400 ] loss = 1.00928, acc = 0.79205
[ Valid | 393/400 ] loss = 1.00928, acc = 0.79205
Learning rate: 1.1281766644350016e-05


100%|██████████| 157/157 [01:26<00:00,  1.81it/s]


[ Train | 394/400 ] loss = 0.22093, acc = 0.92715


100%|██████████| 57/57 [00:18<00:00,  3.16it/s]


[ Valid | 394/400 ] loss = 1.03712, acc = 0.78483
[ Valid | 394/400 ] loss = 1.03712, acc = 0.78483
Learning rate: 8.590782787706653e-06


100%|██████████| 157/157 [01:26<00:00,  1.82it/s]


[ Train | 395/400 ] loss = 0.21190, acc = 0.92864


100%|██████████| 57/57 [00:18<00:00,  3.16it/s]


[ Valid | 395/400 ] loss = 0.99821, acc = 0.79740
[ Valid | 395/400 ] loss = 0.99821, acc = 0.79740
Learning rate: 6.190895633111842e-06


100%|██████████| 157/157 [01:26<00:00,  1.82it/s]


[ Train | 396/400 ] loss = 0.21665, acc = 0.92625


100%|██████████| 57/57 [00:17<00:00,  3.17it/s]


[ Valid | 396/400 ] loss = 1.04138, acc = 0.79363
[ Valid | 396/400 ] loss = 1.04138, acc = 0.79363
Learning rate: 4.14119836708054e-06


100%|██████████| 157/157 [01:26<00:00,  1.82it/s]


[ Train | 397/400 ] loss = 0.20874, acc = 0.93213


100%|██████████| 57/57 [00:17<00:00,  3.18it/s]


[ Valid | 397/400 ] loss = 1.06840, acc = 0.78143
[ Valid | 397/400 ] loss = 1.06840, acc = 0.78143
Learning rate: 2.4921613388576524e-06


100%|██████████| 157/157 [01:26<00:00,  1.82it/s]


[ Train | 398/400 ] loss = 0.20779, acc = 0.93073


100%|██████████| 57/57 [00:18<00:00,  3.15it/s]


[ Valid | 398/400 ] loss = 1.04410, acc = 0.79776
[ Valid | 398/400 ] loss = 1.04410, acc = 0.79776
Learning rate: 1.2843893129181525e-06


100%|██████████| 157/157 [01:26<00:00,  1.82it/s]


[ Train | 399/400 ] loss = 0.19635, acc = 0.93232


100%|██████████| 57/57 [00:18<00:00,  3.13it/s]


[ Valid | 399/400 ] loss = 1.05995, acc = 0.79196
[ Valid | 399/400 ] loss = 1.05995, acc = 0.79196
Learning rate: 5.476216449062179e-07


100%|██████████| 157/157 [01:26<00:00,  1.82it/s]


[ Train | 400/400 ] loss = 0.19761, acc = 0.93461


100%|██████████| 57/57 [00:17<00:00,  3.17it/s]

[ Valid | 400/400 ] loss = 1.04619, acc = 0.79338
[ Valid | 400/400 ] loss = 1.04619, acc = 0.79338





# Dataloader for test

In [34]:
# Construct test datasets.
# The argument "loader" tells how torchvision reads the data.
test_set = FoodDataset("./test", tfm=test_tfm)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)
test_set_transformed = FoodDataset("./test", tfm=train_tfm)
test_loader_transformed = DataLoader(test_set_transformed, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)

# Testing and generate prediction CSV

In [37]:
model_best = VGG13.to(device)
model_best.load_state_dict(torch.load(f"{_exp_name}_best.ckpt"))
model_best.eval()
prediction = []
prediction_transformed = []
with torch.no_grad():
    for data,_ in tqdm(test_loader):
        test_pred = model_best(data.to(device))
        prediction += test_pred.squeeze().tolist()

    for data,_ in tqdm(test_loader_transformed):
        test_transformed_pred = model_best(data.to(device))
        prediction_transformed += test_transformed_pred.squeeze().tolist()

100%|██████████| 47/47 [00:15<00:00,  3.03it/s]
100%|██████████| 47/47 [00:13<00:00,  3.50it/s]


In [38]:
prediction_ensemble = []
prediction = np.array(prediction)
prediction_transformed = np.array(prediction_transformed)
for i in range(len(prediction)):
        w = 0.9
        test_ensemble_pred = w * prediction[i,:] + (1-w) * prediction_transformed[i,:]
        test_ensemble_label = np.argmax(test_ensemble_pred, axis=0)
        prediction_ensemble.append(test_ensemble_label)

In [39]:
# create test csv
def pad4(i):
    return "0"*(4-len(str(i)))+str(i)
df = pd.DataFrame()
df["Id"] = [pad4(i) for i in range(len(test_set))]
df["Category"] = prediction_ensemble
df.to_csv("submission.csv",index = False)

# Q1. Augmentation Implementation
## Implement augmentation by finishing train_tfm in the code with image size of your choice. 
## Directly copy the following block and paste it on GradeScope after you finish the code
### Your train_tfm must be capable of producing 5+ different results when given an identical image multiple times.
### Your  train_tfm in the report can be different from train_tfm in your training code.


In [31]:
train_tfm = transforms.Compose([
    # Resize the image into a fixed shape (height = width = 128)
    transforms.Resize((128, 128)),
    # You can add some transforms here.
    transforms.ToTensor(),
])

# Q2. Visual Representations Implementation
## Visualize the learned visual representations of the CNN model on the validation set by implementing t-SNE (t-distributed Stochastic Neighbor Embedding) on the output of both top & mid layers (You need to submit 2 images). 


In [32]:
# import torch
# import numpy as np
# from sklearn.manifold import TSNE
# import matplotlib.pyplot as plt
# from tqdm import tqdm
# import matplotlib.cm as cm
# import torch.nn as nn

# device = 'cuda' if torch.cuda.is_available() else 'cpu'

# # Load the trained model
# model = DenseNet169.to(device)
# state_dict = torch.load(f"{_exp_name}_best.ckpt")
# model.load_state_dict(state_dict)
# model.eval()

# print(model)

In [33]:
# # Load the vaildation set defined by TA
# valid_set = FoodDataset("./valid", tfm=test_tfm)
# valid_loader = DataLoader(valid_set, batch_size=64, shuffle=False, num_workers=0, pin_memory=True)

# # Extract the representations for the specific layer of model
# index = ... # You should find out the index of layer which is defined as "top" or 'mid' layer of your model.
# features = []
# labels = []
# for batch in tqdm(valid_loader):
#     imgs, lbls = batch
#     with torch.no_grad():
#         logits = model.cnn[:index](imgs.to(device))
#         logits = logits.view(logits.size()[0], -1)
#     labels.extend(lbls.cpu().numpy())
#     logits = np.squeeze(logits.cpu().numpy())
#     features.extend(logits)
    
# features = np.array(features)
# colors_per_class = cm.rainbow(np.linspace(0, 1, 11))

# # Apply t-SNE to the features
# features_tsne = TSNE(n_components=2, init='pca', random_state=42).fit_transform(features)

# # Plot the t-SNE visualization
# plt.figure(figsize=(10, 8))
# for label in np.unique(labels):
#     plt.scatter(features_tsne[labels == label, 0], features_tsne[labels == label, 1], label=label, s=5)
# plt.legend()
# plt.show()

Kaggle public score 0.80133