In [1]:
!pip install -r requirements.txt -qU

In [2]:
import torch
import torchvision.transforms.v2 as v2
import torchvision.datasets as ds
from torchvision.models.resnet import ResNet50_Weights
import resnet50
import numpy as np
from sklearn.model_selection import KFold
from torch.utils.data import DataLoader, Dataset
from torch.optim import SGD, Adam, Adamax
from torch.nn import CrossEntropyLoss
import train

In [3]:
import wandb
import os
os.environ['WANDB_NOTEBOOK_NAME'] = '/home/hieu/Desktop/Computer_Vision/General/classification.ipynb'
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mhieu1001[0m ([33mno-organization-123[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [4]:
# Distribution of labels
# Labels are not equally distributed, need to handle later
 

Tracking performance

Config
- Hyperparam of data
  - batch size
- Hyperparam of optimizer:
  - learning rate
  - epoch
- Hyperparam of model:
  - pretrained model
  - non-freezed layers

Metric log (for error analysis)
- Prediction:
  - Overall: Accuracy, Precision, Recall, F1 score. Micro-averaging and macro averaging, micro-averaging gives equal weight to each class while micro averaging gives equal weight to each instance. Accuracy = (Correct predictions / All predictions): disregards class imbalance and cost of different errors. The most straight forward way is to calculate the Recall and Precision for each class. Recall = (correct class A prediction) / (all class A instances); Precision = (correct class A) / (all class A prediction)
- Weights: histogram, norm, 
- Grad: Histogram of gradient, gradient norm in each layer
  




In [5]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Hyperparameters
config = {
    'K': 5,
    'SEED': 42,
    'ROOT': 'data',
    'BATCH': 32,
    'NOF_CLASSES': 257,
    'TEST_SPLIT': 0.2,
    'EPOCH': 20,
    'LR': 0.001,
    'MODEL': "resnet50",
    'NON_FREEZE_PARAMS': ['fc.weight', 'fc.bias']
}
np.random.seed(config["SEED"])

# Transform and Augment data, for each batch, right now using
# Resize only, can change to resize random crop later.
transforms = v2.Compose([
    v2.ToImage(),
    v2.Resize(size=(224, 224), antialias=True),
    v2.RandomHorizontalFlip(p=0.5),
    v2.AugMix(),
    v2.ToDtype(torch.float32, scale=True),
    v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Create dataset, split into training and testing dataset
caltech256 = ds.Caltech256(
    root= config["ROOT"],
    download= False,
    transform= transforms
)
indices = np.arange(0, len(caltech256))
np.random.shuffle(indices)
test_indices = indices[0: int(len(caltech256) * config["TEST_SPLIT"])]
train_indices = indices[int(len(caltech256) * config["TEST_SPLIT"]):]
categories = caltech256.categories

# Initialize pretrained model, freeze other layer except for last layer
weights = ResNet50_Weights.DEFAULT
process = weights.transforms()

# get_state_dict return an ordered_dict
# whihc basically means iterate through
# this dist is ordered
# Popitem(last = True) pop the last item
states = weights.get_state_dict()
states.popitem()
states.popitem()

model = resnet50.ResNet(resnet50.Bottleneck, [3, 4, 6, 3], num_classes= config["NOF_CLASSES"])
# Strict = False so we do not need to specify fn layer (keep as random)
model.load_state_dict(states, strict = False)
for name, parameter in model.named_parameters():
    if name in states.keys():
        parameter.requires_grad = False
        
# Eval mode do not disable the gradient but only affect
# module such as Dropout and BatchNorm
model.eval()
model.to(device)

# Main training loop
kfold  = KFold(n_splits= 10, shuffle= True, random_state= config["SEED"])

for i, (train_index_index, val_index_index) in enumerate(kfold.split(train_indices)):
    # Kfold shuffle is really bad, so many same class in one batch
    train_index = train_indices[train_index_index]
    val_index = train_indices[val_index_index]
    # Init dataloader
    train_dataloader = DataLoader(caltech256, batch_size= config["BATCH"], sampler= train_index)
    val_dataloader = DataLoader(caltech256, batch_size= config["BATCH"], sampler= val_index)
    # Init optimizer
    optimizer = Adam(model.parameters(), lr = config["LR"])
    # Init criterion
    criterion = CrossEntropyLoss()
    # Init wandb
    # wandb.init(
    #     project = "caltech256-classification",
    #     config = config,
    #     id= f"Fold: {i}"
    # )
    print("Start training")
    train.train(model, criterion, optimizer, val_dataloader, device)
    train.test(model, criterion, val_dataloader, categories, "validation", device )
    break


Start training
Training Loss: 5.5767
Training Loss: 5.4715
Training Loss: 5.5497
Training Loss: 5.4958
Training Loss: 5.4544
Training Loss: 5.4222
Training Loss: 5.3939
Training Loss: 5.4752
Training Loss: 5.3393
Training Loss: 5.2530
Training Loss: 5.3205
Training Loss: 5.2185
Training Loss: 5.4943
Training Loss: 5.3840
Training Loss: 5.3000
Training Loss: 5.0481
Training Loss: 5.2176
Training Loss: 5.2675
Training Loss: 4.9825
Training Loss: 5.0328
Training Loss: 4.9556
Training Loss: 5.1972
Training Loss: 5.2894
Training Loss: 5.1819
Training Loss: 5.0046
Training Loss: 5.0833
Training Loss: 5.2850
torch.Size([817])
torch.Size([817, 257])
Testing Loss: 4.2451
