# Imports and Data paths

In [1]:
import torch
from torchvision.datasets import ImageFolder
from torchvision import transforms
from torch.optim import lr_scheduler
import torch.optim as optim

import os
import wandb
wandb.init(project="M6-Metric-Learning", entity="fantastic5")

from metric_learning_utils import *
from metric_learning_trainer import fit

# Query CUDA availability once; `cuda` is the boolean flag and `device`
# is the matching device string derived from it.
cuda = torch.cuda.is_available()
device = 'cuda' if cuda else 'cpu'

%matplotlib inline
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mfantastic5[0m (use `wandb login --relogin` to force relogin)


# Prepare Dataset

In [2]:
# Root folders of the train / test images, given relative to the notebook's
# working directory. (The "TRIAN" spelling is kept because other cells
# reference this name.)
TRIAN_DATA_PATH = "aicity_cars_dataset/train"
TEST_DATA_PATH = "aicity_cars_dataset/test"

# Mini-batch size used by the DataLoaders built below.
batch_size = 32

# Extra DataLoader options: only set when a CUDA device is available,
# otherwise the DataLoader defaults are used.
if cuda:
    kwargs = {'num_workers': 1, 'pin_memory': True}
else:
    kwargs = {}

def load_split_train_test(TRIAN_DATA_PATH, TEST_DATA_PATH):
    """Build plain and triplet DataLoaders for the train and test image folders.

    The same deterministic preprocessing (resize to 256x256, tensor
    conversion, per-channel normalization) is applied to both splits; there
    is no train-only augmentation.

    Reads the module-level ``batch_size`` and ``kwargs`` (extra DataLoader
    options) defined in the configuration cell.

    Args:
        TRIAN_DATA_PATH: Root directory of the training images, laid out as
            expected by ``ImageFolder`` (one sub-directory per class). The
            misspelled parameter name is kept for backward compatibility.
        TEST_DATA_PATH: Root directory of the test images, same layout.

    Returns:
        Tuple ``(train_loader, test_loader, triplet_train_loader,
        triplet_test_loader)``. Train loaders are shuffled, test loaders
        are not.
    """
    # Shared preprocessing for both splits. The mean/std values match the
    # ImageNet channel statistics documented for torchvision's pretrained models.
    preprocess = transforms.Compose([
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    train_data = ImageFolder(TRIAN_DATA_PATH, transform=preprocess)
    test_data = ImageFolder(TEST_DATA_PATH, transform=preprocess)

    # Prepare triplet datasets.
    # NOTE(review): per the original comment these return a triplet of images
    # plus a same/different target -- confirm against TripletMIT_split. The
    # transform is passed both to ImageFolder and to the wrapper, exactly as
    # before; verify it is not applied twice inside TripletMIT_split.
    triplet_train_dataset = TripletMIT_split(train_data, split='train', transform=preprocess)
    triplet_test_dataset = TripletMIT_split(test_data, split='test', transform=preprocess)

    # All four loaders now receive the same `kwargs`; previously only the
    # triplet loaders got the num_workers / pin_memory options.
    train_loader = torch.utils.data.DataLoader(
        train_data, batch_size=batch_size, shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        test_data, batch_size=batch_size, shuffle=False, **kwargs)

    triplet_train_loader = torch.utils.data.DataLoader(
        triplet_train_dataset, batch_size=batch_size, shuffle=True, **kwargs)
    triplet_test_loader = torch.utils.data.DataLoader(
        triplet_test_dataset, batch_size=batch_size, shuffle=False, **kwargs)

    return train_loader, test_loader, triplet_train_loader, triplet_test_loader

# Build the plain and triplet loaders for both splits in one call.
(
    train_loader,
    test_loader,
    triplet_train_loader,
    triplet_test_loader,
) = load_split_train_test(TRIAN_DATA_PATH, TEST_DATA_PATH)

# Number of classes discovered by ImageFolder in the training directory.
num_classes = len(train_loader.dataset.classes)

In [4]:
# Set up the network and training parameters.
#
# If no checkpoint exists at `save_path`, the triplet network is trained and
# its weights are saved there; otherwise the saved weights are loaded.

# NOTE(review): the file name says "ResNet50" but the backbone built below is
# 'resnet18'. The path is left unchanged so existing checkpoints are still
# found -- confirm that a checkpoint stored here matches the backbone in use.
save_path = 'aicity_cars_metric_learning_results/metric_learning_ResNet50.pth'

## test different backbones
embedding_net = EmbeddingNet_V3('resnet18', '0')

model = TripletNet(embedding_net)

# Move the model to the selected device exactly once ('cuda' when available,
# otherwise 'cpu'); this replaces the repeated model.cuda() calls.
model.to(device)

if not os.path.exists(save_path):
    if cuda:
        print('Cuda!!!')

    # Create the output directory up front so torch.save() cannot fail on a
    # missing folder after the whole training run has finished.
    save_dir = os.path.dirname(save_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    margin = 1.
    loss_fn = TripletLoss(margin)
    lr = 1e-3
    optimizer = optim.Adam(model.parameters(), lr=lr)
    # Halve the learning rate every 20 epochs.
    scheduler = lr_scheduler.StepLR(optimizer, 20, gamma=0.5, last_epoch=-1)
    n_epochs = 50
    log_interval = 10

    # Record the hyperparameters on the active wandb run. Assigning to
    # `wandb.config` would only rebind the module attribute; `update` writes
    # into the run's config. `allow_val_change` lets the cell be re-run with
    # different values in the same run.
    wandb.config.update(
        {
            "learning_rate": lr,
            "epochs": n_epochs,
            "batch_size": batch_size,
        },
        allow_val_change=True,
    )

    ## Training !!!
    fit(triplet_train_loader, triplet_test_loader, model, loss_fn, optimizer, scheduler, n_epochs, cuda, log_interval)
    torch.save(model.state_dict(), save_path)
else:
    print('Loading model...')
    # map_location lets a checkpoint saved on GPU be loaded on a CPU-only
    # machine (and vice versa); the model itself is already on `device`.
    model.load_state_dict(torch.load(save_path, map_location=device))

Using cache found in /home/group05/.cache/torch/hub/pytorch_vision_v0.10.0


Cuda!!!


RuntimeError: CUDA out of memory. Tried to allocate 2.00 MiB (GPU 0; 23.70 GiB total capacity; 10.34 GiB already allocated; 2.69 MiB free; 11.13 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF