In [1]:
from pathlib import Path

import numpy as np
import torch
import wandb
from PIL import Image
from scipy.special import softmax
from torch import nn
from torch.utils.data import DataLoader
from torcheval.metrics.functional import multiclass_f1_score
from torchvision.datasets import ImageFolder
from torchvision.models import ResNet152_Weights, resnet152
from tqdm import tqdm

from sneakers_ml.models.onnx_utils import get_session, predict, save_torch_model

In [2]:
weights = ResNet152_Weights.DEFAULT
preprocess = weights.transforms()
torch.set_float32_matmul_precision("medium")

device = "cuda:0" if torch.cuda.is_available() else "cpu"
train = "data/training/brands-classification-splits/train"
val = "data/training/brands-classification-splits/val"
test = "data/training/brands-classification-splits/test"

train_dataset = ImageFolder(train, transform=preprocess)
val_dataset = ImageFolder(val, transform=preprocess)
test_dataset = ImageFolder(test, transform=preprocess)

train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True, drop_last=False, num_workers=4)
val_dataloader = DataLoader(val_dataset, batch_size=128, shuffle=False, drop_last=False, num_workers=4)
test_dataloader = DataLoader(test_dataset, batch_size=128, shuffle=False, drop_last=False, num_workers=4)

In [3]:
path = "data/models/brands-classification/resnet152-finetune-classes.npy"
save_path = Path(path)
save_path.parent.mkdir(parents=True, exist_ok=True)
class_to_idx = train_dataset.class_to_idx
with save_path.open("wb") as save_file:
    np.save(save_file, np.array(list(class_to_idx.items())), allow_pickle=False)

In [4]:
class ResNet152Classifier(nn.Module):
    def __init__(self, num_classes: int) -> None:
        super().__init__()
        self.num_classes = num_classes
        weights = ResNet152_Weights.DEFAULT
        backbone = resnet152(weights=weights)
        num_filters = backbone.fc.in_features
        backbone.fc = nn.Linear(num_filters, self.num_classes)
        extractor_layers = list(backbone.children())[:-3]
        trainable_bottleneck_layers = list(backbone.children())[-3:-1]
        classifier_layer = list(backbone.children())[-1]
        self.feature_extractor = nn.Sequential(*extractor_layers)
        self.feature_extractor.eval()

        self.trainable_bottleneck = nn.Sequential(*trainable_bottleneck_layers)
        self.classifier = nn.Sequential(classifier_layer)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        with torch.no_grad():
            x = self.feature_extractor(x)
        x = self.trainable_bottleneck(x)
        x = torch.flatten(x, 1)
        return self.classifier(x)

In [5]:
num_classes = len(train_dataset.classes)
model = ResNet152Classifier(num_classes)
model.to(device)

ResNet152Classifier(
  (feature_extractor): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
    

In [6]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
    [{"params": model.trainable_bottleneck.parameters()}, {"params": model.classifier.parameters()}], lr=0.001
)

In [7]:
def calculate_metrics(y_pred: torch.Tensor, y_true: torch.Tensor):
    f1_macro = multiclass_f1_score(y_pred, y_true, num_classes=num_classes, average="macro")
    f1_micro = multiclass_f1_score(y_pred, y_true, num_classes=num_classes, average="micro")
    f1_weighted = multiclass_f1_score(y_pred, y_true, num_classes=num_classes, average="weighted")
    return f1_macro.item(), f1_micro.item(), f1_weighted.item()


def train_epoch(model, train_dataloader, criterion, optimizer):
    running_loss = 0.0

    model.trainable_bottleneck.train()
    model.classifier.train()

    for data in tqdm(train_dataloader):
        inputs, labels = data[0].to(device), data[1].to(device)
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    return running_loss / len(train_dataloader)


def eval_epoch(model, val_dataloader, criterion):
    running_loss = 0.0
    y_true = []
    y_pred = []

    model.trainable_bottleneck.eval()
    model.classifier.eval()

    with torch.inference_mode():
        for data in tqdm(val_dataloader):
            inputs, labels = data[0].to(device), data[1].to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            running_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            y_true.append(labels.cpu())
            y_pred.append(predicted.cpu())

        y_true = torch.cat(y_true)
        y_pred = torch.cat(y_pred)
        f1_macro, f1_micro, f1_weighted = calculate_metrics(y_pred, y_true)

        return running_loss / len(train_dataloader), f1_macro, f1_micro, f1_weighted

In [8]:
wandb.init(project="sneakers_ml")

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mseara[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [9]:
num_epochs = 5


def train(model, train_dataloader, criterion, optimizer, val_dataloader):
    for _ in range(num_epochs):
        train_loss = train_epoch(model, train_dataloader, criterion, optimizer)
        val_loss, f1_macro, f1_micro, f1_weighted = eval_epoch(model, val_dataloader, criterion)
        wandb.log(
            {
                "val_f1_macro": f1_macro,
                "val_f1_micro": f1_micro,
                "val_f1_weighted": f1_weighted,
                "val_loss": val_loss,
                "train_loss": train_loss,
            }
        )

In [10]:
train(model, train_dataloader, criterion, optimizer, val_dataloader)

100%|██████████| 25/25 [00:28<00:00,  1.14s/it]
100%|██████████| 9/9 [00:10<00:00,  1.20s/it]
100%|██████████| 25/25 [00:28<00:00,  1.12s/it]
100%|██████████| 9/9 [00:10<00:00,  1.22s/it]
100%|██████████| 25/25 [00:28<00:00,  1.14s/it]
100%|██████████| 9/9 [00:11<00:00,  1.28s/it]
100%|██████████| 25/25 [00:28<00:00,  1.14s/it]
100%|██████████| 9/9 [00:11<00:00,  1.24s/it]
100%|██████████| 25/25 [00:28<00:00,  1.14s/it]
100%|██████████| 9/9 [00:11<00:00,  1.25s/it]


In [11]:
wandb.finish()

VBox(children=(Label(value='0.050 MB of 0.050 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))



0,1
train_loss,█▃▂▁▁
val_f1_macro,▁▆███
val_f1_micro,▁▆▇██
val_f1_weighted,▁▆▇██
val_loss,█▂▁▁▁

0,1
train_loss,0.04065
val_f1_macro,0.83631
val_f1_micro,0.85511
val_f1_weighted,0.85484
val_loss,0.20745


In [12]:
loss, f1_macro, f1_micro, f1_weighted = eval_epoch(model, test_dataloader, criterion)
print(
    {
        "test_f1_macro": f1_macro,
        "test_f1_micro": f1_micro,
        "test_f1_weighted": f1_weighted,
        "test_loss": loss,
    }
)

100%|██████████| 9/9 [00:11<00:00,  1.28s/it]

{'test_f1_macro': 0.8263645172119141, 'test_f1_micro': 0.8452935814857483, 'test_f1_weighted': 0.8463107347488403, 'test_loss': 0.2187335002422333}





In [14]:
model.eval()
model.to("cpu")
torch_input = torch.randn(1, 3, 224, 224)
path = "data/models/brands-classification/resnet152-finetune.onnx"
save_torch_model(model, torch_input, path)

In [43]:
def predict_resnet(images: Image.Image) -> np.ndarray:
    with Path("data/models/brands-classification/resnet152-finetune-classes.npy").open("rb") as file:
        class_to_idx_numpy = np.load(file, allow_pickle=False)
        class_to_idx = dict(zip(class_to_idx_numpy[:, 1].astype(int), class_to_idx_numpy[:, 0]))

    weights = ResNet152_Weights.DEFAULT
    preprocess = weights.transforms()

    def apply_transforms(image: Image.Image) -> torch.Tensor:
        return preprocess(image)  # type: ignore[no-any-return]

    preprocessed_images = torch.stack([apply_transforms(image) for image in images])

    onnx_session = get_session("data/models/brands-classification/resnet152-finetune.onnx", "cpu")

    pred = predict(onnx_session, preprocessed_images)
    softmax_pred = softmax(pred, axis=1)
    predictions = np.argmax(softmax_pred, axis=1)
    string_predictions = np.vectorize(class_to_idx.get)(predictions)
    return predictions, string_predictions

In [44]:
image = Image.open("data/training/brands-classification-splits/train/adidas/1.jpeg")

In [47]:
predict_resnet([image, image])

(array([0, 0]), array(['adidas', 'adidas'], dtype='<U6'))