In [1]:
from typing import Any, OrderedDict, List, Dict
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision.models.resnet import resnet50
from torchvision.transforms import v2 as T
import matplotlib.pyplot as plt
import json
from torchmetrics import Accuracy
from lightly.transforms.utils import IMAGENET_NORMALIZE
from PIL import Image

## Load Data

In [2]:
class ImageDataset(Dataset):
    def __init__(self, set_map: List[Dict], transform=None) -> None:
        ''' Each item in set_map is expected to contain:
                img_path: Full path to image,
                label: Label corresponding to image at img_path
        '''

        self.set_map = set_map
        self.transform=transform

    def __len__(self):
        return len(self.set_map)
    
    def __getitem__(self, index):   
        sample = self.set_map[index]

        # image = read_image(sample['img_path'])
        image = Image.open(sample['img_path'])

        if self.transform:
            image = self.transform(image)

        return dict(image=image, **sample)

In [3]:
with open('./data/train.json', 'r') as file:
    train_set_map = json.load(file)

In [4]:
with open('./data/test.json', 'r') as file:
    test_set_map = json.load(file)

In [5]:
label_map = {
    'DGG': 0,
    'PH': 1,
    'EH': 2
}

domain_map = {
    'cartoon': 0,
    'art_painting': 1,
    'photo': 2,
}

In [6]:
for elem in train_set_map:
    elem['label'] = label_map[elem['label']]
for elem in test_set_map:
    elem['label'] = label_map[elem['label']]

In [7]:
transform = T.Compose([
    T.Resize(96),
    T.ToTensor(),
    T.Normalize(
        mean=IMAGENET_NORMALIZE["mean"],
        std=IMAGENET_NORMALIZE["std"],
    ),
])



In [8]:
train_set = ImageDataset(train_set_map, transform=transform)
test_set = ImageDataset(test_set_map, transform=transform)

In [9]:
train_loader = DataLoader(train_set, batch_size=32, shuffle=True, num_workers=4, persistent_workers=True, pin_memory=True)
test_loader = DataLoader(test_set, batch_size=32, shuffle=False, num_workers=4, persistent_workers=True, pin_memory=True)

## Validate Data

In [10]:
stats = {}
for elem in train_set_map:
    domain = elem['domain']
    label = elem['label']

    if domain not in stats.keys() :
        stats[domain] = {}

    if label not in stats[domain].keys():
        stats[domain][label] = 1
    else:
        stats[domain][label] += 1

In [11]:
stats

{'art_painting': {0: 771, 2: 77, 1: 77},
 'photo': {1: 641, 2: 64, 0: 64},
 'cartoon': {2: 696, 1: 70, 0: 70}}

In [12]:
stats = {}
for elem in test_set_map:
    domain = elem['domain']
    label = elem['label']

    if domain not in stats.keys() :
        stats[domain] = {}

    if label not in stats[domain].keys():
        stats[domain][label] = 1
    else:
        stats[domain][label] += 1

In [13]:
stats

{'cartoon': {2: 85, 1: 72, 0: 82},
 'art_painting': {1: 75, 0: 77, 2: 41},
 'photo': {1: 71, 2: 46, 0: 58}}

## Train Models

In [14]:
import pytorch_lightning as L

In [21]:
from typing import Any


class LinearProbe(L.LightningModule):
    def __init__(self, backbone, emb_dim, num_classes, lr=1e-3) -> None:
        super().__init__()
        
        self.backbone: nn.Module = backbone
        for param in self.backbone.parameters():
            param.requires_grad = False

        self.linear_head: nn.Module = nn.Linear(emb_dim, num_classes)
        self.criterion = nn.CrossEntropyLoss()
        self.lr = lr

        self.test_accuracy = Accuracy(task='multiclass', num_classes=num_classes)

    def forward(self, x):
        x = self.backbone(x)
        x = self.linear_head(x)

        return x

    def training_step(self, batch, bacth_idx):
        X = batch['image']
        t = batch['label']

        y = self.forward(X)
        loss = self.criterion(y, t)

        print(loss.item())

        return loss
    
    def test_step(self, batch, batch_idx):
        X = batch['image']
        t = batch['label']

        y = self.forward(X)
        acc = self.test_accuracy(y, t)

        self.log('accuracy', acc, on_epoch=True)

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.lr)

In [22]:
def get_backbone_from_ckpt(ckpt_path: str) -> torch.nn.Module:
    state_dict = torch.load(ckpt_path)["state_dict"]
    state_dict = OrderedDict([
        (".".join(name.split(".")[1:]), param) for name, param in state_dict.items() if name.startswith("backbone")
    ])

    return state_dict

In [23]:
# Baseline Model
model_bl  = resnet50()
weights_bl = get_backbone_from_ckpt("./r50_bt.ckpt")
model_bl.load_state_dict(weights_bl, strict=False)
model_bl.fc = torch.nn.Identity()
model_bl = model_bl.cuda()

In [24]:
# MixStyle Model
model_ms  = resnet50()
weights_ms = get_backbone_from_ckpt("./r50_bt_ms.ckpt")
model_ms.load_state_dict(weights_ms, strict=False)
model_ms.fc = torch.nn.Identity()
model_ms = model_ms.cuda()

In [25]:
baseline_module = LinearProbe(model_bl, emb_dim=2048, num_classes=3)

In [26]:
mixstyle_module = LinearProbe(model_ms, emb_dim=2048, num_classes=3)

In [27]:
trainer = L.Trainer(
    max_epochs=50
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [28]:
trainer.fit(baseline_module, train_loader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type               | Params
-----------------------------------------------------
0 | backbone      | ResNet             | 23.5 M
1 | linear_head   | Linear             | 6.1 K 
2 | criterion     | CrossEntropyLoss   | 0     
3 | test_accuracy | MulticlassAccuracy | 0     
-----------------------------------------------------
6.1 K     Trainable params
23.5 M    Non-trainable params
23.5 M    Total params
94.057    Total estimated model params size (MB)


Training: |          | 0/? [00:00<?, ?it/s]

1.102203607559204
1.0783448219299316
0.7364187836647034
0.7673234343528748
0.6876720190048218
0.6459776759147644
0.6896953582763672
0.4281481206417084
0.4382372796535492
0.40173590183258057
0.28852713108062744
0.3500955402851105
0.3288230001926422
0.4467149078845978
0.36273136734962463
0.39633241295814514
0.19182975590229034
0.23059287667274475
0.36232662200927734
0.22963839769363403
0.23325905203819275
0.34299546480178833
0.45277538895606995
0.2148977369070053
0.21161513030529022
0.2040417492389679
0.14547623693943024
0.25915011763572693
0.18163786828517914
0.09508597105741501
0.34057706594467163
0.21932990849018097
0.42639485001564026
0.13555864989757538
0.21781426668167114
0.13320691883563995
0.3083781898021698
0.30174896121025085
0.15378481149673462
0.2564076781272888
0.06951312720775604
0.15704645216464996
0.2158033549785614
0.22311104834079742
0.28701454401016235
0.09676062315702438
0.2416335642337799
0.25954926013946533
0.31654807925224304
0.0593862347304821
0.2597651779651642
0

  return F.conv2d(input, weight, bias, self.stride,


0.14873646199703217
0.08547630161046982
0.1182396337389946
0.26489126682281494
0.05868558585643768
0.10709159821271896
0.30297401547431946
0.18410877883434296
0.16009074449539185
0.1825396567583084
0.1810896396636963
0.10753834247589111
0.18052758276462555
0.06611748039722443
0.1466178148984909
0.1796846091747284
0.2689115107059479
0.13547572493553162
0.11718007922172546
0.09359461069107056
0.13867710530757904
0.10743938386440277
0.08339846879243851
0.09135109931230545
0.15894292294979095
0.3523189127445221
0.07199566066265106
0.05194561928510666
0.23460696637630463
0.25822916626930237
0.19697774946689606
0.07199206203222275
0.18242742121219635
0.14530035853385925
0.38389214873313904
0.29084378480911255
0.08407393842935562
0.16967473924160004
0.12595370411872864
0.09056224673986435
0.12164541333913803
0.2207862287759781
0.24012835323810577
0.18671035766601562
0.10785545408725739
0.09067081660032272
0.10644348710775375
0.10775735229253769
0.10708388686180115
0.15669456124305725
0.079205

`Trainer.fit` stopped: `max_epochs=50` reached.


In [29]:
trainer.test(baseline_module, dataloaders=test_loader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

/home/yasin/miniforge3/lib/python3.10/site-packages/pytorch_lightning/utilities/data.py:77: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 32. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.
/home/yasin/miniforge3/lib/python3.10/site-packages/pytorch_lightning/utilities/data.py:77: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 31. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.


[{'accuracy': 0.8682042956352234}]

In [31]:
trainer = L.Trainer(
    max_epochs=50
)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [32]:
trainer.fit(mixstyle_module, train_loader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type               | Params
-----------------------------------------------------
0 | backbone      | ResNet             | 23.5 M
1 | linear_head   | Linear             | 6.1 K 
2 | criterion     | CrossEntropyLoss   | 0     
3 | test_accuracy | MulticlassAccuracy | 0     
-----------------------------------------------------
6.1 K     Trainable params
23.5 M    Non-trainable params
23.5 M    Total params
94.057    Total estimated model params size (MB)


Training: |          | 0/? [00:00<?, ?it/s]

1.1378557682037354
0.9950240254402161
0.7739367485046387
0.6455991268157959
0.6010487675666809
0.5043728947639465
0.363283634185791
0.46410974860191345
0.259242445230484
0.23076899349689484
0.40625983476638794
0.28732937574386597
0.27885931730270386
0.2659675180912018
0.09500522166490555
0.14809413254261017
0.24415841698646545
0.17116379737854004
0.17060866951942444
0.1722855567932129
0.38266611099243164
0.13856476545333862
0.18323436379432678
0.21574001014232635
0.27842065691947937
0.1907137930393219
0.0914531722664833
0.1358339488506317
0.06954365223646164
0.1390431672334671
0.17873843014240265
0.29723668098449707
0.1348518282175064
0.06688132137060165
0.35561254620552063
0.21970687806606293
0.2972590923309326
0.28369149565696716
0.28400954604148865
0.3575889766216278
0.18015670776367188
0.30303332209587097
0.29367905855178833
0.3834388554096222
0.2873428761959076
0.3352813422679901
0.39253854751586914
0.1137581542134285
0.471649169921875
0.15427614748477936
0.12153270840644836
0.291

`Trainer.fit` stopped: `max_epochs=50` reached.


In [33]:
trainer.test(mixstyle_module, dataloaders=test_loader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]

[{'accuracy': 0.8698517084121704}]