In [1]:
from pathlib import Path
if Path.cwd().parent.stem == 't2':
    %cd ..
%config Completer.use_jedi = False

/home/step/Personal/UCH/2021-sem1/VisionComp/t2


## Seed

In [2]:
import torch
import pytorch_lightning as pl

pl.seed_everything(hash("setting a random seeds") % 2**32 - 1)
torch.backends.cudnn.benchmark = True

# perform dataset simple check
check = False

Global seed set to 2174192935


## Define Dataset

In [3]:
from torch.utils.data import random_split, DataLoader, Dataset
from torchvision import transforms, datasets
import skimage.morphology as morph

class ErosionReplicate(object):
    """Applies Erosion to one channel and replicates the channel"""
    def __call__(self, tensor):
        one_channel = tensor[0, :, :]
        one_channel = torch.from_numpy(morph.erosion(one_channel, morph.square(3)))    
        tensor[0, :, :] = one_channel
        tensor[1, :, :] = one_channel
        tensor[2, :, :] = one_channel
        return tensor
        

class SimpleDataset(datasets.ImageFolder):
    def __init__(self, base_dir: str, kind: str):
        tr = [
            transforms.Resize([224,224]),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
        ]
        
        if kind == 'sketch':
            self.base_dir = str(Path(base_dir) / 'png_w256')
            tr += [ErosionReplicate()]
        elif kind == 'photo': 
            self.base_dir = base_dir 
        
        tr += [transforms.Normalize(mean=(0.5,0.5,0.5), std=(0.5,0.5,0.5))]
        
        self.transforms = transforms.Compose(tr)
        
        super(SimpleDataset, self).__init__(
            self.base_dir, transform = self.transforms)
        
        self.n_classes = len(self.classes)
        self.idx_to_class = {idx: label for label, idx in self.class_to_idx.items()}

## Define DataModule

In [4]:
from pathlib import Path

import numpy as np

        
class SimpleDataModule(pl.LightningDataModule):
    def __init__(self, data_dir: str, kind: str, batch_size: int = 32, truncate: int = None,
                train_split: int = 0.8, test_split: int = 0.5):
        super().__init__()
        self.data_dir = data_dir
        self.batch_size = batch_size
        self.kind = kind
        self.truncate = truncate
        self.train_split = train_split
        self.test_split = test_split
        
    def setup(self, stage: str = None):
        dataset = SimpleDataset(self.data_dir, kind = self.kind)
        self.n_classes = dataset.n_classes
        
        if self.truncate is not None:
            # Split the indices in a stratified way
            indices = np.random.choice(len(dataset), size=(self.truncate,), replace=False)
          
            # Warp into Subsets
            dataset = torch.utils.data.Subset(dataset, indices)
            
        self.dataset = dataset
        
        
        train_set, test_set = torch.utils.data.random_split(dataset, 
            [int(self.train_split*len(dataset)), len(dataset) - int(self.train_split*len(dataset))])
        
        val_set, test_set = torch.utils.data.random_split(test_set, 
            [int(self.test_split*len(test_set)), len(test_set) - int(self.test_split*len(test_set))])        
        
        # Assign train/val datasets for use in dataloaders
        if stage == 'fit' or stage is None:
            self.train_set, self.val_set = train_set, val_set
            
        # Assign test dataset for use in dataloader(s)
        if stage == 'test' or stage is None:
            self.val_set, self.test_set = val_set, test_set

    def train_dataloader(self):
        return DataLoader(self.train_set, num_workers=6, batch_size=self.batch_size, pin_memory=True)

    def val_dataloader(self):
        return DataLoader(self.val_set, num_workers=6, batch_size=self.batch_size, pin_memory=True)

    def test_dataloader(self):
        return DataLoader(self.test_set, num_workers=6, batch_size=self.batch_size, pin_memory=True)

### Checks

In [5]:
if check:
    import matplotlib.pyplot as plt
    dm = SimpleDataModule('data/Sketch_EITZ/', kind='sketch')
    dm.setup('fit')

    x, y = dm.train_set[0]

    plt.imshow(x.permute(2,1,0))

    print('sketch eitz: \t', y, dm.dataset.idx_to_class[y], dm.n_classes, len(dm.dataset))
    
    
    dm = SimpleDataModule('data/Flickr25K/', kind='photo')
    dm.setup('fit')

    x, y = dm.train_set[0]

    plt.imshow(x.permute(2,1,0))

    print('flickr25k: \t',y, dm.dataset.idx_to_class[y], dm.n_classes, len(dm.dataset))

## Define Classifier

In [6]:
from torch import nn
from torch import optim

import torchmetrics

class Classifier(pl.LightningModule):
    def __init__(self, model: nn.Module, n_classes: int, params: int):
        super().__init__()
        
        # add new fc layers            
        model.fc = nn.Linear(model.fc.in_features, n_classes)        
        self.net = model
        self.criterion = nn.CrossEntropyLoss()
        
        # hparams
        self.hparams.update(params)
        self.save_hyperparameters()
        
        # two accs to have different states
        self.acc_train = torchmetrics.Accuracy()
        self.acc_val = torchmetrics.Accuracy()
        self.test_val = torchmetrics.Accuracy()
       
    def forward(self, x):
        return self.net(x)

    def training_step(self, batch,  batch_idx):
        x, y = batch
        y_hat = self.forward(x)
        loss = self.criterion(y_hat, y)

        self.log('train/loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        self.log('train/acc', self.acc_train(y_hat.softmax(dim=-1), y), 
                 on_step=False, on_epoch=True, prog_bar=False, logger=True)
        return {'loss': loss}

    def validation_step(self, batch, batch_ixd):
        x, y = batch
        y_hat = self.forward(x)
        loss = self.criterion(y_hat, y)
        
        self.log('val_loss', loss, on_step=True, logger=False)
        self.log('val/loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        self.log('val/acc', self.acc_val(y_hat.softmax(dim=-1), y), 
                 on_step=False, on_epoch=True, prog_bar=False, logger=True)
        return {'loss': loss}
    
    def test_step(self, batch, batch_ixd):
        x, y = batch
        y_hat = self.forward(x)
        loss = self.criterion(y_hat, y)
        
        self.log('test/loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        self.log('test/acc', self.test_val(y_hat.softmax(dim=-1), y), 
                 on_step=False, on_epoch=True, prog_bar=False, logger=True)
        return {'loss': loss}
    
    def configure_optimizers(self):
        opt = [optim.SGD(
            self.parameters(), 
            lr=self.hparams.lr,
            momentum=0.9,
            nesterov=True,
            weight_decay=self.hparams.weight_decay
        )]
        sched = [optim.lr_scheduler.CyclicLR(opt[0], 10e-3, 20e-2, cycle_momentum=False)]
        return opt, sched
    
    def optimizer_zero_grad(self, epoch, batch_idx, optimizer, optimizer_idx):
        optimizer.zero_grad(set_to_none=True)

## HParams

In [7]:
PARAMS = {
    'lr' : 10e-3,
    'weight_decay': 0.03,
    'max_epochs': 45,
    'batch_size': 64,
    'patience': 10,
    'precision': 16,
    'base_dir': Path('data/Sketch_EITZ/'),
    'train_split': 0.8,
    'test_split': 0.5,
    'truncate':15000
}

## Loggers

In [8]:
from pytorch_lightning.loggers.neptune import NeptuneLogger
import os

api_key = os.environ['NEPTUNE']

logger = NeptuneLogger(
    api_key=api_key,
    project_name="victor.faraggi/vision-dcc",
    experiment_name='hw2-pretrain-eitz-0.3-sgd', # attention to git, this creates a directory
    params=PARAMS
)

NeptuneLogger will work in online mode


## Instantiate

In [9]:
import models

model = models.resnet34()

dm = SimpleDataModule(
    data_dir = PARAMS['base_dir'], 
    kind = 'sketch', 
    batch_size = PARAMS['batch_size'],
    train_split = PARAMS['train_split'], 
    test_split = PARAMS['test_split'],
    truncate = PARAMS['truncate']
)

dm.setup()

clf = Classifier(model, dm.n_classes, PARAMS)

## Trainer

In [10]:
from pytorch_lightning.plugins import DDPPlugin
from pytorch_lightning import callbacks as cb

checkpoint_cb = cb.ModelCheckpoint(
    monitor='val/loss',
    dirpath='snapshots/resnet-eitz/',
    filename='resnet-eitz-{epoch:02d}-{val_loss:.2f}',
    mode='min'
)

early_stopping_cb = cb.EarlyStopping('val/loss', patience=PARAMS['patience'])

# trainer = pl.Trainer(auto_lr_find=True)
# lr_finder = trainer.tuner.lr_find(clf, datamodule=dm)
# fig = lr_finder.plot(suggest=True)

# trainer = pl.Trainer(
#     logger=logger,
#     callbacks=[checkpoint_cb, early_stopping_cb], 
#     gpus=-1,  
#     max_epochs=PARAMS['max_epochs'],
#     precision=PARAMS['precision'],
#     plugins=DDPPlugin(find_unused_parameters=False),
# )

## Train

In [11]:
trainer.fit(clf, datamodule=dm)

NameError: name 'trainer' is not defined

In [12]:
chk = 'snapshots/resnet-eitz/resnet-eitz-epoch=08-val_loss=1.04.ckpt'
model = Classifier.load_from_checkpoint(chk)

model.eval();
trainer = pl.Trainer(
    logger=logger,
    gpus=-1,
    plugins=DDPPlugin(find_unused_parameters=False)
)

trainer.test(model, datamodule=dm);

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Global seed set to 2174192935
initializing ddp: GLOBAL_RANK: 0, MEMBER: 1/1
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


https://app.neptune.ai/victor.faraggi/vision-dcc/e/VIS-31


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/acc': 0.6499999761581421,
 'test/loss': 1.2735302448272705,
 'test/loss_epoch': 1.3579310178756714}
--------------------------------------------------------------------------------


In [13]:
chk = 'snapshots/resnet-eitz/resnet-eitz-epoch=20-val/loss=0.98.ckpt'
model = Classifier.load_from_checkpoint(chk)

model.eval();
trainer = pl.Trainer(
    logger=logger,
    gpus=-1,
    plugins=DDPPlugin(find_unused_parameters=False)
)

trainer.test(model, datamodule=dm);

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Global seed set to 2174192935
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/acc': 0.5973333120346069,
 'test/loss': 1.5634576082229614,
 'test/loss_epoch': 1.6621181964874268}
--------------------------------------------------------------------------------


In [14]:
chk = 'snapshots/resnet-eitz/resnet-eitz-epoch=26-val_loss=0.68.ckpt'
model = Classifier.load_from_checkpoint(chk)

model.eval();
trainer = pl.Trainer(
    logger=logger,
    gpus=-1,
    plugins=DDPPlugin(find_unused_parameters=False)
)
trainer.test(model, datamodule=dm);

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
Global seed set to 2174192935
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test/acc': 0.7693333625793457,
 'test/loss': 1.09409499168396,
 'test/loss_epoch': 1.0121474266052246}
--------------------------------------------------------------------------------


In [15]:
torch.save(model.net, 'snapshots/resnet-eitz/best.ckpt')

In [16]:
torch.load('snapshots/resnet-eitz/best.ckpt')

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  