In [1]:
!pip install --quiet torch>=1.10.0 torchvision>=0.11.0 pytorch-lightning==1.7.1 torchmetrics==0.9.3 timm optuna==2.10.1


[0m

In [2]:
from typing import Any, Dict, Optional, Tuple, List

import argparse
import os
from typing import List
from typing import Optional

import optuna
from optuna.integration import PyTorchLightningPruningCallback
from optuna.trial import TrialState

import os
import subprocess
import torch
import timm
import json

import pytorch_lightning as pl
import torchvision.transforms as T
import torch.nn.functional as F

from pathlib import Path
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, Dataset
from torchmetrics.functional import accuracy
from torchmetrics import F1Score, Precision, Recall, ConfusionMatrix, MaxMetric, MeanMetric
from torchmetrics.classification.accuracy import Accuracy
from pytorch_lightning import loggers as pl_loggers
from pytorch_lightning.callbacks import TQDMProgressBar
from torch.optim.lr_scheduler import OneCycleLR
from packaging import version
import torch.optim as optim
from datetime import datetime

In [3]:
if version.parse(pl.__version__) < version.parse("1.0.2"):
    raise RuntimeError("PyTorch Lightning>=1.0.2 is required for this example.")

PERCENT_VALID_EXAMPLES = 0.1
BATCHSIZE = 64
CLASSES = 6
EPOCHS = 5
DIR = "/kaggle/input/intel-image-classification"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [4]:
class LitResnet(pl.LightningModule):

    def __init__(
            self,
            model_name='resnet18',
            optim_name="SGD",
            num_classes=6,
            lr=0.05,
    ):
        super().__init__()

        # this line allows to access init params with 'self.hparams' attribute
        # also ensures init params will be stored in ckpt
        self.save_hyperparameters(logger=False, ignore=["net"])

        self.num_classes = num_classes

        self.net = timm.create_model(model_name, pretrained=True, num_classes=self.num_classes)
        self.lr = lr
        # configure optimizer
        if optim_name == "ADAM":
            self.optim_name = torch.optim.Adam
        if optim_name == "SGD":
            self.optim_name = torch.optim.SGD
        if optim_name == "RMS":
            self.optim_name = torch.optim.RMSprop

        # for averaging loss across batches
        self.train_loss = MeanMetric()
        self.val_loss = MeanMetric()
        self.test_loss = MeanMetric()

        # loss function
        self.criterion = torch.nn.CrossEntropyLoss()

        # for tracking best so far validation accuracy
        self.val_acc_best = MaxMetric()

        # metric objects for calculating and averaging accuracy across batches
        self.train_acc = Accuracy(task='multiclass', num_classes=self.num_classes)
        self.val_acc = Accuracy(task='multiclass', num_classes=self.num_classes)
        self.test_acc = Accuracy(task='multiclass', num_classes=self.num_classes)

        # some other metrics to be logged
        self.f1_score = F1Score(task="multiclass", num_classes=self.num_classes)
        self.precision_score = Precision(task="multiclass", average='macro', num_classes=self.num_classes)
        self.recall_score = Recall(task="multiclass", average='macro', num_classes=self.num_classes)

    def forward(self, x: torch.Tensor):
        return self.net(x)

    def step(self, batch: Any):
        x, y = batch
        logits = self.forward(x)
        loss = self.criterion(logits, y)
        preds = torch.argmax(logits, dim=1)
        return loss, preds, y

    def training_step(self, batch: Any, batch_idx: int):
        loss, preds, targets = self.step(batch)

        # update and log metrics
        self.train_loss(loss)
        self.train_acc(preds, targets)
        self.log("train/loss", self.train_loss, on_step=True, on_epoch=True, prog_bar=True)
        self.log("train/acc", self.train_acc, on_step=True, on_epoch=True, prog_bar=True)

        # we can return here dict with any tensors
        # and then read it in some callback or in `training_epoch_end()` below
        # remember to always return loss from `training_step()` or backpropagation will fail!
        return {"loss": loss, "preds": preds, "targets": targets}

    def training_epoch_end(self, outputs: List[Any]):
        # `outputs` is a list of dicts returned from `training_step()`
        pass

    def validation_step(self, batch: Any, batch_idx: int):
        loss, preds, targets = self.step(batch)

        # update and log metrics
        self.val_loss(loss)
        self.val_acc(preds, targets)
        self.f1_score(preds, targets)
        self.precision_score(preds, targets)
        self.recall_score(preds, targets)
        self.log("val/loss", self.val_loss, on_step=True, on_epoch=True, prog_bar=True)
        self.log("val/acc", self.val_acc, on_step=True, on_epoch=True, prog_bar=False)
        self.log("val/f1", self.val_acc, on_step=False, on_epoch=True, prog_bar=False)
        self.log("val/precision", self.precision_score, on_step=False, on_epoch=True, prog_bar=False)
        self.log("val/recall", self.recall_score, on_step=False, on_epoch=True, prog_bar=False)
        return {"loss": loss, "preds": preds, "targets": targets}

    def validation_epoch_end(self, outs: List[Any]):
        acc = self.val_acc.compute()  # get current val acc
        self.val_acc_best(acc)  # update best so far val acc
        # log `val_acc_best` as a value through `.compute()` method, instead of as a metric object
        # otherwise metric would be reset by lightning after each epoch
        self.log("val/acc_best", self.val_acc_best.compute(), on_step=False, on_epoch=True, prog_bar=False)

    def test_step(self, batch: Any, batch_idx: int):
        loss, preds, targets = self.step(batch)

        # update and log metrics
        self.test_loss(loss)
        self.test_acc(preds, targets)
        self.log("test/loss", self.test_loss, on_step=False, on_epoch=True, prog_bar=True)
        self.log("test/acc", self.test_acc, on_step=False, on_epoch=True, prog_bar=True)

        return {"loss": loss, "preds": preds, "targets": targets}

    def test_epoch_end(self, outputs: List[Any]):
        pass

    def configure_optimizers(self):
        optimizer = self.optim_name(
            self.parameters(),
            lr=self.lr,
        )
#         steps_per_epoch = 45000 // BATCHSIZE
        sch = torch.optim.lr_scheduler.StepLR(optimizer, step_size  = 10 , gamma = 0.5)
        return {
            "optimizer":optimizer,
            "lr_scheduler" : {
                "scheduler" : sch,
                "monitor" : "train/loss",
                
            }
          }
#         scheduler_dict = {
#             "scheduler": OneCycleLR(
#                 optimizer,
#                 0.1,
#                 epochs=self.trainer.max_epochs,
#                 steps_per_epoch=steps_per_epoch,
#             ),
#             "interval": "step",
#         }
#         return {"optimizer": optimizer, "lr_scheduler": scheduler_dict}
          


In [5]:
class IntelClassificationDataModule(pl.LightningDataModule):
    def __init__(
            self,
            data_dir: str = "dataset/",
            batch_size: int = 256,
            num_workers: int = 4,
            pin_memory: bool = False,
    ):
        super().__init__()

        # this line allows to access init params with 'self.hparams' attribute
        # also ensures init params will be stored in ckpt
        self.save_hyperparameters(logger=False)

        self.data_dir = Path(data_dir)

        # data transformations
        self.transforms = T.Compose([
            T.RandomRotation(degrees=66),
            T.RandomHorizontalFlip(p=0.5),
            T.ColorJitter(brightness=(0.1,0.6), contrast=1,saturation=0, hue=0.4),
            T.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)),
            T.Resize((224, 224)),
            T.RandomCrop(size=(128, 128)),
            T.ToTensor(),
            T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

        self.data_train: Optional[Dataset] = None
        self.data_test: Optional[Dataset] = None

    @property
    def num_classes(self):
        return len(self.data_train.classes)

    @property
    def classes(self):
        return self.data_train.classes

    def prepare_data(self):
        """Download data if needed.
        Do not use it to assign state (self.x = y).
        """
        pass

    def setup(self, stage: Optional[str] = None):
        """Load data. Set variables: `self.data_train`, `self.data_val`, `self.data_test`.
        This method is called by lightning with both `trainer.fit()` and `trainer.test()`, so be
        careful not to execute things like random split twice!
        """
        # load and split datasets only if not loaded already
        if not self.data_train and not self.data_test:
            trainset = ImageFolder(self.data_dir / "seg_train" / "seg_train", transform=self.transforms)
            testset = ImageFolder(self.data_dir / "seg_test" / "seg_test", transform=self.transforms)

            self.data_train, self.data_test = trainset, testset

    def train_dataloader(self):
        return DataLoader(
            dataset=self.data_train,
            batch_size=self.hparams.batch_size,
            num_workers=self.hparams.num_workers,
            pin_memory=self.hparams.pin_memory,
            shuffle=True,
        )

    def val_dataloader(self):
        return DataLoader(
            dataset=self.data_test,
            batch_size=self.hparams.batch_size,
            num_workers=self.hparams.num_workers,
            pin_memory=self.hparams.pin_memory,
            shuffle=False,
        )

    def test_dataloader(self):
        return DataLoader(
            dataset=self.data_test,
            batch_size=self.hparams.batch_size,
            num_workers=self.hparams.num_workers,
            pin_memory=self.hparams.pin_memory,
            shuffle=False,
        )

    def teardown(self, stage: Optional[str] = None):
        """Clean up after fit or test."""
        pass

    def state_dict(self):
        """Extra things to save to checkpoint."""
        return {}

    def load_state_dict(self, state_dict: Dict[str, Any]):
        """Things to do when loading checkpoint."""
        pass


In [6]:
# datamodule = IntelClassificationDataModule(data_dir=DIR, batch_size=BATCHSIZE)
# model = LitResnet(model_name="resnet18", optim_name="ADAM")

# # finds learning rate automatically
# # sets hparams.lr or hparams.learning_rate to that learning rate
# trainer = pl.Trainer(auto_lr_find=True)

# trainer.tune(model)

In [7]:
avail_pretrained_models = timm.list_models("efficientnet*",pretrained=True)
avail_pretrained_models

['efficientnet_b0',
 'efficientnet_b1',
 'efficientnet_b1_pruned',
 'efficientnet_b2',
 'efficientnet_b2_pruned',
 'efficientnet_b3',
 'efficientnet_b3_pruned',
 'efficientnet_b4',
 'efficientnet_el',
 'efficientnet_el_pruned',
 'efficientnet_em',
 'efficientnet_es',
 'efficientnet_es_pruned',
 'efficientnet_lite0',
 'efficientnetv2_rw_m',
 'efficientnetv2_rw_s',
 'efficientnetv2_rw_t']

In [8]:
avail_pretrained_models = timm.list_models("regnet*",pretrained=True)
avail_pretrained_models

['regnetv_040',
 'regnetv_064',
 'regnetx_002',
 'regnetx_004',
 'regnetx_006',
 'regnetx_008',
 'regnetx_016',
 'regnetx_032',
 'regnetx_040',
 'regnetx_064',
 'regnetx_080',
 'regnetx_120',
 'regnetx_160',
 'regnetx_320',
 'regnety_002',
 'regnety_004',
 'regnety_006',
 'regnety_008',
 'regnety_016',
 'regnety_032',
 'regnety_040',
 'regnety_064',
 'regnety_080',
 'regnety_120',
 'regnety_160',
 'regnety_320',
 'regnetz_040',
 'regnetz_040h',
 'regnetz_b16',
 'regnetz_c16',
 'regnetz_c16_evos',
 'regnetz_d8',
 'regnetz_d8_evos',
 'regnetz_d32',
 'regnetz_e8']

In [9]:
import torch
print(torch.cuda.device_count())

1


In [10]:
from optuna.integration import PyTorchLightningPruningCallback


In [11]:

BATCHSIZE = 128
model_names = ["resnet18", "efficientnet_b0", "regnetz_c16"]
for model_name in model_names:
    def objective(trial: optuna.trial.Trial) -> float:
        lr = float(trial.suggest_loguniform("lr", 1e-5, 1e-3))
        # Generate the optimizers
        optimizer_name = trial.suggest_categorical("optimizer", ["ADAM", "SGD", "RMS"])
        print("Trail with : \n")
        print("lr_rate:"+str(lr)+" model name: "+model_name+" optimizer name: "+optimizer_name)
        print("=========================================")
        model = LitResnet(model_name=model_name, optim_name=optimizer_name, lr=lr)

        datamodule = IntelClassificationDataModule(data_dir=DIR, batch_size=BATCHSIZE)

        tb_logger = pl_loggers.TensorBoardLogger(save_dir="logs")
        trainer = pl.Trainer(
            logger=tb_logger,
            limit_val_batches=PERCENT_VALID_EXAMPLES,
            enable_checkpointing=False,
            accelerator="auto",
            max_epochs=EPOCHS,
            callbacks=[PyTorchLightningPruningCallback(trial, monitor="train/acc")],
        )
        hyperparameters = dict(model_name=model_name, optimizer_name=optimizer_name, learning_rate=lr)
        trainer.logger.log_hyperparams(hyperparameters)
        trainer.fit(model, datamodule=datamodule)

        return trainer.callback_metrics["val/acc"].item()



    pruner: optuna.pruners.BasePruner = (optuna.pruners.MedianPruner())

    study = optuna.create_study(direction="maximize", pruner=pruner)
    study.optimize(objective, n_trials=300, timeout=500, n_jobs=2)

    pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
    complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

    print("Study statistics: ")
    print("  Number of finished trials: ", len(study.trials))
    print("  Number of pruned trials: ", len(pruned_trials))
    print("  Number of complete trials: ", len(complete_trials))
    print("Number of finished trials: {}".format(len(study.trials)))

    print("Best trial:")
    trial = study.best_trial

    print("  Value: {}".format(trial.value))
    
    print("Trail with : \n")
    print("=========================================")
    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))


[32m[I 2023-01-19 04:14:06,797][0m A new study created in memory with name: no-name-1450d39f-980b-4a09-bb72-c02592ee06f5[0m


Trail with : 

lr_rate:0.00011006008295331135 model name: resnet18 optimizer name: RMS
Trail with : 

lr_rate:0.0003880858334105841 model name: resnet18 optimizer name: ADAM


Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to /root/.cache/torch/hub/checkpoints/resnet18-5c106cde.pth
Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to /root/.cache/torch/hub/checkpoints/resnet18-5c106cde.pth


Sanity Checking: 0it [00:00, ?it/s]

Sanity Checking: 0it [00:00, ?it/s]

  cpuset_checked))


Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

[32m[I 2023-01-19 04:31:07,143][0m Trial 1 finished with value: 0.921875 and parameters: {'lr': 0.0003880858334105841, 'optimizer': 'ADAM'}. Best is trial 1 with value: 0.921875.[0m
[32m[I 2023-01-19 04:31:07,149][0m Trial 0 finished with value: 0.80078125 and parameters: {'lr': 0.00011006008295331135, 'optimizer': 'RMS'}. Best is trial 1 with value: 0.921875.[0m
[32m[I 2023-01-19 04:31:07,152][0m A new study created in memory with name: no-name-cdcc2bb5-2a41-4d8c-af20-6e0a487fc21c[0m


Study statistics: 
  Number of finished trials:  2
  Number of pruned trials:  0
  Number of complete trials:  2
Number of finished trials: 2
Best trial:
  Value: 0.921875
Trail with : 

  Params: 
    lr: 0.0003880858334105841
    optimizer: ADAM
Trail with : 

lr_rate:1.547691630918372e-05 model name: efficientnet_b0 optimizer name: SGD
Trail with : 

lr_rate:0.00027665284312550367 model name: efficientnet_b0 optimizer name: RMS


Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnet_b0_ra-3dd342df.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_ra-3dd342df.pth
Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/efficientnet_b0_ra-3dd342df.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_ra-3dd342df.pth


Sanity Checking: 0it [00:00, ?it/s]

Sanity Checking: 0it [00:00, ?it/s]

  cpuset_checked))


Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

[32m[I 2023-01-19 04:48:38,367][0m Trial 1 finished with value: 0.734375 and parameters: {'lr': 0.00027665284312550367, 'optimizer': 'RMS'}. Best is trial 1 with value: 0.734375.[0m
[32m[I 2023-01-19 04:48:38,514][0m Trial 0 finished with value: 0.11328125 and parameters: {'lr': 1.547691630918372e-05, 'optimizer': 'SGD'}. Best is trial 1 with value: 0.734375.[0m
[32m[I 2023-01-19 04:48:38,519][0m A new study created in memory with name: no-name-7f11cb25-240f-47ad-8b47-6fbb1358531b[0m


Study statistics: 
  Number of finished trials:  2
  Number of pruned trials:  0
  Number of complete trials:  2
Number of finished trials: 2
Best trial:
  Value: 0.734375
Trail with : 

  Params: 
    lr: 0.00027665284312550367
    optimizer: RMS
Trail with : 

lr_rate:2.1172144867677624e-05 model name: regnetz_c16 optimizer name: SGD
Trail with : 

lr_rate:0.00012310173574776534 model name: regnetz_c16 optimizer name: ADAM


Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-attn-weights/regnetz_c_rab2_256-a54bf36a.pth" to /root/.cache/torch/hub/checkpoints/regnetz_c_rab2_256-a54bf36a.pth
Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-attn-weights/regnetz_c_rab2_256-a54bf36a.pth" to /root/.cache/torch/hub/checkpoints/regnetz_c_rab2_256-a54bf36a.pth


Sanity Checking: 0it [00:00, ?it/s]

  cpuset_checked))


Sanity Checking: 0it [00:00, ?it/s]



Training: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

[32m[I 2023-01-19 05:07:27,798][0m Trial 0 finished with value: 0.17578125 and parameters: {'lr': 2.1172144867677624e-05, 'optimizer': 'SGD'}. Best is trial 0 with value: 0.17578125.[0m
[32m[I 2023-01-19 05:07:28,401][0m Trial 1 finished with value: 0.83203125 and parameters: {'lr': 0.00012310173574776534, 'optimizer': 'ADAM'}. Best is trial 1 with value: 0.83203125.[0m


Study statistics: 
  Number of finished trials:  2
  Number of pruned trials:  0
  Number of complete trials:  2
Number of finished trials: 2
Best trial:
  Value: 0.83203125
Trail with : 

  Params: 
    lr: 0.00012310173574776534
    optimizer: ADAM
