In [None]:
# Mount Google Drive inside the Colab VM. Later cells read the dataset archive
# from /content/drive/MyDrive and use Drive paths for checkpoints.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# --- Run configuration -------------------------------------------------------
# Checkpoint restore: when enabled, the model is loaded from `chkpt_path` and
# evaluated on the test split before any training happens.
Load_from_checkpoint = True
chkpt_path = (
    'drive/MyDrive/'
    'Checkpoints_sorted/65_perc_dm_100x50_main/epoch=2-step=164.ckpt'
)

# Training: number of epochs to run and the directory that receives the
# checkpoints written during training.
Train = True
epochs = 4
chkpts_upload_dir = 'drive/MyDrive/checkpoints/100x50'

In [None]:
# Copy the dataset archive from Drive into the working directory; -u skips the
# copy when the local file is already up to date.
!cp -u /content/drive/MyDrive/100x50_main.zip ./
# Extract quietly (-q) into a scratch directory, never overwriting files that
# are already there (-n).
!unzip -q -n 100x50_main.zip -d temp_unzip_dir/
# Remove any setupdir left by a previous run so the move below starts clean.
!rm -R -f setupdir
# NOTE(review): this move only produces ./setupdir as the image root if the
# archive contains exactly one top-level directory — confirm the zip layout.
!mv temp_unzip_dir/* ./setupdir
# rm -d deletes only an empty directory, so this fails if the move left
# anything behind in the scratch directory.
!rm -d temp_unzip_dir

In [None]:
!pip -q install torch
!pip -q install torchvision
!pip -q install pytorch-lightning

from torchvision import datasets, transforms, models

from pytorch_lightning import LightningDataModule, LightningModule, Trainer
from pytorch_lightning.metrics.functional import accuracy
from pytorch_lightning.callbacks import ModelCheckpoint

import torch
from torch.nn import functional
from torch.utils.data import DataLoader, random_split

Collecting pytorch-lightning
  Downloading pytorch_lightning-1.4.2-py3-none-any.whl (916 kB)
[K     |████████████████████████████████| 916 kB 7.8 MB/s 
Collecting pyDeprecate==0.3.1
  Downloading pyDeprecate-0.3.1-py3-none-any.whl (10 kB)
Collecting fsspec[http]!=2021.06.0,>=2021.05.0
  Downloading fsspec-2021.7.0-py3-none-any.whl (118 kB)
[K     |████████████████████████████████| 118 kB 74.8 MB/s 
Collecting future>=0.17.1
  Downloading future-0.18.2.tar.gz (829 kB)
[K     |████████████████████████████████| 829 kB 58.3 MB/s 
Collecting torchmetrics>=0.4.0
  Downloading torchmetrics-0.5.0-py3-none-any.whl (272 kB)
[K     |████████████████████████████████| 272 kB 67.3 MB/s 
Collecting PyYAML>=5.1
  Downloading PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636 kB)
[K     |████████████████████████████████| 636 kB 50.1 MB/s 
Collecting aiohttp
  Downloading aiohttp-3.7.4.post0-cp37-cp37m-manylinux2014_x86_64.whl (1.3 MB)
[K     |████████████████████████████████| 1.3 MB 37.1 MB/s 
Co

In [None]:
class ClassifyByCatDM(LightningDataModule):
    """Data module that serves an image-folder dataset as train/val/test splits.

    Images are loaded with ``torchvision.datasets.ImageFolder`` from
    ``setupdir``. Every image is resized to 256, center-cropped to 224x224,
    converted to a tensor and normalized per channel with the mean/std
    constants listed in ``__init__``.

    Args:
        setupdir: Root directory handed to ``ImageFolder``.
        train_frac: Fraction of the samples assigned to the training split.
            Half of the remaining fraction goes to validation; whatever is
            left after truncation goes to test.
        seed: Value passed to ``torch.manual_seed`` right before splitting.
        batch_size: Batch size used by all three dataloaders.
    """

    def __init__(self, setupdir, train_frac=0.9, seed=0, batch_size=64):
        super().__init__()

        self.setupdir = setupdir
        self.train_frac = train_frac
        self.seed = seed
        self.batch_size = batch_size

        self.transform = transforms.Compose([
              transforms.Resize(size=256),
              transforms.CenterCrop(size=224),
              transforms.ToTensor(),
              transforms.Normalize([0.485, 0.456, 0.406],
                                   [0.229, 0.224, 0.225])
        ])

    def setup(self, stage=None):
        """Load the dataset and build the train/val/test subsets.

        Sets ``self.num_classes`` plus ``self.train``, ``self.val`` and
        ``self.test``.

        Args:
            stage: Stage name supplied by the Lightning ``Trainer`` when it
                invokes this hook. It is accepted so the hook matches the
                signature the Trainer calls it with; all three splits are
                built regardless of its value.
        """
        # Seed the global RNG so random_split produces the same partition on
        # every call (and therefore on every run of the notebook).
        torch.manual_seed(self.seed)

        # The three subsets created below all wrap this single dataset object,
        # so attaching the transform once here covers every split.
        dataset = datasets.ImageFolder(self.setupdir, transform=self.transform)
        self.num_classes = len(dataset.classes)

        set_len = len(dataset)
        train_len = int(set_len * self.train_frac)
        val_len = int(set_len * (1 - self.train_frac) / 2)
        # The test split absorbs the truncation remainder so the three lengths
        # always sum to the dataset size, as random_split requires.
        test_len = set_len - train_len - val_len

        self.train, self.val, self.test = random_split(
            dataset, [train_len, val_len, test_len])

    def train_dataloader(self):
        """Return the shuffled loader over the training split."""
        return DataLoader(self.train, batch_size=self.batch_size, shuffle=True)

    def val_dataloader(self):
        """Return the (unshuffled) loader over the validation split."""
        return DataLoader(self.val, batch_size=self.batch_size)

    def test_dataloader(self):
        """Return the (unshuffled) loader over the test split."""
        return DataLoader(self.test, batch_size=self.batch_size)

In [None]:
class ClassifyModel(LightningModule):
    """Image classifier: a pretrained ResNet-34 followed by one linear layer.

    The ResNet output is flattened and fed to ``self.classifier``; ``forward``
    returns log-probabilities (``log_softmax``), which the step methods pair
    with ``nll_loss``.

    Args:
        input_shape: Shape of one input sample without the batch dimension;
            used only to probe the feature size fed to the classifier.
        num_classes: Number of output classes.
        learning_rate: Learning rate for the Adam optimizer.
        batch_size: Stored on the module and in the saved hyperparameters.

    Attributes:
        predictions: Per-sample predicted class indices collected during the
            most recent test run.
    """

    def __init__(self, input_shape, num_classes,
                 learning_rate = 1e-4, batch_size=64):

        super().__init__()

        self.batch_size = batch_size

        # Stores the constructor arguments as hyperparameters of the module.
        self.save_hyperparameters()
        self.learning_rate = learning_rate
        self.dim = input_shape
        self.num_classes = num_classes

        self.feature_extractor = models.resnet34(pretrained=True)
        # NOTE(review): eval() only switches the module mode for the shape
        # probe below; it does not freeze any weights, and the recorded
        # training summary lists every parameter as trainable. If a frozen
        # backbone was intended, that needs requires_grad_(False) — confirm.
        self.feature_extractor.eval()

        n_sizes = self._get_conv_output(input_shape)
        self.classifier = torch.nn.Linear(n_sizes, num_classes)

        self.predictions = []

    def _get_conv_output(self, shape):
        """Return the flattened per-sample feature size for inputs of ``shape``.

        Runs one random sample through the feature extractor. The pass is
        done under ``no_grad`` because only the output shape is needed.
        """
        batch_size = 1
        with torch.no_grad():
            dummy = torch.rand(batch_size, *shape)
            features = self._forward_features(dummy)
        return features.view(batch_size, -1).size(1)

    def _forward_features(self, x):
        """Apply the feature extractor to ``x``."""
        return self.feature_extractor(x)

    def forward(self, x):
        """Return per-class log-probabilities for the batch ``x``."""
        x = self._forward_features(x)
        x = x.view(x.size(0), -1)
        return functional.log_softmax(self.classifier(x), dim=1)

    def _shared_step(self, batch):
        """Compute loss, accuracy and predicted classes for one batch."""
        x, y = batch
        log_probs = self(x)
        loss = functional.nll_loss(log_probs, y)
        preds = torch.argmax(log_probs, dim=1)
        acc = accuracy(preds, y)
        return loss, acc, preds

    def training_step(self, batch, batch_idx):
        """Log per-step and per-epoch training loss/accuracy; return the loss."""
        loss, acc, _ = self._shared_step(batch)
        self.log('train_loss', loss, on_step=True, on_epoch=True, logger=True)
        self.log('train_acc', acc, on_step=True, on_epoch=True, logger=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Log validation loss/accuracy to the progress bar; return the loss."""
        loss, acc, _ = self._shared_step(batch)
        self.log('val_loss', loss, prog_bar=True)
        self.log('val_acc', acc, prog_bar=True)
        return loss

    def on_test_epoch_start(self):
        """Start every test run with an empty prediction list.

        Without this, predictions from an earlier ``trainer.test`` call on the
        same module would stay in ``self.predictions`` and mix with new ones.
        """
        self.predictions = []

    def test_step(self, batch, batch_idx):
        """Record predictions and log test loss/accuracy; return the loss."""
        loss, acc, preds = self._shared_step(batch)

        # Iterating a 1-D tensor yields one 0-d tensor per sample.
        self.predictions.extend(preds)

        self.log('test_loss', loss, prog_bar=True)
        self.log('test_acc', acc, prog_bar=True)
        return loss

    def configure_optimizers(self):
        """Return an Adam optimizer over all module parameters."""
        return torch.optim.Adam(self.parameters(), lr=self.learning_rate)

In [None]:
# Build the data module and create its splits right away: the training cell
# reads dm.num_classes, which is only set once setup() has run.
batch_size = 64
dm = ClassifyByCatDM(
    setupdir='setupdir',
    train_frac=0.7,
    seed=0,
    batch_size=batch_size,
)
dm.setup()

In [None]:
# Optionally restore a model from the configured checkpoint and report its
# metrics on the test split. `model` stays None when loading is disabled so
# the training cell knows it has to build a fresh one.
model = None
if Load_from_checkpoint:
    trainer = Trainer(gpus=[0])
    model = ClassifyModel.load_from_checkpoint(chkpt_path)
    # Hand the data module over by keyword: the second positional slot of
    # Trainer.test is meant for dataloaders, not for a data module.
    trainer.test(model, datamodule=dm)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
Downloading: "https://download.pytorch.org/models/resnet34-b627a593.pth" to /root/.cache/torch/hub/checkpoints/resnet34-b627a593.pth


  0%|          | 0.00/83.3M [00:00<?, ?B/s]

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
  f"DataModule.{name} has already been called, so it will not be called again. "
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

  stream(template_mgs % msg_args)


--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_acc': 0.656000018119812, 'test_loss': 1.4140751361846924}
--------------------------------------------------------------------------------


In [None]:
# Train the model and evaluate it on the test split afterwards.
if Train:
    # Build a fresh model only when none was restored from a checkpoint. The
    # explicit None check avoids relying on the truthiness of a module object.
    # Note that learning_rate=2e-4 applies to a freshly built model only.
    if model is None:
        model = ClassifyModel((3,224,224), dm.num_classes,
                              batch_size=batch_size, learning_rate=2e-4)

    # Keep only the single checkpoint with the lowest validation loss.
    checkpoint = ModelCheckpoint(dirpath=chkpts_upload_dir,
                                monitor='val_loss', save_top_k=1)

    trainer = Trainer(max_epochs=epochs,
                    progress_bar_refresh_rate=1,
                    gpus=[0],
                    callbacks = [checkpoint])

    # Pass the data module by keyword; the positional slots after the model
    # are meant for dataloaders.
    trainer.fit(model, datamodule=dm)

    # Called without arguments on purpose so the Trainer reuses the model and
    # data module from fit().
    # NOTE(review): with the recorded pytorch-lightning 1.4.2 this appears to
    # evaluate the best saved checkpoint, whereas passing the model explicitly
    # would test the last-epoch weights — confirm before changing this call.
    trainer.test()

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  f"DataModule.{name} has already been called, so it will not be called again. "
  f"DataModule.{name} has already been called, so it will not be called again. "
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name              | Type   | Params
---------------------------------------------
0 | feature_extractor | ResNet | 21.8 M
1 | classifier        | Linear | 100 K 
---------------------------------------------
21.9 M    Trainable params
0         Non-trainable params
21.9 M    Total params
87.591    Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: -1it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'test_acc': 0.6386666893959045, 'test_loss': 1.7253798246383667}
--------------------------------------------------------------------------------


  f"DataModule.{name} has already been called, so it will not be called again. "
