In [1]:
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [2]:
import os

import torch
import torch.nn.functional as F
#!pip install pytorch_lightning
from pytorch_lightning import LightningDataModule, LightningModule, Trainer
from pytorch_lightning.callbacks.progress import TQDMProgressBar
from torch import nn
from torch.utils.data import DataLoader, random_split
from torchmetrics.functional import accuracy
from torchvision import transforms

# Note - you must have torchvision installed for this example
from torchvision.datasets import CIFAR10, MNIST

PATH_DATASETS = os.environ.get("PATH_DATASETS", "/content/drive/MyDrive/Datasets")
BATCH_SIZE = 256 if torch.cuda.is_available() else 64

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pytorch_lightning
  Downloading pytorch_lightning-1.6.5-py3-none-any.whl (585 kB)
[K     |████████████████████████████████| 585 kB 36.3 MB/s 
Collecting fsspec[http]!=2021.06.0,>=2021.05.0
  Downloading fsspec-2022.5.0-py3-none-any.whl (140 kB)
[K     |████████████████████████████████| 140 kB 64.2 MB/s 
Collecting torchmetrics>=0.4.1
  Downloading torchmetrics-0.9.3-py3-none-any.whl (419 kB)
[K     |████████████████████████████████| 419 kB 68.9 MB/s 
[?25hCollecting pyDeprecate>=0.3.1
  Downloading pyDeprecate-0.3.2-py3-none-any.whl (10 kB)
Collecting PyYAML>=5.4
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 35.2 MB/s 
Collecting aiohttp
  Downloading aiohttp-3.8.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manyl

In [3]:
class LitMNIST(LightningModule):
  def __init__(self, data_dir=PATH_DATASETS, hidden_size=64, lr=2e-4):
    super().__init__()

    # we hardcode dataset specific stuff here
    self.data_dir = data_dir
    self.num_classes = 10
    self.dims = (1,28,28)
    channels, width, height = self.dims
    self.transform = transforms.Compose([
      transforms.ToTensor(),
      transforms.Normalize((0.1307,), (0.3081,))
    ])
    self.hidden_size = hidden_size
    self.lr = lr

    # Build model
    self.model = nn.Sequential(
        nn.Flatten(),
        nn.Linear(channels * width * height, hidden_size),
        nn.ReLU(),
        nn.Dropout(0.1),
        nn.Linear(hidden_size, hidden_size),
        nn.ReLU(),
        nn.Dropout(0.1),
        nn.Linear(hidden_size, self.num_classes),
    )

  def forward(self, x):
    x = self.model(x)
    return F.log_softmax(x, dim=1)

  def training_step(self, batch, batch_idx):
    x, y = batch
    logits = self(x)
    loss = F.nll_loss(logits, y)
    return loss

  def validation_step(self, batch, batch_idx):
    x, y = batch
    logits = self(x)
    loss = F.nll_loss(logits, y)
    preds = torch.argmax(logits, dim=1)
    acc = accuracy(preds, y)
    self.log("val_loss", loss, prog_bar = True)
    self.log("val_acc", acc, prog_bar = True)

  def configure_optimizers(self):
    optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
    return optimizer

  ########################
  ###DATA RELATED HOOKS###
  ########################

  def prepare_data(self):
    # download
    MNIST(root=self.data_dir, train=True, download=False)
    MNIST(root=self.data_dir, train=False, download=False)

  def setup(self, stage=None):
    # Assign train/val datasets for use in dataloaders
    if stage == "fit" or stage is None:
      mnist_full = MNIST(self.data_dir, train=True, transform=self.transform)
      self.mnist_train, self.mnist_val = random_split(mnist_full, [55000, 5000])

    # Assign test dataset for use in dataloaders
    if stage == "test" or stage is None:
      self.mnist_test = MNIST(self.data_dir, train=False, transform=self.transform)

  def train_dataloader(self):
    return DataLoader(self.mnist_train, batch_size=128)

  def val_dataloader(self):
    return DataLoader(self.mnist_val, batch_size=128)
  
  def test_dataloader(self):
    return DataLoader(self.mnist_test, batch_size=128)

In [4]:
model = LitMNIST()
model.data_dir

'/content/drive/MyDrive/Datasets'

In [5]:
from torch.cuda import is_available
trainer = Trainer(
    max_epochs=5,
    accelerator="auto",
    devices = 1 if torch.cuda.is_available() else None,
    callbacks = [TQDMProgressBar(refresh_rate=20)],
)
trainer.fit(model)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: /content/lightning_logs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type       | Params
-------------------------------------
0 | model | Sequential | 55.1 K
-------------------------------------
55.1 K    Trainable params
0         Non-trainable params
55.1 K    Total params
0.220     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

In [6]:
class MNISTDataModule(LightningDataModule):
  def __init__(self, data_dir: str = PATH_DATASETS):
    super().__init__()
    self.data_dir = data_dir
    self.transform = transforms.Compose(
        [
         transforms.ToTensor(),
         transforms.Normalize((0.1307,), (0.3081,))
        ]
    )
    self.dims = (1,28,28)
    self.num_classes = 10

  def prepare_data(self):
    # download
    MNIST(root=self.data_dir, train=True, download=False)
    MNIST(root=self.data_dir, train=False, download=False)

  def setup(self, stage=None):
    # Assign train / val datasets for use in DataLoaders
    if stage == "fit" or stage is None:
      mnist_full = MNIST(self.data_dir, train=True, transform=self.transform)
      self.mnist_train, self.mnist_val = random_split(mnist_full, [55000,5000])

    # Assign test dataset for use in DataLoader
    if stage == "test" or stage is None:
      self.mnist_test = MNIST(self.data_dir, train=False, transform=self.transform)

  def train_dataloader(self):
    return DataLoader(self.mnist_train, batch_size=BATCH_SIZE)

  def val_dataloader(self):
    return DataLoader(self.mnist_val, batch_size=BATCH_SIZE)

  def test_dataloader(self):
    return DataLoader(self.mnist_test, batch_size=BATCH_SIZE)


In [7]:
class LitModel(LightningModule):
  def __init__(self, channels, width, height, num_classes, hidden_size=64, lr=2e-4):
    super().__init__()

    # we take in input dimensions as parameters and use those to dynamically build model
    self.channels = channels
    self.width = width
    self.height = height
    self.num_classes = num_classes
    self.hidden_size = hidden_size
    self.lr = lr

    self.model = nn.Sequential(
        nn.Flatten(),
        nn.Linear(channels * width * height, hidden_size),
        nn.ReLU(),
        nn.Dropout(0.1),
        nn.Linear(hidden_size, hidden_size),
        nn.ReLU(),
        nn.Dropout(0.1),
        nn.Linear(hidden_size, num_classes),
    )

  def forward(self, x):
    x = self.model(x)
    return F.log_softmax(x, dim=1)

  def training_step(self, batch, batch_idx):
    x, y = batch
    logits = self(x)
    loss = F.nll_loss(logits, y)
    return loss

  def validation_step(self, batch, batch_idx):
    x, y = batch
    logits = self(x)
    loss = F.nll_loss(logits, y)
    preds = torch.argmax(logits, dim=1)
    acc = accuracy(preds, y)
    self.log("val_loss ", loss, prog_bar=True)
    self.log("val_acc ", acc, prog_bar=True)

  def configure_optimizers(self):
    optimizer = torch.optim.Adam(self.parameters(), lr=self.lr)
    return optimizer

In [8]:
# init datamodule
dm = MNISTDataModule()

# init model from datamodule's attributes
model = LitModel(*dm.size(), dm.num_classes)

# init trainer
trainer = Trainer(
    max_epochs = 5,
    callbacks = [TQDMProgressBar(refresh_rate = 20)],
    accelerator = "auto",
    devices = 1 if torch.cuda.is_available() else None,
)

# pass the datamodule as arg to trainer.fit to override model hooks
trainer.fit(model, dm)

  rank_zero_deprecation("DataModule property `dims` was deprecated in v1.5 and will be removed in v1.7.")
  rank_zero_deprecation("DataModule property `size` was deprecated in v1.5 and will be removed in v1.7.")
  rank_zero_deprecation("DataModule property `dims` was deprecated in v1.5 and will be removed in v1.7.")
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type       | Params
-------------------------------------
0 | model | Sequential | 55.1 K
-------------------------------------
55.1 K    Trainable params
0         Non-trainable params
55.1 K    Total params
0.220     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

In [9]:
class CIFAR10DataModule(LightningDataModule):
  def __init__(self, data_dir: str = PATH_DATASETS):
    super().__init__()
    self.data_dir = data_dir
    self.transform = transforms.Compose(
        [
         transforms.ToTensor(),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ]
    )

    self.dims = (3, 32, 32)
    self.num_classes = 10

  def prepare_data(self):
    CIFAR10(self.data_dir, train=True, download=True)
    CIFAR10(self.data_dir, train=False, download=True)

  def setup(self, stage=None):
    if stage == "fit" or stage is None:
      cifar_full = CIFAR10(self.data_dir, train=True, transform=self.transform)
      self.cifar_train, self.cifar_val = random_split(cifar_full, [45000, 5000])

    if stage == "test" or stage is None:
      self.cifar_test = CIFAR10(self.data_dir, train=False, transform=self.transform)

  def train_dataloader(self):
    return DataLoader(self.cifar_train, batch_size=BATCH_SIZE)

  def val_dataloader(self):
    return DataLoader(self.cifar_val, batch_size=BATCH_SIZE)

  def test_dataloader(self):
    return DataLoader(self.cifar_test, batch_size=BATCH_SIZE)

In [10]:
dm = CIFAR10DataModule()
model = LitModel(*dm.size(), dm.num_classes, hidden_size=256)
tqdm_progress_bar = TQDMProgressBar(refresh_rate=20)
trainer = Trainer(
    max_epochs = 5,
    accelerator = "auto",
    devices = 1 if torch.cuda.is_available() else None,
    callbacks = [tqdm_progress_bar],
)
trainer.fit(model, dm)

  rank_zero_deprecation("DataModule property `dims` was deprecated in v1.5 and will be removed in v1.7.")
  rank_zero_deprecation("DataModule property `size` was deprecated in v1.5 and will be removed in v1.7.")
  rank_zero_deprecation("DataModule property `dims` was deprecated in v1.5 and will be removed in v1.7.")
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Files already downloaded and verified
Files already downloaded and verified


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type       | Params
-------------------------------------
0 | model | Sequential | 855 K 
-------------------------------------
855 K     Trainable params
0         Non-trainable params
855 K     Total params
3.420     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

In [11]:
trainer.test()

  f"`.{fn}(ckpt_path=None)` was called without a model."


MisconfigurationException: ignored