Create Lightning Data Module (subclass)

In [13]:
import os
import torch
from torch import nn, optim
import pytorch_lightning as pl
import torch.nn.functional as F
from dask import dataframe as dd
import torchvision.models as models
from pytorch_lightning import Trainer
from torch.utils.data import random_split
from torch.utils.data.dataloader import DataLoader
from pytorch_lightning.callbacks import ModelCheckpoint


# Number of DataLoader worker processes. os.cpu_count() returns None when the
# CPU count cannot be determined, so fall back to a single worker instead of
# failing on `None // 1` (the floor-division by 1 was a no-op anyway).
NUM_WORKERS = os.cpu_count() or 1
# Debugging aid: makes CUDA kernel launches synchronous so that errors are
# reported at the call that caused them. This slows GPU execution.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
# Fix the global random seed so runs are reproducible.
pl.seed_everything(42)


class GroDataset(pl.LightningDataModule):
    """Data module serving ``train.csv`` / ``X_test.csv`` as float32 tensors.

    Both files are looked up inside ``data_dir``. Every CSV row becomes one
    dataset item (a 1-D tensor holding all columns of that row).

    NOTE(review): batches are therefore plain row tensors, not
    ``(features, target)`` pairs; a consumer that needs such pairs must split
    the columns itself -- the target column is not known here.

    Args:
        data_dir: Directory containing the CSV files. Defaults to the current
            working directory at construction time.
        batch_size: Number of rows per batch for every dataloader.
        num_workers: Worker process count passed to every dataloader.
    """

    def __init__(self, data_dir: str = None, batch_size: int = int(2**12), num_workers=NUM_WORKERS):
        super().__init__()
        self.data_dir = data_dir or os.getcwd()
        self.num_workers = num_workers
        self.batch_size = batch_size
        # Populated lazily by prepare_data()/setup().
        self._train_full = None  # un-split training tensor
        self.train = None
        self.val = None
        self.test = None

    def _read_csv(self, filename: str) -> torch.Tensor:
        """Read ``filename`` from ``data_dir`` and return it as a float32 tensor."""
        path = os.path.join(self.data_dir, filename)
        # dask frames are lazy: compute() materialises a pandas DataFrame,
        # which can then be converted to a dense array.
        frame = dd.read_csv(path).compute()
        # assumes every column is numeric -- to_numpy raises otherwise.
        return torch.tensor(frame.to_numpy(dtype="float32"))

    def _load_tensors(self) -> None:
        """Load both CSV files into memory and reset any previous split."""
        self._train_full = self._read_csv('train.csv')
        self.train = self._train_full
        self.test = self._read_csv('X_test.csv')

    def prepare_data(self):
        """Load the CSV files (kept here for callers that invoke it directly)."""
        self._load_tensors()

    def setup(self, train_ratio: float = 0.8, stage=None):
        """Make sure data is loaded and create the train/validation split.

        Args:
            train_ratio: Fraction of the training rows kept for training; the
                remainder becomes the validation set. Must lie in ``[0, 1]``.
            stage: Lightning stage name. The split is (re)built for ``'fit'``,
                ``'validate'`` and ``None``.

        Raises:
            ValueError: If ``train_ratio`` is outside ``[0, 1]``.
        """
        # Some callers pass the stage as the first positional argument; a
        # string in the ratio slot can only be a stage name.
        if isinstance(train_ratio, str):
            train_ratio, stage = 0.8, train_ratio
        if not 0.0 <= train_ratio <= 1.0:
            raise ValueError(f"train_ratio must be within [0, 1], got {train_ratio}")

        # prepare_data() is not guaranteed to have run in this process, so
        # load here when the tensors are missing.
        if self._train_full is None:
            self._load_tensors()

        if stage in ('fit', 'validate', None):
            total = len(self._train_full)
            train_amount = int(total * train_ratio)
            # Always split the full tensor so that repeated setup() calls do
            # not keep shrinking an already-split subset.
            self.train, self.val = random_split(
                self._train_full, [train_amount, total - train_amount])

    def train_dataloader(self):
        """Return the shuffled dataloader over the training split."""
        return DataLoader(self.train, batch_size=self.batch_size, num_workers=self.num_workers, pin_memory=True, shuffle=True)

    def val_dataloader(self):
        """Return the dataloader over the validation split."""
        return DataLoader(self.val, batch_size=self.batch_size, num_workers=self.num_workers, pin_memory=True)

    def test_dataloader(self):
        """Return the dataloader over the test tensor."""
        return DataLoader(self.test, batch_size=self.batch_size, num_workers=self.num_workers, pin_memory=True)

Global seed set to 42


Create Lightning Module subclass (model)

In [None]:
class BasicBlock(nn.Module):
    """Fully connected residual block: Linear-BN-SiLU-Linear-BN plus a skip path.

    Args:
        inplanes: Number of input features.
        planes: Number of output features.
        downsample: Optional module applied to the input before it is added to
            the block output; needed whenever the input cannot be added to a
            ``planes``-wide tensor as is.
    """

    def __init__(self, inplanes, planes, downsample=None):
        super().__init__()
        self.ln1 = nn.Linear(inplanes, planes)
        self.bn1 = nn.BatchNorm1d(planes)
        self.silu = nn.SiLU(inplace=True)
        # Second projection must be Linear as well: the activations here are
        # (batch, features), and nn.Conv2d cannot even be constructed without
        # a kernel_size.
        self.ln2 = nn.Linear(planes, planes)
        self.bn2 = nn.BatchNorm1d(planes)
        self.downsample = downsample

    def forward(self, x):
        """Apply the block to ``x`` and return the activated residual sum."""
        identity = x
        out = self.ln1(x)
        out = self.bn1(out)
        out = self.silu(out)

        out = self.ln2(out)
        out = self.bn2(out)

        # Project the skip connection when a projection was supplied.
        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.silu(out)

        return out


In [15]:
from sklearn.metrics import mean_absolute_percentage_error

class ResNetRegressor(pl.LightningModule):
    """Residual MLP regressor trained with mean absolute percentage error.

    The network is an input projection, four stages of residual blocks
    (64, 128, 256 and 512 features wide) and a linear regression head.
    Every batch must unpack as ``x, y`` with ``x`` of shape
    ``(batch, in_features)``.

    Args:
        block: Residual block class, called as
            ``block(inplanes, planes, downsample)``.
        layers: Four integers, the number of blocks in each stage.
        in_features: Width of the input rows. NOTE(review): the default of 3
            only mirrors the input width of the layer this replaces -- set it
            to the real number of feature columns.
        out_features: Number of regression targets.
        lr: Learning rate for AdamW.

    Raises:
        ValueError: If ``layers`` does not contain exactly four entries.
    """

    def __init__(self, block, layers, in_features: int = 3, out_features: int = 1, lr: float = 1e-3):
        super().__init__()
        if len(layers) != 4:
            raise ValueError(f"layers must have 4 entries, got {len(layers)}")
        self.lr = lr
        self.inplanes = 64
        # Input projection; bias is redundant directly before batch norm.
        self.stem = nn.Sequential(
            nn.Linear(in_features, self.inplanes, bias=False),
            nn.BatchNorm1d(self.inplanes),
            nn.SiLU(),
        )
        self.layer1 = self._make_layer(block, self.inplanes, 64, layers[0])
        self.layer2 = self._make_layer(block, 64, 128, layers[1])
        self.layer3 = self._make_layer(block, 128, 256, layers[2])
        self.layer4 = self._make_layer(block, 256, 512, layers[3])
        self.head = nn.Linear(512, out_features)

    @staticmethod
    def loss(preds, target):
        """Return the mean absolute percentage error of ``preds`` vs ``target``.

        Implemented with tensor ops so gradients can flow. The denominator is
        clamped to machine epsilon to avoid division by zero for zero targets.
        """
        eps = torch.finfo(preds.dtype).eps
        return torch.mean(torch.abs(target - preds) / torch.clamp(torch.abs(target), min=eps))

    def _make_layer(self, block, inplanes, planes, blocks):
        """Build one stage of ``blocks`` residual blocks mapping inplanes -> planes."""
        downsample = None
        if inplanes != planes:
            # The first block changes width, so its skip path needs a projection.
            downsample = nn.Sequential(
                nn.Linear(inplanes, planes),
                nn.BatchNorm1d(planes),
            )
        stage = [block(inplanes, planes, downsample)]
        stage.extend(block(planes, planes) for _ in range(1, blocks))
        return nn.Sequential(*stage)

    def forward(self, X):
        """Map ``X`` of shape (batch, in_features) to (batch, out_features)."""
        out = self.stem(X)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        return self.head(out)

    def configure_optimizers(self):
        """Return the AdamW optimizer over all parameters."""
        return optim.AdamW(self.parameters(), lr=self.lr)

    def _shared_step(self, batch, prefix):
        """Compute and log the loss for one batch under ``<prefix>_loss``."""
        x, y = batch
        preds = self(x)
        # Align the target with the prediction shape/dtype; a (batch,) target
        # against (batch, 1) predictions would otherwise broadcast to
        # (batch, batch) inside the loss.
        y = y.to(preds.dtype).reshape_as(preds)
        loss = self.loss(preds, y)
        self.log(f"{prefix}_loss", loss, on_epoch=True, on_step=True, logger=True)
        return loss

    def training_step(self, batch, batch_idx):
        """Return the training loss for ``batch``."""
        return self._shared_step(batch, "train")

    def validation_step(self, batch, batch_idx):
        """Return the validation loss for ``batch``."""
        return self._shared_step(batch, "val")

    def test_step(self, batch, batch_idx):
        """Return the test loss for ``batch``."""
        return self._shared_step(batch, "test")
