In [1]:
import torch
from torch import nn
from torch import optim
from torchvision import datasets, transforms
from torch.utils.data import  random_split, DataLoader


import pytorch_lightning as pl
import torchmetrics

In [2]:
# basic model
model = nn.Sequential(
    nn.Linear(28*28, 64),
    nn.ReLU(),
    nn.Linear(64, 64),
    nn.ReLU(),
    nn.Linear(64, 10)
)

In [46]:
class ResNet(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.l1 = nn.Linear(28*28, 64)
        self.l2 = nn.Linear(64, 64)
        self.l3 = nn.Linear(64, 10)
        self.do = nn.Dropout(0.1)
        
        self.loss_f = nn.CrossEntropyLoss()
        self.accuracy = torchmetrics.classification.Accuracy(task="multiclass", num_classes=10)
    
    def forward(self, x):
        h1 = nn.functional.relu(self.l1(x))
        h2 = nn.functional.relu(self.l2(h1))
        do = self.do(h1+h2)
        logits = self.l3(do)
        return logits

    def configure_optimizers(self):
        optimiser = optim.SGD(self.parameters(), lr=1.5e-3)
        return optimiser

    def training_step(self, batch, batch_idx):

        x, y = batch
        # x -> b * 1 * 28 * 28
        b = x.size(0)
        x = x.view(b, -1)

        # step 1: forward
        logits = self(x)  # y_hat : logits
        # step 2: compute the objective/loss function
        loss = self.loss_f(logits, y)
        acc = self.accuracy(logits, y)
        
        # self.log("train_loss", loss, prog_bar=True)
        # self.log("train_acc", acc, prog_bar=True)
        # pbar = {'train_acc': acc}
        # return {'loss': loss}
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        b = x.size(0)
        x = x.view(b, -1)
        logits = self(x) 
        loss = self.loss_f(logits, y)
        acc = self.accuracy(logits, y)
        self.log("val_loss", loss, prog_bar=True)
        self.log("val_acc", acc, prog_bar=True)
        return loss

    def prepare_data(self):
        datasets.MNIST('data', train=True, download=True, transform=transforms.ToTensor())

    def setup(self, stage=None):
        dataset = datasets.MNIST('data', train=True, download=False, transform=transforms.ToTensor())
        self.train_data, self.val_data = random_split(dataset, [55000, 5000])
        
    def train_dataloader(self):
        train_loader = DataLoader(self.train_data, batch_size=32, num_workers=2)
        return train_loader

    def val_dataloader(self):
        val_loader = DataLoader(self.val_data, batch_size = 32, num_workers=2)
        return val_loader
    

# initialise model
model = ResNet()

In [47]:
trainer = pl.Trainer(max_epochs=3)
trainer.fit(model)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs



  | Name     | Type               | Params
------------------------------------------------
0 | l1       | Linear             | 50.2 K
1 | l2       | Linear             | 4.2 K 
2 | l3       | Linear             | 650   
3 | do       | Dropout            | 0     
4 | loss_f   | CrossEntropyLoss   | 0     
5 | accuracy | MulticlassAccuracy | 0     
------------------------------------------------
55.1 K    Trainable params
0         Non-trainable params
55.1 K    Total params
0.220     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=3` reached.


In [8]:
# Define the optimizers
params = model.parameters()
optimiser = optim.SGD(model.parameters(), lr=1.5e-3)

In [9]:
# Define the loss function
loss = nn.CrossEntropyLoss()

In [10]:
# Train, Val split
train_data = datasets.MNIST('data', train=True, download=True, transform=transforms.ToTensor())
train, val = random_split(train_data, [55000, 5000])
train_loader = DataLoader(train, batch_size=32)
val_loader = DataLoader(val, batch_size = 32)

In [2]:
torch.randn(5).cuda()

tensor([-0.2269,  0.3183,  0.3237, -1.1954, -0.2639], device='cuda:0')

In [13]:
# Training loop
nb_epochs = 5

for epoch in range(nb_epochs):
    losses = list()
    accuracies = list()

    model.train()
    for batch in train_loader:
        x, y = batch

        # x -> b * 1 * 28 * 28
        b = x.size(0)
        x = x.view(b, -1).cuda()

        # step 1: forward
        y_hat = model(x)  # y_hat : logits

        # step 2: compute the objective/loss function
        j = loss(y_hat, y.cuda())

        # step 3: cleaning the gradients
        model.zero_grad() 
        # optimiser.zero_grad()
        # params.grad._zero()

        # step 4: compute the partial derivatives of j with parameters
        j.backward()


        # step 5: backprop/ oposite direction step
        optimiser.step()
        # with torch.no_grad(): params = params - eta * params.grad

        losses.append(j.item())
        accuracies.append(y.eq(y_hat.cpu().detach().argmax(dim=1)).float().mean())
    print(f'Epoch {epoch+1}', end=', ')
    print(f'training loss : {torch.tensor(losses).mean():.2f}', end=', ')
    print(f'training accuracy : {torch.tensor(accuracies).mean():.2f}')

    losses_val = list()
    accuracies = list()
    model.eval()
    for batch in val_loader:
        x, y = batch

        # x -> b * 1 * 28 * 28
        b = x.size(0)
        x = x.view(b, -1).cuda()

        # step 1: forward but with no grad
        with torch.no_grad():
            y_hat = model(x)  # y_hat : logits

        # step 2: compute the objective/loss function
        j = loss(y_hat, y.cuda())

        losses_val.append(j.item())
        accuracies.append(y.eq(y_hat.cpu().detach().argmax(dim=1)).float().mean())


    
    print(f'Epoch {epoch+1}', end=', ')
    print(f'validation loss : {torch.tensor(losses_val).mean():.2f}', end=', ')
    print(f'validation accuracy : {torch.tensor(accuracies).mean():.2f}')

RuntimeError: CUDA unknown error - this may be due to an incorrectly set up environment, e.g. changing env variable CUDA_VISIBLE_DEVICES after program start. Setting the available devices to be zero.

In [12]:
!ls lightning_logs/version_3/checkpoints

'epoch=3-step=7500.ckpt'


In [5]:
import torch

torch.cuda.is_available()

True