**Multi-GPU working**  
https://www.kaggle.com/code/rasmus01610/notebook-multi-gpu-training-with-pytorch-lightning/notebook

In [82]:
#!pip install monai
#!pip install scikit-learn

In [83]:
import os
import PIL
import torch
import numpy as np

from monai.data import DataLoader, ArrayDataset
from torch.optim.lr_scheduler import CosineAnnealingLR, OneCycleLR,  CyclicLR
from torchvision import models
from monai.transforms import (
    EnsureChannelFirst,
    AsDiscrete,
    Compose,
    LoadImage,
    ScaleIntensity,
)
from torchvision import transforms
import glob
import os

from torch.utils.data import random_split
import pytorch_lightning as pl
from torch import optim
from sklearn.metrics import accuracy_score

In [84]:
#!wget https://github.com/Project-MONAI/MONAI-extra-test-data/releases/download/0.8.1/MedNIST.tar.gz
#!tar -zxf MedNIST.tar.gz

In [85]:
path = "./PCBData"
images = glob.glob(path + '/*/*.jpg')
labels = [image.split('/')[-2] for image in images]
unique_labels = list(set(labels))
labels = [unique_labels.index(i) for i in labels]

In [86]:
num_classes = len(unique_labels)

train_transforms = Compose(
    [
        LoadImage(image_only=True),
        EnsureChannelFirst(),
        ScaleIntensity(),
    ]
)

y_trans = Compose([AsDiscrete(to_onehot=num_classes)])

In [87]:
ds = ArrayDataset(images, labels=labels, img_transform=train_transforms, label_transform=y_trans)
n_data = len(ds)
n_train = int(n_data*0.9)
train_ds, val_ds = random_split(ds, [n_train, n_data-n_train], generator=torch.Generator().manual_seed(42))

batch_size = 64
n_gpu = 4

In [88]:
len(ds)

12758

In [89]:
#train_dl = DataLoader(train_ds, shuffle=True, pin_memory=True, num_workers=2, batch_size=256)
#val_dl = DataLoader(val_ds, shuffle=False, pin_memory=True, num_workers=2, batch_size=256)

In [90]:
class PCBModel(pl.LightningModule):
    def __init__(self, net, lr, loss):
        super().__init__()
        self.net = net
        self.lr = lr
        self.loss = loss 
        
    def forward(self, x):
        return torch.nn.functional.softmax(self.net(x), dim=1)

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = self.loss(y_hat, y.float())
        self.log("train_loss", loss)
        return loss
    
    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = self.loss(y_hat, y.float())
        val_acc = (y.argmax(dim=1) == y_hat.argmax(dim=1)).float().sum() / y.shape[0]
        self.log("val_acc", val_acc, prog_bar=True, sync_dist=True)
        self.log("val_loss", loss, prog_bar=True, sync_dist=True)
        return loss

    def configure_optimizers(self):
        optimizer = optim.Adam(self.parameters(), lr=self.lr)
        #scheduler = CosineAnnealingLR(optimizer, self.trainer.max_epochs * 200, 0)
        #scheduler = CyclicLR(optimizer, base_lr=self.lr/100, max_lr=self.lr, step_size_up = 100)
        scheduler = OneCycleLR(optimizer, max_lr=self.lr, epochs=self.trainer.max_epochs,
                               steps_per_epoch=n_train//batch_size//n_gpu)
        return [optimizer], [scheduler]

    def setup(self, stage=None):
        path = "./PCBData"
        images = glob.glob(path + '/*/*.jpg')
        labels = [image.split('/')[-2] for image in images]
        unique_labels = list(set(labels))
        labels = [unique_labels.index(i) for i in labels]

        num_classes = len(unique_labels)

        train_transforms = Compose(
            [
                LoadImage(image_only=True),
                EnsureChannelFirst(),
                ScaleIntensity(),
                transforms.RandomHorizontalFlip(),
                transforms.RandomRotation(degrees=90),
                transforms.RandomRotation(degrees=180),
            ]
        )
        
        val_transforms = Compose(
            [
                LoadImage(image_only=True),
                EnsureChannelFirst(),
                ScaleIntensity(),
            ]
        )
        
        y_trans = Compose([AsDiscrete(to_onehot=num_classes)])

        n_data = len(images)
        n_train = int(n_data*0.9)
        import numpy as np
        perm = np.random.permutation(n_data)
        images = np.array(images)[perm]
        labels = np.array(labels)[perm]
        train_images = list(images[:n_train])
        val_images = list(images[n_train:])
        train_labels = list(labels[:n_train])
        val_labels = list(labels[n_train:])
        
        #ds = ArrayDataset(images, labels=labels, img_transform=train_transforms, label_transform=y_trans)
        #self.train_ds, self.val_ds = random_split(ds, [n_train, n_data-n_train], generator=torch.Generator().manual_seed(42))
        train_ds = ArrayDataset(train_images, labels=train_labels, img_transform=train_transforms, label_transform=y_trans)
        val_ds = ArrayDataset(val_images, labels=val_labels, img_transform=val_transforms, label_transform=y_trans)

    def aug():
        return Compose([
                        transforms.RandomHorizontalFlip(),
                        transforms.RandomRotation(degrees=90),
                        transforms.RandomRotation(degrees=180),
                       ])


    def train_dataloader(self):
        return DataLoader(self.train_ds, shuffle=True, pin_memory=True, num_workers=2, batch_size=batch_size)
        
    def val_dataloader(self):
        return DataLoader(self.val_ds, shuffle=False, pin_memory=True, num_workers=2, batch_size=batch_size)
        

In [91]:
resnet_pretrained = models.efficientnet_b2(pretrained=True)

In [92]:
resnet_pretrained

EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivat

In [93]:
num_ftrs = resnet_pretrained.classifier[1].in_features
resnet_pretrained.classifier[1] = torch.nn.Linear(num_ftrs, num_classes)

In [94]:
#net = DenseNet121(spatial_dims=2, in_channels=3, out_channels=num_classes)
net = resnet_pretrained
lr = 1e-3
loss = torch.nn.CrossEntropyLoss()
model = PCBModel(net, lr, loss)

In [95]:
trainer = pl.Trainer(accelerator="gpu",devices=n_gpu,strategy="ddp_notebook", max_epochs=20, log_every_n_steps=40)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [96]:
#trainer.fit(model, train_dl, val_dl)
trainer.fit(model)

Initializing distributed: GLOBAL_RANK: 0, MEMBER: 1/4
Initializing distributed: GLOBAL_RANK: 1, MEMBER: 2/4
Initializing distributed: GLOBAL_RANK: 2, MEMBER: 3/4
Initializing distributed: GLOBAL_RANK: 3, MEMBER: 4/4
----------------------------------------------------------------------------------------------------
distributed_backend=nccl
All distributed processes registered. Starting with 4 processes
----------------------------------------------------------------------------------------------------

You are using a CUDA device ('NVIDIA GeForce RTX 3090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
LOCAL_RANK: 2 - CUDA_VISIBLE_DEVICES: [0,1,2,3]
LOCAL_RANK: 3 - CUDA_VISIBLE_DEV

Sanity Checking: |                                                                          | 0/? [00:00<?, ?i…

ProcessRaisedException: 

-- Process 2 terminated with the following error:
Traceback (most recent call last):
  File "/home/kotech/venv-lightning/lib/python3.8/site-packages/torch/multiprocessing/spawn.py", line 68, in _wrap
    fn(i, *args)
  File "/home/kotech/venv-lightning/lib/python3.8/site-packages/pytorch_lightning/strategies/launchers/multiprocessing.py", line 170, in _wrapping_function
    results = function(*args, **kwargs)
  File "/home/kotech/venv-lightning/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 580, in _fit_impl
    self._run(model, ckpt_path=ckpt_path)
  File "/home/kotech/venv-lightning/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 989, in _run
    results = self._run_stage()
  File "/home/kotech/venv-lightning/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 1033, in _run_stage
    self._run_sanity_check()
  File "/home/kotech/venv-lightning/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 1062, in _run_sanity_check
    val_loop.run()
  File "/home/kotech/venv-lightning/lib/python3.8/site-packages/pytorch_lightning/loops/utilities.py", line 182, in _decorator
    return loop_run(self, *args, **kwargs)
  File "/home/kotech/venv-lightning/lib/python3.8/site-packages/pytorch_lightning/loops/evaluation_loop.py", line 109, in run
    self.setup_data()
  File "/home/kotech/venv-lightning/lib/python3.8/site-packages/pytorch_lightning/loops/evaluation_loop.py", line 165, in setup_data
    dataloaders = _request_dataloader(source)
  File "/home/kotech/venv-lightning/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py", line 342, in _request_dataloader
    return data_source.dataloader()
  File "/home/kotech/venv-lightning/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py", line 306, in dataloader
    return call._call_lightning_module_hook(self.instance.trainer, self.name, pl_module=self.instance)
  File "/home/kotech/venv-lightning/lib/python3.8/site-packages/pytorch_lightning/trainer/call.py", line 157, in _call_lightning_module_hook
    output = fn(*args, **kwargs)
  File "/tmp/ipykernel_241622/316862769.py", line 93, in val_dataloader
    return DataLoader(self.val_ds, shuffle=False, pin_memory=True, num_workers=2, batch_size=batch_size)
  File "/home/kotech/venv-lightning/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1688, in __getattr__
    raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")
AttributeError: 'PCBModel' object has no attribute 'val_ds'


In [None]:
f = [
    'a', 'b', 'c']
n = [1,2,3]
import numpy as np
perm = np.random.permutation(3)
f = np.array(f)
n = np.array(n)
list(f[perm]), list(n[perm])