In [1]:
import pandas as pd
import torch
from pytorch_lightning import LightningModule, LightningDataModule
from torch.nn import Sequential, LazyLinear, ReLU, MSELoss
from torch.optim import Adam
from torch.utils.data import random_split, DataLoader
from torchdata.datapipes.map import SequenceWrapper, Zipper


class MyNet(LightningModule):
    def __init__(self, nhidden=64, lr=1e-3):
        super().__init__()
        
        self.net = Sequential(
            LazyLinear(nhidden), ReLU(),
            LazyLinear(1)
        )
        self.loss = MSELoss()
        
        self.lr = lr

    def configure_optimizers(self):
        return Adam(self.parameters(), lr=self.lr)
        
    def forward(self, x):
        return self.net(x)
        
    def _training_step(self, batch, loss_name):
        inputs, targets = batch
        outputs = self.forward(inputs)
        loss = self.loss(outputs, targets)
        self.log(loss_name, loss)
        return loss

    def training_step(self, batch, *args, **kwargs):
        return self._training_step(batch, 'train')
    def validation_step(self, batch, *args, **kwargs):
        return self._training_step(batch, 'val')
    
    
class MyData(LightningDataModule):
    def __init__(self, batch_size):
        super().__init__()
        self.batch_size = batch_size
        self.df = pd.read_csv('pantheon+SH0ES.dat', delimiter=' ')
        self.dataset = Zipper(*(
            SequenceWrapper(torch.tensor(self.df[keys].to_numpy(), dtype=torch.get_default_dtype()))
            for keys in (
                ['zCMB', 'x1', 'c', 'HOST_LOGMASS', 'HOST_ANGSEP', 'VPEC'],
                ['mB']
            )
        ))
    
    def prepare_data(self):
        self.train_dataset, self.val_dataset = random_split(
            self.dataset, [0.91, 0.09],
            generator=torch.Generator().manual_seed(42)
        )

    def train_dataloader(self):
        return DataLoader(self.train_dataset, shuffle=True, batch_size=self.batch_size)
    
    def val_dataloader(self):
        return DataLoader(self.val_dataset, shuffle=True, batch_size=self.batch_size)

In [None]:
from pytorch_lightning import Trainer

net = MyNet()
data = MyData(10)

trainer = Trainer()
trainer.fit(net, datamodule=data)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
C:\Users\kosio\AppData\Local\Programs\Python\Python312\Lib\site-packages\pytorch_lightning\loops\utilities.py:73: `max_epochs` was not set. Setting it to 1000 epochs. To train without an epoch limit, set `max_epochs=-1`.
Missing logger folder: C:\Users\kosio\Projects\hpc_for_data_science_2023-2024\practical\02\lightning_logs
C:\Users\kosio\AppData\Local\Programs\Python\Python312\Lib\site-packages\pytorch_lightning\utilities\model_summary\model_summary.py:454: A layer with UninitializedParameter was found. Thus, the total number of parameters detected may be inaccurate.

  | Name | Type       | Params
------------------------------------
0 | net  | Sequential | 0     
1 | loss | MSELoss    | 0     
------------------------------------
0         Trainable params
0         Non-trainable params
0         Total params
0.000     Total estimated mo

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

C:\Users\kosio\AppData\Local\Programs\Python\Python312\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:492: Your `val_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.
C:\Users\kosio\AppData\Local\Programs\Python\Python312\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
C:\Users\kosio\AppData\Local\Programs\Python\Python312\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]