<a href="https://colab.research.google.com/github/nirb28/nn_catalyst/blob/main/src/pl/scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [38]:
%%capture
# Environment bootstrap: decide whether the notebook runs inside Google Colab
# and, if so, mount Google Drive and install the Lightning dependency.
# The cell is run under %%capture, so the print output below is not displayed.
import sys
# Presence of the google.colab module in sys.modules is used as the Colab signal.
IN_COLAB = 'google.colab' in sys.modules

if IN_COLAB:
    print("Running in Colab!")
    from google.colab import drive
    # Mount Drive at /content/drive; force_remount=False leaves an existing mount alone.
    drive.mount('/content/drive', force_remount=False)
    # NOTE(review): userdata is imported but not referenced in the visible cells.
    from google.colab import userdata
    # IPython shell escape (not plain Python): installs pytorch_lightning via pip.
    !pip install pytorch_lightning
else:
    print("Not running in Colab.")


In [59]:
from torch.utils.data import DataLoader, Dataset
from torch.utils.data import random_split
import pytorch_lightning as pl
import torch, math, os
from torch.utils.data import Dataset, DataLoader
import numpy as np

def resolve_path_gdrive(relativePath):
    """Prefix a repository-relative path with the location of the checkout.

    If the Google Drive mount point ``/content/drive`` exists, the fixed
    Drive location of the ``nn_catalyst`` checkout is used as the prefix.
    Otherwise the prefix is whatever ``utils.get_project_root()`` returns,
    followed by a ``/`` separator.

    Args:
        relativePath: Path of the file relative to the repository root.

    Returns:
        The prefix and ``relativePath`` joined by plain string concatenation.
    """
    if not os.path.exists('/content/drive'):
        # Imported lazily so the helper module is only required off Drive.
        from utils import get_project_root
        local_root = get_project_root()
        return local_root + "/" + relativePath

    drive_checkout = '/content/drive/MyDrive/work/gdrive-workspaces/git/nn_catalyst/'
    return drive_checkout + relativePath

class CatalystDataset(Dataset):
    """In-memory dataset read from a comma-separated file with one header row.

    The trailing ``n_target_columns`` columns of the file are target columns;
    every column before them is an input feature. Exactly one target column,
    selected by ``target_index`` within the trailing group, is exposed as the
    label, so each sample is a ``(features, target)`` pair where ``target``
    has a single element.

    Args:
        datafile: Repository-relative path of the CSV file; it is resolved
            through ``resolve_path_gdrive``.
        max_rows: Maximum number of data rows to read (``None`` reads the
            whole file). Defaults to 10, the value previously hard-coded.
        n_target_columns: Number of trailing columns that hold targets.
        target_index: Zero-based position of the wanted target inside the
            trailing target group.

    Raises:
        ValueError: If ``target_index`` is outside the target group or the
            file has fewer columns than ``n_target_columns``.
    """

    def __init__(self, datafile='src/pl/merged_data_last29.csv', max_rows=10,
                 n_target_columns=29, target_index=0):
        if not 0 <= target_index < n_target_columns:
            raise ValueError(
                f"target_index must be in [0, {n_target_columns}), got {target_index}")

        print("Loading data")
        # ndmin=2 keeps the array two-dimensional even when a single row is
        # read; without it loadtxt returns a 1-D array and the column slicing
        # below fails.
        xy = np.loadtxt(resolve_path_gdrive(datafile), delimiter=',', skiprows=1,
                        max_rows=max_rows, dtype=float, ndmin=2)
        if xy.shape[1] < n_target_columns:
            raise ValueError(
                f"expected at least {n_target_columns} columns, found {xy.shape[1]}")

        self.n_samples = xy.shape[0]
        # Features are all leading columns; the targets occupy the tail.
        first_target = xy.shape[1] - n_target_columns
        target_col = first_target + target_index
        self.x_data = torch.from_numpy(xy[:, :first_target])  # size [n_samples, n_features]
        # Slice (rather than index) so the target keeps a trailing dim of 1.
        self.y_data = torch.from_numpy(xy[:, target_col:target_col + 1])  # size [n_samples, 1]
        print(self.y_data)

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair stored at ``index``."""
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of rows that were loaded."""
        return self.n_samples

class CatalystDataModule(pl.LightningDataModule):
    """Lightning data module that serves train/val/test splits of CatalystDataset.

    The dataset is loaded eagerly in the constructor with its default file.
    It is split once into 80% train, 10% test and the remainder as
    validation; the split is created in ``setup`` and reused afterwards.

    Args:
        data_dir: Stored on the instance; not otherwise used by this class.
        batch_size: Batch size for all three dataloaders.
        num_workers: Number of worker processes for all three dataloaders.
    """

    def __init__(self, data_dir, batch_size, num_workers):
        super().__init__()
        self.data_dir = data_dir
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.entire_dataset = CatalystDataset()
        # Filled in by _split_once(); None means "not split yet".
        self.train_ds = None
        self.val_ds = None
        self.test_ds = None

    def prepare_data(self):
        """Do nothing: there is nothing to download or preprocess.

        State is deliberately not assigned here. Lightning runs this hook on
        a single process only, so attributes set here would be missing on
        the other processes.
        """
        return

    def setup(self, stage=None):
        """Create the train/val/test split (idempotent).

        Args:
            stage: Stage name passed by Lightning; unused because the same
                split serves every stage.
        """
        self._split_once()

    def _split_once(self):
        """Split the dataset on first use and keep that split afterwards."""
        if self.train_ds is not None:
            # Re-splitting on every call would draw a new random partition,
            # letting test rows overlap with rows already used for training.
            return
        total = len(self.entire_dataset)
        train_set_size = int(total * 0.8)
        test_set_size = int(total * 0.1)
        valid_set_size = total - train_set_size - test_set_size
        # NOTE(review): the split is unseeded; pass a seeded generator to
        # random_split if reproducible or cross-process-identical splits are needed.
        self.train_ds, self.val_ds, self.test_ds = random_split(
            self.entire_dataset, [train_set_size, valid_set_size, test_set_size])

    def train_dataloader(self):
        """Return a shuffled dataloader over the training split."""
        self._split_once()
        return DataLoader(
            self.train_ds,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
            shuffle=True,
        )

    def val_dataloader(self):
        """Return an unshuffled dataloader over the validation split."""
        self._split_once()
        return DataLoader(
            self.val_ds,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
            shuffle=False,
        )

    def test_dataloader(self):
        """Return an unshuffled dataloader over the test split."""
        self._split_once()
        return DataLoader(
            self.test_ds,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
            shuffle=False,
        )


In [62]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from tqdm import tqdm
import pytorch_lightning as pl
import torchmetrics
from torchmetrics import Metric

class BaseModel(pl.LightningModule):
    """Shared Lightning step/optimizer logic for the networks in this file.

    Subclasses must implement ``forward`` and set two attributes in their
    constructor: ``self.loss_fn`` (called as ``loss_fn(scores, y)``) and
    ``self.lr`` (the learning rate handed to Adam).
    """

    def training_step(self, batch, batch_idx):
        """Compute the loss of one training batch and log it as ``train_loss``.

        Returns:
            A dict with the ``loss`` plus the model outputs (``scores``) and
            targets (``y``) of the batch.
        """
        loss, scores, y = self._common_step(batch, batch_idx)
        self.log_dict(
            {
                "train_loss": loss,
            },
            on_step=False,
            on_epoch=True,
            prog_bar=True,
        )
        return {"loss": loss, "scores": scores, "y": y}

    def validation_step(self, batch, batch_idx):
        """Compute and log ``val_loss`` for one validation batch."""
        loss, scores, y = self._common_step(batch, batch_idx)
        # "val_loss" is the metric monitored by the LR scheduler below.
        self.log("val_loss", loss)
        return loss

    def test_step(self, batch, batch_idx):
        """Compute and log ``test_loss`` for one test batch."""
        loss, scores, y = self._common_step(batch, batch_idx)
        self.log("test_loss", loss)
        return loss

    def _common_step(self, batch, batch_idx):
        """Run the forward pass and loss shared by train/val/test steps.

        Returns:
            Tuple ``(loss, scores, y)``.
        """
        x, y = batch
        # Flatten everything after the batch dimension.
        x = x.reshape(x.size(0), -1)
        scores = self.forward(x)
        loss = self.loss_fn(scores, y)
        return loss, scores, y

    def predict_step(self, batch, batch_idx):
        """Return the raw model outputs for one batch.

        The networks built on this class are trained as regressors, so the
        prediction is the output itself. Taking ``argmax`` over the output
        dimension (as done previously) yields 0 for every row when there is
        a single output column.

        Args:
            batch: Either a ``(features, target, ...)`` sequence or the
                feature tensor alone.
            batch_idx: Index of the batch (unused).
        """
        x = batch[0] if isinstance(batch, (list, tuple)) else batch
        x = x.reshape(x.size(0), -1)
        return self.forward(x)

    def configure_optimizers(self):
        """Build Adam plus a ReduceLROnPlateau scheduler driven by ``val_loss``."""
        optimizer = optim.Adam(self.parameters(), lr=self.lr)
        # NOTE(review): `verbose` is deprecated in recent PyTorch releases —
        # confirm against the installed version before upgrading.
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.2, patience=2, min_lr=1e-7, verbose=True)
        return {"optimizer": optimizer, "lr_scheduler": scheduler, "monitor": "val_loss"}

class SingleTargetNet(BaseModel):
    """Two-hidden-layer MLP with a skip connection, predicting one value.

    Layer widths are ``input_size -> 1024 -> 512 -> 1``. The 1024-wide
    activation is additionally projected to 512 by ``fc_skip`` and added to
    the second hidden activation before the output layer.

    Args:
        input_size: Number of input features per sample.
        learning_rate: Learning rate stored as ``self.lr`` for the optimizer.
        dropout_rate: Dropout probability applied after each hidden layer.
        target: Accepted for compatibility; not used by this class.
    """

    def __init__(self, input_size, learning_rate, dropout_rate=0.5, target=1):
        super(SingleTargetNet, self).__init__()
        self.lr = learning_rate
        self.loss_fn = nn.MSELoss()

        self.fc1 = nn.Linear(input_size, 1024)
        self.bn1 = nn.BatchNorm1d(1024)
        self.fc2 = nn.Linear(1024, 512)
        self.bn2 = nn.BatchNorm1d(512)
        self.fc3 = nn.Linear(512, 1)
        self.fc_skip = nn.Linear(1024, 512)
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        """Map a ``[batch, input_size]`` tensor to ``[batch, 1]`` outputs."""
        # `nn.functional` is used because the file never imports the `F`
        # alias; referencing `F` raises NameError in a fresh session.
        x1 = nn.functional.relu(self.bn1(self.fc1(x)))
        x1 = self.dropout(x1)

        x2 = nn.functional.relu(self.bn2(self.fc2(x1)))
        x2 = self.dropout(x2)

        # Skip connection. Out-of-place add: in eval mode dropout returns its
        # input unchanged, so an in-place `+=` would overwrite the ReLU
        # output that autograd may still need.
        x2 = x2 + self.fc_skip(x1)

        return self.fc3(x2)

class RegressionNetwork(BaseModel):
    """Plain MLP regressor: two hidden layers with batch norm and LeakyReLU.

    Args:
        input_dim: Number of input features per sample.
        hidden_dim: Width of both hidden layers.
        output_dim: Number of output values per sample.
        learning_rate: Learning rate stored as ``self.lr`` for the optimizer
            built by ``BaseModel.configure_optimizers``.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, learning_rate=1e-3):
        super(RegressionNetwork, self).__init__()
        # BaseModel reads self.lr and self.loss_fn; without them training
        # fails with AttributeError.
        self.lr = learning_rate
        self.loss_fn = nn.MSELoss()

        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.bn1 = nn.BatchNorm1d(hidden_dim)
        self.relu = nn.LeakyReLU()
        self.dropout = nn.Dropout(0.1)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.bn2 = nn.BatchNorm1d(hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Map a ``[batch, input_dim]`` tensor to ``[batch, output_dim]``."""
        # First hidden block; dropout is applied only after this block.
        out = self.fc1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.dropout(out)
        # Second hidden block.
        out = self.fc2(out)
        out = self.bn2(out)
        out = self.relu(out)
        # Linear output layer.
        out = self.fc3(out)
        return out



In [63]:
# --- Model and optimisation settings ---
INPUT_SIZE = 1479        # passed to the network as its input width
NUM_CLASSES = 1
LEARNING_RATE = 1e-3
BATCH_SIZE = 64
NUM_EPOCHS = 150         # upper bound on training epochs

# --- Data loading settings ---
DATA_DIR = "dataset/"
NUM_WORKERS = 4          # dataloader worker processes

# --- Hardware / numeric settings handed to the Lightning Trainer ---
ACCELERATOR = "gpu"
DEVICES = [0]
PRECISION = 64           # the dataset tensors are float64

In [64]:
# Build the data module from the settings above. Its constructor loads the
# dataset immediately, which is what prints "Loading data" and the targets.
dm1 = CatalystDataModule(data_dir=DATA_DIR, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS)

Loading data
tensor([[-0.2489],
        [-0.2242],
        [-0.2304],
        [-0.2333],
        [-0.1994],
        [-0.1900],
        [-0.2272],
        [-0.2141],
        [-0.1572],
        [-0.2121]], dtype=torch.float64)


In [65]:
import torch
import pytorch_lightning as pl
#from model import NN
#from dataset import CatalystDataModule
#import config
from pytorch_lightning.callbacks import EarlyStopping

# Trade float32 matmul precision for speed; also silences Lightning's hint.
torch.set_float32_matmul_precision("medium")

if __name__ == "__main__":
    # Reuse the data module built in the earlier cell.
    dm = dm1
    model = SingleTargetNet(input_size=INPUT_SIZE, learning_rate=LEARNING_RATE)

    trainer = pl.Trainer(
        accelerator=ACCELERATOR,
        devices=DEVICES,
        precision=PRECISION,
        min_epochs=1,
        max_epochs=NUM_EPOCHS,
        # Smoke-test mode: each loop runs on a single batch, with logging
        # and checkpointing suppressed.
        fast_dev_run=True,
        callbacks=[EarlyStopping(monitor="val_loss")],
    )

    # Train, then evaluate on the validation and test splits.
    trainer.fit(model, datamodule=dm)
    trainer.validate(model, datamodule=dm)
    trainer.test(model, datamodule=dm)


INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.utilities.rank_zero:Running in `fast_dev_run` mode: will run the requested loop using 1 batch(es). Logging and checkpointing is suppressed.
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.callbacks.model_summary:
  | Name    | Type        | Params | Mode 
------------------------------------------------
0 | loss_fn | MSELoss     | 0      | train
1 | fc1     | Linear      | 1.5 M  | train
2 | bn1     | BatchNorm1d | 2.0 K  | train
3 | fc2     | Linear      | 524 K  | train
4 | bn2     | BatchNorm1d | 1.0 K  | train
5 | fc3     | Linear      | 513    | train
6 | fc_skip | Linear      | 524 K  | train
7 | dropout | Dropout     | 0      | train
-------------------------

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_steps=1` reached.
  self.pid = os.fork()
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Testing: |          | 0/? [00:00<?, ?it/s]