<a href="https://colab.research.google.com/github/nirb28/nn_catalyst/blob/main/src/pl/scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%%capture
import sys
IN_COLAB = 'google.colab' in sys.modules
if IN_COLAB:
    print("Running in Colab!")
    from google.colab import drive
    drive.mount('/content/drive', force_remount=False)
    !pip install pytorch_lightning
    !pip install torchmetrics
else:
    print("Not running in Colab.")

In [2]:
from pytorch_lightning.loggers import TensorBoardLogger
import torch
DEBUG = False
# Training hyperparameters
INPUT_SIZE = 1479
NUM_TARGETS = 1
LEARNING_RATE = 0.001
BATCH_SIZE = 512
NUM_EPOCHS = 150
NUM_WORKERS = 0
# Compute related
ACCELERATOR = 'cuda' if torch.cuda.is_available() else 'cpu'
DEVICES = [0]
PRECISION = 32
CHECKPOINTS_FOLDER = "/checkpoints/stn_2_r1"

In [3]:
from torch.utils.data import DataLoader, Dataset
from torch.utils.data import random_split
import pytorch_lightning as pl
import torch, math, os
from torch.utils.data import Dataset, DataLoader
import numpy as np

seed = 1234
pl.seed_everything(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

def resolve_path_gdrive(relativePath):
    if os.path.exists('/content/drive'):
        return '/content/drive/MyDrive/work/gdrive-workspaces/git/nn_catalyst/' + relativePath
    else:
        from utils import get_project_root
        return get_project_root() + "/" + relativePath

print(f"Root project folder is at {resolve_path_gdrive('.')}")

Seed set to 1234


Root project folder is at D:\ds\sync\gdrive\work\gdrive-workspaces\git\nn_catalyst/.


In [4]:
#%load_ext tensorboard
#%tensorboard --logdir f"/content/drive/MyDrive/work/gdrive-workspaces/git/nn_catalyst/checkpoints/{CHECKPOINTS_FOLDER}/lightning_logs"

In [5]:
import numpy as np
datafile='src/pl/merged_data_last29_reordered_byR2.csv'
max_rows=None
xy_orig = np.loadtxt(resolve_path_gdrive(datafile), delimiter=',', skiprows=1, dtype=float, max_rows=max_rows)

In [6]:
class BaseModel(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.r2 = torchmetrics.R2Score()
        self.loss_fn = nn.MSELoss()
        self.validation_step_outputs = []

    def training_step(self, batch, batch_idx):
        loss, scores, y = self._common_step(batch, batch_idx)
        self.log_dict(
            {
                "train_loss": loss,
            },
            on_step=False,
            on_epoch=True,
            prog_bar=True,
        )
        accuracy = self.r2(scores, y)
        self.log("train_acc", accuracy, prog_bar=True)
        return {"loss": loss}

    def validation_step(self, batch, batch_idx):
        loss, scores, y = self._common_step(batch, batch_idx)
        self.log("val_loss", loss)
        self.validation_step_outputs.append(loss)
        return loss

    def on_validation_epoch_end(self):
        epoch_average = torch.stack(self.validation_step_outputs).mean()
        self.log("validation_epoch_average", epoch_average)
        self.validation_step_outputs.clear()  # free memory

    def test_step(self, batch, batch_idx):
        loss, scores, y = self._common_step(batch, batch_idx)
        self.log("test_loss", loss)
        return loss

    def _common_step(self, batch, batch_idx):
        x, y = batch
        x = x.reshape(x.size(0), -1)
        scores = self.forward(x)
        loss = self.loss_fn(scores, y)
        if DEBUG == True:
            print(f"loss: {loss}, len: {len(y)}")
        return loss, scores, y

    def predict_step(self, batch, batch_idx):
        x, y = batch
        x = x.reshape(x.size(0), -1)
        scores = self.forward(x)
        preds = torch.argmax(scores, dim=1)
        return preds

    def configure_optimizers(self):
        optimizer = torch.optim.AdamW(lr=self.lr, params=self.parameters())
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10, min_lr=0.000000001, threshold=0.001)
        return {"optimizer": optimizer, "lr_scheduler": scheduler, "monitor": "val_loss"}

class SingleTargetNet(BaseModel):

    def __init__(self, input_size=INPUT_SIZE, learning_rate=0.001, dropout_rate=0.5, target=1):
        super(SingleTargetNet, self).__init__()
        self.lr = learning_rate
        self.loss_fn = nn.MSELoss()

        self.fc1 = nn.Linear(input_size, 1024)
        self.bn1 = nn.BatchNorm1d(1024)
        self.fc2 = nn.Linear(1024, 512)
        self.bn2 = nn.BatchNorm1d(512)
        self.fc3 = nn.Linear(512, 1)
        self.fc_skip = nn.Linear(1024, 512)
        self.dropout = nn.Dropout(dropout_rate)
        self.save_hyperparameters()

    def forward(self, x):
        x1 = F.relu(self.bn1(self.fc1(x)))
        x1 = self.dropout(x1)

        x2 = F.relu(self.bn2(self.fc2(x1)))
        x2 = self.dropout(x2)

        # Skip connection
        x2 += self.fc_skip(x1)

        x3 = self.fc3(x2)
        return x3


In [7]:
# prompt: write a function that takes in a numpy array and splits it into train, test and validation. it then scales all the data including the target columns. finally create a dataset and dataloader for all the 3 and wrap it into a datamodule

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

class CatalystDataModule(pl.LightningDataModule):
    def __init__(self, data, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS):
        super().__init__()
        self.data = data
        self.batch_size = batch_size
        self.num_workers = num_workers

    def prepare_data(self):
        # Split data into train, validation, and test sets
        X = self.data[:, :-1]
        y = self.data[:, -1]
        X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
        X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

        # Scale data using StandardScaler
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_val = scaler.transform(X_val)
        X_test = scaler.transform(X_test)

        y_train = y_train.reshape(-1, 1)
        y_val = y_val.reshape(-1, 1)
        y_test = y_test.reshape(-1, 1)

        y_train = scaler.fit_transform(y_train)
        y_val = scaler.transform(y_val)
        y_test = scaler.transform(y_test)

        # Create numpy arrays for the data
        self.train_data = np.concatenate((X_train, y_train), axis=1)
        self.val_data = np.concatenate((X_val, y_val), axis=1)
        self.test_data = np.concatenate((X_test, y_test), axis=1)

    def setup(self, stage=None):
        # Create datasets
        self.train_dataset = CatalystDataset(self.train_data)
        self.val_dataset = CatalystDataset(self.val_data)
        self.test_dataset = CatalystDataset(self.test_data)

    def train_dataloader(self):
        return DataLoader(self.train_dataset, batch_size=self.batch_size, num_workers=self.num_workers)

    def val_dataloader(self):
        return DataLoader(self.val_dataset, batch_size=self.batch_size, num_workers=self.num_workers)

    def test_dataloader(self):
        return DataLoader(self.test_dataset, batch_size=self.batch_size, num_workers=self.num_workers)

class CatalystDataset(Dataset):
    def __init__(self, data):
        self.data = data
        self.x = torch.tensor(self.data[:, :-1], dtype=torch.float32)
        #self.y = torch.tensor(self.data[:, -1], dtype=torch.float32)
        self.y = torch.unsqueeze(
            torch.tensor(self.data[:, -1], dtype=torch.float32), 1).float()  # size [n_samples, 1]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]

In [8]:
from torch import nn, optim
import torchmetrics
import torch.nn.functional as F
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint, LearningRateMonitor
from pytorch_lightning.callbacks import RichProgressBar

def prepare_data_module(xy):
    dm = CatalystDataModule(data=xy)
    dm.prepare_data()
    dm.setup()
    return dm

def prepare_trainer(target, num_epochs=NUM_EPOCHS):
    tensorboard = TensorBoardLogger(resolve_path_gdrive(f'{CHECKPOINTS_FOLDER}/lightning_logs'), name=f"{target}")
    checkpoint_callback = ModelCheckpoint(
        dirpath=resolve_path_gdrive(f'{CHECKPOINTS_FOLDER}/{target}'),
        filename='{epoch:02d}-{val_loss:.2f}',
        save_top_k=1,
        verbose=True,
        monitor='val_loss',
        mode='min'
    )
    lr_monitor = LearningRateMonitor(logging_interval='step')
    trainer = pl.Trainer(
        accelerator=ACCELERATOR,
        devices=1,
        min_epochs=1,
        max_epochs=num_epochs,
        precision=PRECISION,
        fast_dev_run=False,
        enable_checkpointing=True,
        enable_progress_bar=True,
        log_every_n_steps=20,
        logger=tensorboard,
        callbacks=[checkpoint_callback, lr_monitor, RichProgressBar(),
                EarlyStopping(monitor="train_loss", patience=10, verbose=True, mode="min")]
    )
    return trainer

def prepare_model(input_size):
    model = SingleTargetNet (
        input_size=input_size,
        learning_rate=LEARNING_RATE,
    )
    return model

def iterate_all_targets(xy_data, total_targets):
    total_cols = xy_data.shape[1]
    for target_num in range(1, total_targets+1):
        target_col_start = total_cols - (total_targets - target_num)
        print(f'Target {target_num}, target_col {target_col_start}')
        xy_data = torch.from_numpy(xy_orig[:,:target_col_start]).float()  # size [n_samples, n_features]
        dm = prepare_data_module(xy_data)
        model = prepare_model(input_size=dm.train_dataset.x.shape[1])
        trainer = prepare_trainer(target=target_num, num_epochs=NUM_EPOCHS)
        trainer.fit(model, dm)
        trainer.validate(model, dm)
        trainer.test(model, dm)

torch.set_float32_matmul_precision("medium") # to make lightning happy
if __name__ == "__main__":
    iterate_all_targets(xy_orig, total_targets=5)

Target 1, target_col 1504


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
d:\ds\work\utilities\conda\envs\nn_310_2\lib\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:652: Checkpoint directory D:\ds\sync\gdrive\work\gdrive-workspaces\git\nn_catalyst\checkpoints\stn_2_r1\1 exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

Metric train_loss improved. New best score: 2.240
Epoch 0, global step 36: 'val_loss' reached 0.55690 (best 0.55690), saving model to 'D:\\ds\\sync\\gdrive\\work\\gdrive-workspaces\\git\\nn_catalyst\\checkpoints\\stn_2_r1\\1\\epoch=00-val_loss=0.56.ckpt' as top 1
Metric train_loss improved by 1.719 >= min_delta = 0.0. New best score: 0.521
Epoch 1, global step 72: 'val_loss' reached 0.38745 (best 0.38745), saving model to 'D:\\ds\\sync\\gdrive\\work\\gdrive-workspaces\\git\\nn_catalyst\\checkpoints\\stn_2_r1\\1\\epoch=01-val_loss=0.39.ckpt' as top 1
Metric train_loss improved by 0.092 >= min_delta = 0.0. New best score: 0.429
Epoch 2, global step 108: 'val_loss' reached 0.33477 (best 0.33477), saving model to 'D:\\ds\\sync\\gdrive\\work\\gdrive-workspaces\\git\\nn_catalyst\\checkpoints\\stn_2_r1\\1\\epoch=02-val_loss=0.33.ckpt' as top 1
Metric train_loss improved by 0.058 >= min_delta = 0.0. New best score: 0.371
Epoch 3, global step 144: 'val_loss' reached 0.30808 (best 0.30808), savi

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

d:\ds\work\utilities\conda\envs\nn_310_2\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

d:\ds\work\utilities\conda\envs\nn_310_2\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


Target 2, target_col 1505


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Missing logger folder: D:\ds\sync\gdrive\work\gdrive-workspaces\git\nn_catalyst//checkpoints/stn_2_r1/lightning_logs\2
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

Metric train_loss improved. New best score: 2.136
Epoch 0, global step 36: 'val_loss' reached 0.60492 (best 0.60492), saving model to 'D:\\ds\\sync\\gdrive\\work\\gdrive-workspaces\\git\\nn_catalyst\\checkpoints\\stn_2_r1\\2\\epoch=00-val_loss=0.60.ckpt' as top 1
Metric train_loss improved by 1.497 >= min_delta = 0.0. New best score: 0.639
Epoch 1, global step 72: 'val_loss' reached 0.48948 (best 0.48948), saving model to 'D:\\ds\\sync\\gdrive\\work\\gdrive-workspaces\\git\\nn_catalyst\\checkpoints\\stn_2_r1\\2\\epoch=01-val_loss=0.49.ckpt' as top 1
Metric train_loss improved by 0.074 >= min_delta = 0.0. New best score: 0.565
Epoch 2, global step 108: 'val_loss' reached 0.46307 (best 0.46307), saving model to 'D:\\ds\\sync\\gdrive\\work\\gdrive-workspaces\\git\\nn_catalyst\\checkpoints\\stn_2_r1\\2\\epoch=02-val_loss=0.46.ckpt' as top 1
Metric train_loss improved by 0.050 >= min_delta = 0.0. New best score: 0.515
Epoch 3, global step 144: 'val_loss' reached 0.43985 (best 0.43985), savi

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

d:\ds\work\utilities\conda\envs\nn_310_2\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

d:\ds\work\utilities\conda\envs\nn_310_2\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


Target 3, target_col 1506


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Missing logger folder: D:\ds\sync\gdrive\work\gdrive-workspaces\git\nn_catalyst//checkpoints/stn_2_r1/lightning_logs\3
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

Metric train_loss improved. New best score: 2.514
Epoch 0, global step 36: 'val_loss' reached 0.68258 (best 0.68258), saving model to 'D:\\ds\\sync\\gdrive\\work\\gdrive-workspaces\\git\\nn_catalyst\\checkpoints\\stn_2_r1\\3\\epoch=00-val_loss=0.68.ckpt' as top 1
Metric train_loss improved by 1.811 >= min_delta = 0.0. New best score: 0.704
Epoch 1, global step 72: 'val_loss' reached 0.57542 (best 0.57542), saving model to 'D:\\ds\\sync\\gdrive\\work\\gdrive-workspaces\\git\\nn_catalyst\\checkpoints\\stn_2_r1\\3\\epoch=01-val_loss=0.58.ckpt' as top 1
Metric train_loss improved by 0.106 >= min_delta = 0.0. New best score: 0.597
Epoch 2, global step 108: 'val_loss' reached 0.50991 (best 0.50991), saving model to 'D:\\ds\\sync\\gdrive\\work\\gdrive-workspaces\\git\\nn_catalyst\\checkpoints\\stn_2_r1\\3\\epoch=02-val_loss=0.51.ckpt' as top 1
Metric train_loss improved by 0.051 >= min_delta = 0.0. New best score: 0.546
Epoch 3, global step 144: 'val_loss' reached 0.48509 (best 0.48509), savi

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

d:\ds\work\utilities\conda\envs\nn_310_2\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

d:\ds\work\utilities\conda\envs\nn_310_2\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


Target 4, target_col 1507


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Missing logger folder: D:\ds\sync\gdrive\work\gdrive-workspaces\git\nn_catalyst//checkpoints/stn_2_r1/lightning_logs\4
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

Metric train_loss improved. New best score: 2.073
Epoch 0, global step 36: 'val_loss' reached 0.64559 (best 0.64559), saving model to 'D:\\ds\\sync\\gdrive\\work\\gdrive-workspaces\\git\\nn_catalyst\\checkpoints\\stn_2_r1\\4\\epoch=00-val_loss=0.65.ckpt' as top 1
Metric train_loss improved by 1.394 >= min_delta = 0.0. New best score: 0.679
Epoch 1, global step 72: 'val_loss' reached 0.53865 (best 0.53865), saving model to 'D:\\ds\\sync\\gdrive\\work\\gdrive-workspaces\\git\\nn_catalyst\\checkpoints\\stn_2_r1\\4\\epoch=01-val_loss=0.54.ckpt' as top 1
Metric train_loss improved by 0.067 >= min_delta = 0.0. New best score: 0.612
Epoch 2, global step 108: 'val_loss' reached 0.52953 (best 0.52953), saving model to 'D:\\ds\\sync\\gdrive\\work\\gdrive-workspaces\\git\\nn_catalyst\\checkpoints\\stn_2_r1\\4\\epoch=02-val_loss=0.53.ckpt' as top 1
Metric train_loss improved by 0.044 >= min_delta = 0.0. New best score: 0.568
Epoch 3, global step 144: 'val_loss' reached 0.50916 (best 0.50916), savi

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

d:\ds\work\utilities\conda\envs\nn_310_2\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

d:\ds\work\utilities\conda\envs\nn_310_2\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


Target 5, target_col 1508


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Missing logger folder: D:\ds\sync\gdrive\work\gdrive-workspaces\git\nn_catalyst//checkpoints/stn_2_r1/lightning_logs\5
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

Metric train_loss improved. New best score: 1.881
Epoch 0, global step 36: 'val_loss' reached 0.68040 (best 0.68040), saving model to 'D:\\ds\\sync\\gdrive\\work\\gdrive-workspaces\\git\\nn_catalyst\\checkpoints\\stn_2_r1\\5\\epoch=00-val_loss=0.68.ckpt' as top 1
Metric train_loss improved by 1.205 >= min_delta = 0.0. New best score: 0.677
Epoch 1, global step 72: 'val_loss' reached 0.58298 (best 0.58298), saving model to 'D:\\ds\\sync\\gdrive\\work\\gdrive-workspaces\\git\\nn_catalyst\\checkpoints\\stn_2_r1\\5\\epoch=01-val_loss=0.58.ckpt' as top 1
Metric train_loss improved by 0.078 >= min_delta = 0.0. New best score: 0.599
Epoch 2, global step 108: 'val_loss' reached 0.52683 (best 0.52683), saving model to 'D:\\ds\\sync\\gdrive\\work\\gdrive-workspaces\\git\\nn_catalyst\\checkpoints\\stn_2_r1\\5\\epoch=02-val_loss=0.53.ckpt' as top 1
Metric train_loss improved by 0.060 >= min_delta = 0.0. New best score: 0.540
Epoch 3, global step 144: 'val_loss' reached 0.48329 (best 0.48329), savi

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

d:\ds\work\utilities\conda\envs\nn_310_2\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Output()

d:\ds\work\utilities\conda\envs\nn_310_2\lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


In [9]:
from torchmetrics import R2Score
from torchmetrics import MeanSquaredError

def r2scoreAndMSE(model, dataloader):
    r2_score_metric = R2Score()
    mse = MeanSquaredError()
    for batch_idx, (data, target) in enumerate(dataloader):
        predictions = model(data)
        #import pdb; pdb.set_trace()
        r2_score_metric.update(predictions, target)
        mse.update(predictions, target)
    return r2_score_metric.compute().detach().item(), mse.compute().detach().item()

In [12]:
from pathlib import Path
from itertools import chain

def measure_all_targets(xy_data, total_targets):
    total_cols = xy_data.shape[1]
    for target_num in range(1, total_targets+1):
        target_col_start = total_cols - (total_targets - target_num)
        print(f'Target {target_num}, target_col {target_col_start}')
        xy_data = torch.from_numpy(xy_orig[:,:target_col_start]).float()  # size [n_samples, n_features]
        dm = prepare_data_module(xy_data)
        if DEBUG == True:
            print(f"Train set size: {len(dm.train_ds),dm.train_ds.dataset.x.shape[1]}")
            print(f"Test set size: {len(dm.test_ds)}, Valid set size: {len(dm.val_ds)}")

        checkpoint_path=resolve_path_gdrive(f'{CHECKPOINTS_FOLDER}/{target_num}')
        pathlist = Path(checkpoint_path).glob('**/*.ckpt')
        for path in pathlist:
            # because path is object not string
            model = SingleTargetNet.load_from_checkpoint(str(path))
            model.eval()
            model.cpu()
            # add row to resultsDF
            train_r2, train_mse = r2scoreAndMSE(model, dm.train_dataloader())
            val_r2, val_mse = r2scoreAndMSE(model, dm.val_dataloader())
            test_r2, test_mse = r2scoreAndMSE(model, dm.test_dataloader())

            results.append([target, os.path.basename(path), train_r2, train_mse, val_r2, val_mse, test_r2, test_mse])

results = []        
if __name__ == "__main__":
    measure_all_targets(xy_orig, total_targets=5)

Target 1, target_col 1504


d:\ds\work\utilities\conda\envs\nn_310_2\lib\site-packages\lightning_fabric\utilities\cloud_io.py:57: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
d:\ds\work\utilities\conda\envs\nn_310_2\l

RuntimeError: mat1 and mat2 shapes cannot be multiplied (512x1503 and 1479x1024)

In [None]:
from pathlib import Path
import pandas as pd
from itertools import chain

results = []
if __name__ == "__main__":
    # do everything in a loop for all the targets
    for target in range(1, ):
        print(f"Target: {target}")
        dm = CatalystDataModule(
            data_dir="",
            batch_size=BATCH_SIZE,
            num_workers=NUM_WORKERS,
            target_num=target
        )
        dm.prepare_data()
        dm.setup()
        if DEBUG == True:
            print(f"Train set size: {len(dm.train_ds),dm.train_ds.dataset.x_data.shape[1]}")
            print(f"Test set size: {len(dm.test_ds)}, Valid set size: {len(dm.val_ds)}")

        checkpoint_path=resolve_path_gdrive(f'{CHECKPOINTS_FOLDER}/{dm.target_num}')
        pathlist = Path(checkpoint_path).glob('**/*.ckpt')
        for path in pathlist:
            # because path is object not string
            model = SingleTargetNet.load_from_checkpoint(str(path))
            model.eval()
            model.cpu()
            # add row to resultsDF
            train_r2, train_mse = r2scoreAndMSE(model, dm.train_dataloader())
            val_r2, val_mse = r2scoreAndMSE(model, dm.val_dataloader())
            test_r2, test_mse = r2scoreAndMSE(model, dm.test_dataloader())

            results.append([target, os.path.basename(path), train_r2, train_mse, val_r2, val_mse, test_r2, test_mse])


Target: 1
Train set size: 20986, Test set size: 2623, Valid set size: 2624


In [None]:
resultsDFcolumns = ["Target", "ModelFile", "Train R2", "Train MSE", "Val R2", "Val MSE", "Test R2", "Test MSE"]
resultsDF = pd.DataFrame(results, columns=resultsDFcolumns)
resultsDF

Unnamed: 0,Target,ModelFile,Train R2,Train MSE,Val R2,Val MSE,Test R2,Test MSE
0,1,epoch=56-val_loss=0.00.ckpt,0.998835,0.001165,0.998835,0.001165,0.998835,0.001165
1,2,epoch=97-val_loss=0.00.ckpt,0.998941,0.001059,0.998941,0.001059,0.998941,0.001059
2,3,epoch=69-val_loss=0.00.ckpt,0.998975,0.001025,0.998975,0.001025,0.998975,0.001025
3,4,epoch=86-val_loss=0.00.ckpt,0.999022,0.000978,0.999022,0.000978,0.999022,0.000978
4,5,epoch=91-val_loss=0.00.ckpt,0.999025,0.000975,0.999025,0.000975,0.999025,0.000975
5,6,epoch=80-val_loss=0.00.ckpt,0.99907,0.00093,0.99907,0.00093,0.99907,0.00093
6,7,epoch=148-val_loss=0.02.ckpt,0.983475,0.016525,0.983475,0.016525,0.983475,0.016525
7,8,epoch=146-val_loss=0.02.ckpt,0.983833,0.016167,0.983833,0.016167,0.983833,0.016167
8,9,epoch=144-val_loss=0.01.ckpt,0.987937,0.012063,0.987937,0.012063,0.987937,0.012063
9,10,epoch=58-val_loss=0.03.ckpt,0.965896,0.034103,0.965896,0.034103,0.965896,0.034103


In [None]:
resultsDF.to_csv(resolve_path_gdrive(f'{CHECKPOINTS_FOLDER}/results.csv'), index=False)

In [None]:
dm1 = CatalystDataModule(
        data_dir="",
        batch_size=BATCH_SIZE,
        num_workers=NUM_WORKERS,
        target_num=target
    )
dm1.prepare_data()
dm1.setup()

print(r2scoreAndMSE(model, dm1.train_dataloader()))
print(r2scoreAndMSE(model, dm1.val_dataloader()))
print(r2scoreAndMSE(model, dm1.test_dataloader()))

In [None]:
import numpy as np
matrix = np.array([[1, 2, 3, 4],
                   [5, 6, 7, 8],
                   [9, 10, 11, 12]])
m = np.delete(matrix, [-1], axis=1)
m

# Learn dataset manipulation


In [None]:
import torch
from torch.utils.data import Dataset, DataLoader

class HelloWorldDataset(Dataset):
    def __init__(self):
        self.data = ["Hello", "World"]

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

dataset = HelloWorldDataset()
dataloader = DataLoader(dataset, batch_size=2)

for batch in dataloader:
    print(batch)

['Hello']
['World']
