# Intro to Lightning with Sensitive

In [1]:
import lightning as L
from torch import nn
import os
import torch
CHECKPOINT_PATH = "runs"
from lightning.pytorch.callbacks import LearningRateMonitor, ModelCheckpoint
from types import SimpleNamespace
# import transforms
from torchvision import transforms
from torchvision.datasets import CIFAR10
import torch.utils.data as data
import torch.optim as optim

In [2]:
# Directory where datasets (e.g. CIFAR10) are stored or downloaded to.
# The environment variable PATH_DATASETS overrides the default.
DATASET_PATH = os.environ.get("PATH_DATASETS", "data/")
# Directory for saved/pretrained models.
# The environment variable PATH_CHECKPOINT overrides the default.
CHECKPOINT_PATH = os.environ.get("PATH_CHECKPOINT", "saved_models/ConvNets")

# Fix the global seed through Lightning so that repeated runs are comparable.
L.seed_everything(42)

# Request deterministic cuDNN behaviour (only relevant when a GPU is used).
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
# device = torch.device("mps")

Seed set to 42


## Data Handling

In [3]:
# Load the CIFAR10 training split without any transform; it is only used here to
# estimate normalization statistics.
train_dataset = CIFAR10(root=DATASET_PATH, train=True, download=True)
# Divide by 255 once (presumably maps 8-bit pixel values to [0, 1] - TODO confirm)
# and reduce over every axis except the last one, which yields one value per channel.
scaled_pixels = train_dataset.data / 255.0
DATA_MEANS = scaled_pixels.mean(axis=(0, 1, 2))
DATA_STD = scaled_pixels.std(axis=(0, 1, 2))
# The scaled copy is large and no longer needed.
del scaled_pixels
print("Data mean", DATA_MEANS)
print("Data std", DATA_STD)

Files already downloaded and verified
Data mean [0.49139968 0.48215841 0.44653091]
Data std [0.24703223 0.24348513 0.26158784]


In [4]:
# Evaluation pipeline: tensor conversion followed by normalization with the
# statistics stored in DATA_MEANS / DATA_STD.
test_transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(DATA_MEANS, DATA_STD)])
# Training pipeline: the same two steps, preceded by light augmentation
# (random horizontal flip, random resized crop) to counteract overfitting.
train_transform = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(),
        transforms.RandomResizedCrop((32, 32), scale=(0.8, 1.0), ratio=(0.9, 1.1)),
        transforms.ToTensor(),
        transforms.Normalize(DATA_MEANS, DATA_STD),
    ]
)

# The official training split is loaded twice: once with augmentation (training part)
# and once without (validation part). Seeding identically before each split makes
# both calls draw the same partition, so the two parts do not overlap.
split_sizes = [45000, 5000]
train_dataset = CIFAR10(root=DATASET_PATH, train=True, transform=train_transform, download=True)
val_dataset = CIFAR10(root=DATASET_PATH, train=True, transform=test_transform, download=True)
L.seed_everything(42)
train_set, _ = data.random_split(train_dataset, split_sizes)
L.seed_everything(42)
_, val_set = data.random_split(val_dataset, split_sizes)

# Official test split, evaluated without augmentation.
test_set = CIFAR10(root=DATASET_PATH, train=False, transform=test_transform, download=True)

# Data loaders. Only the training loader shuffles and drops the last incomplete batch.
eval_loader_args = dict(batch_size=128, shuffle=False, drop_last=False, num_workers=4)
train_loader = data.DataLoader(
    train_set, batch_size=128, shuffle=True, drop_last=True, pin_memory=True, num_workers=4
)
val_loader = data.DataLoader(val_set, **eval_loader_args)
test_loader = data.DataLoader(test_set, **eval_loader_args)

Files already downloaded and verified
Files already downloaded and verified


Seed set to 42
Seed set to 42


Files already downloaded and verified


## Alternative Data Handling with Sensitive

In [5]:
import torch
from spotpython.hyperparameters.values import set_control_key_value
from spotpython.utils.init import fun_control_init
from spotpython.data.pkldataset import PKLDataset
import lightning as L
import torch
from torchvision import transforms
# Load the tabular data set from ./userData/data_sensitive.pkl; column 'N' is the
# regression target, features and target are requested as float32.
dataset = PKLDataset(
    directory="./userData/",
    filename="data_sensitive.pkl",
    target_column='N',
    feature_type=torch.float32,
    target_type=torch.float32,
    rmNA=True,
)
# Create a control dictionary with default settings and register the data set in it.
fun_control = fun_control_init()
set_control_key_value(control_dict=fun_control, key="data_set", value=dataset, replace=True)
# Number of samples in the loaded data set.
print(len(dataset))

Seed set to 123


2381


In [6]:
# Both splits use the same 70/30 proportions.
split_fractions = [0.7, 0.3]

# First split: hold out 30% of all samples as test set.
L.seed_everything(42)
traindata_set, test_set = torch.utils.data.random_split(dataset, split_fractions)
# Second split: divide the remaining samples into training and validation parts.
L.seed_everything(42)
train_set, val_set = torch.utils.data.random_split(traindata_set, split_fractions)
subset_sizes = [len(subset) for subset in (train_set, val_set, test_set)]
print(*subset_sizes)


Seed set to 42
Seed set to 42


1167 500 714


In [7]:
# compute the mean and std of the dataset
DATA_MEANS = traindata_set.dataset.data.mean(axis=0)
DATA_STD = traindata_set.dataset.data.std(axis=0)
data_transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(DATA_MEANS, DATA_STD)])
# TODO: normalize the data sets

In [8]:
from torch.utils.data import DataLoader

# A tiny batch size keeps the printed sample readable.
batch_size = 2
# Unshuffled loader over the validation subset, used only for this inspection.
dataloader = DataLoader(val_set, batch_size=batch_size, shuffle=False)

# Report shapes and contents of the first batch, then stop iterating.
for batch in dataloader:
    inputs, targets = batch
    report_lines = (
        f"Batch Size: {inputs.size(0)}",
        f"Inputs Shape: {inputs.shape}",
        f"Targets Shape: {targets.shape}",
        "---------------",
        f"Inputs: {inputs}",
        f"Targets: {targets}",
    )
    print("\n".join(report_lines))
    break

Batch Size: 2
Inputs Shape: torch.Size([2, 133])
Targets Shape: torch.Size([2])
---------------
Inputs: tensor([[2.5100e+02, 1.3770e+03, 3.0000e+00, 2.0000e+00, 1.0000e+00, 6.0000e+00,
         8.4600e+02, 1.1140e+03, 9.0000e+00, 1.2080e+03, 4.0000e+00, 6.0000e+00,
         1.8330e+03, 1.7910e+03, 1.5590e+03, 2.3000e+01, 3.9000e+01, 5.0000e+01,
         3.3000e+01, 0.0000e+00, 0.0000e+00, 3.4000e+01, 2.9000e+01, 0.0000e+00,
         8.2700e+02, 9.1000e+02, 1.2100e+03, 1.2090e+03, 1.2080e+03, 1.7200e+03,
         1.3600e+03, 1.2000e+01, 8.4600e+02, 1.0800e+03, 7.2100e+02, 6.8400e+02,
         6.8200e+02, 6.7900e+02, 5.4700e+02, 6.8800e+02, 6.4400e+02, 1.5200e+02,
         5.3800e+02, 6.4900e+02, 3.6200e+02, 3.8000e+01, 5.4300e+02, 6.0200e+02,
         5.7800e+02, 4.6200e+02, 7.6000e+01, 6.2600e+02, 1.1800e+02, 3.1400e+02,
         5.4100e+02, 6.2100e+02, 6.8400e+02, 6.8800e+02, 3.6200e+02, 6.8400e+02,
         6.8400e+02, 5.9000e+02, 5.9000e+02, 6.7600e+02, 4.0500e+02, 4.3700e+02,
     

In [9]:
# Data loaders for the tabular train/validation/test subsets. The names
# train_loader/val_loader/test_loader are the same ones used for CIFAR10 earlier,
# so they are rebound here.
shared_loader_args = {"batch_size": 128, "num_workers": 0}
# Only the training loader shuffles, drops the last incomplete batch and pins memory.
train_loader = DataLoader(train_set, shuffle=True, drop_last=True, pin_memory=True, **shared_loader_args)
val_loader = DataLoader(val_set, shuffle=False, drop_last=False, **shared_loader_args)
test_loader = DataLoader(test_set, shuffle=False, drop_last=False, **shared_loader_args)

## Model Building

In [10]:
class NetLightRegression(L.LightningModule):
    """Fully connected regression network implemented as a LightningModule.

    Architecture: four hidden blocks of ``Linear -> act_fn -> Dropout`` with widths
    ``l1``, ``l1 // 2``, ``l1 // 2`` and ``l1 // 4``, followed by one ``Linear`` layer
    with ``_L_out`` outputs. Training, validation and test steps all use the mean
    squared error between the prediction and the target.
    """

    def __init__(
        self,
        l1: int,
        epochs: int,
        batch_size: int,
        initialization: str,
        act_fn: nn.Module,
        optimizer: str,
        dropout_prob: float,
        lr_mult: float,
        patience: int,
        _L_in: int,
        _L_out: int,
    ):
        """Store the hyperparameters and build the network.

        Args:
            l1: Width of the first hidden layer; must be at least 4.
            epochs: Stored in ``self.hparams``; not read inside this class.
            batch_size: Number of rows of the dummy ``example_input_array``;
                also stored in ``self.hparams``.
            initialization: Stored in ``self.hparams``; not read inside this class.
            act_fn: Activation module placed after every hidden ``Linear`` layer.
                The same instance is reused in all hidden blocks.
            optimizer: Optimizer name forwarded to ``optimizer_handler``.
            dropout_prob: Dropout probability used after every hidden block.
            lr_mult: Learning-rate multiplier forwarded to ``optimizer_handler``.
            patience: Stored in ``self.hparams``; not read inside this class.
            _L_in: Number of input features.
            _L_out: Number of network outputs.

        Raises:
            ValueError: If ``l1`` is smaller than 4.
        """
        super().__init__()
        self._L_in = _L_in
        self._L_out = _L_out
        # _L_in and _L_out are not hyperparameters, but are needed to create the network.
        self.save_hyperparameters(ignore=["_L_in", "_L_out"])
        # Dummy input used for the Tensorboard graph (requires log_graph=True on the logger).
        self.example_input_array = torch.zeros((batch_size, self._L_in))
        # The narrowest hidden layer has l1 // 4 units, so l1 < 4 would give a width of 0.
        if self.hparams.l1 < 4:
            raise ValueError("l1 must be at least 4")

        hidden_sizes = [self.hparams.l1, self.hparams.l1 // 2, self.hparams.l1 // 2, self.hparams.l1 // 4]

        # Chain the hidden blocks; each block consumes the width produced by the previous one.
        layers = []
        in_features = self._L_in
        for out_features in hidden_sizes:
            layers += [
                nn.Linear(in_features, out_features),
                self.hparams.act_fn,
                nn.Dropout(self.hparams.dropout_prob),
            ]
            in_features = out_features
        layers.append(nn.Linear(in_features, self._L_out))
        # nn.Sequential applies the collected modules one after another.
        self.layers = nn.Sequential(*layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run ``x`` through the layer stack and return the network output."""
        return self.layers(x)

    def configure_optimizers(self) -> torch.optim.Optimizer:
        """Create the optimizer named by ``hparams.optimizer`` via ``optimizer_handler``.

        Returns:
            The optimizer returned by ``optimizer_handler`` for this model's parameters.
        """
        # Imported here because none of the notebook's import cells brings this helper
        # into scope; without it this method raises NameError.
        from spotpython.hyperparameters.optimizer import optimizer_handler

        return optimizer_handler(
            optimizer_name=self.hparams.optimizer, params=self.parameters(), lr_mult=self.hparams.lr_mult
        )

    def _mse_loss(self, batch: tuple) -> torch.Tensor:
        """Return the mean squared error for one ``(inputs, targets)`` batch."""
        x, y = batch
        # Reshape the targets to one column so they match the (batch, 1) prediction.
        y = y.view(len(y), 1)
        y_hat = self(x)
        # nn.functional is used directly; the alias `F` is not imported in this notebook.
        return nn.functional.mse_loss(y_hat, y)

    def training_step(self, batch: tuple) -> torch.Tensor:
        """Compute the training loss (MSE) for one batch; nothing is logged.

        Args:
            batch: Tuple ``(inputs, targets)``.

        Returns:
            The loss tensor that is used for the backward pass.
        """
        return self._mse_loss(batch)

    def validation_step(self, batch: tuple, batch_idx: int, prog_bar: bool = False) -> torch.Tensor:
        """Compute the validation MSE for one batch and log it.

        Args:
            batch: Tuple ``(inputs, targets)``.
            batch_idx: Index of the batch (unused).
            prog_bar: Forwarded to ``self.log`` for both logged metrics.

        Returns:
            The loss tensor, logged as ``val_loss`` and ``hp_metric``.
        """
        val_loss = self._mse_loss(batch)
        self.log("val_loss", val_loss, prog_bar=prog_bar)
        self.log("hp_metric", val_loss, prog_bar=prog_bar)
        return val_loss

    def test_step(self, batch: tuple, batch_idx: int, prog_bar: bool = False) -> torch.Tensor:
        """Compute the test MSE for one batch and log it.

        Args:
            batch: Tuple ``(inputs, targets)``.
            batch_idx: Index of the batch (unused).
            prog_bar: Forwarded to ``self.log`` for both logged metrics.

        Returns:
            The loss tensor.
        """
        val_loss = self._mse_loss(batch)
        # The metric keys are the same as in validation_step ("val_loss", "hp_metric").
        self.log("val_loss", val_loss, prog_bar=prog_bar)
        self.log("hp_metric", val_loss, prog_bar=prog_bar)
        return val_loss

In [11]:
import lightning as L
from spotpython.data.lightdatamodule import LightDataModule
from spotpython.utils.eda import generate_config_id
from pytorch_lightning.loggers import TensorBoardLogger
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from lightning.pytorch.callbacks import ModelSummary
from spotpython.torch.initialization import kaiming_init, xavier_init
import os
# def train_model(model_name, save_name=None, **kwargs):
    # """Train model.

    # Args:
    #     model_name: Name of the model you want to run. Is used to look up the class in "model_dict"
    #     save_name (optional): If specified, this name will be used for creating the checkpoint and logging directory.
    # """
    # if save_name is None:
    #     save_name = model_name
def train_model(config: dict, fun_control: dict) -> float:
    """Train one hyperparameter configuration and return its validation loss.

    A model is built from ``fun_control["core_model"]`` with the values in ``config``,
    optionally re-initialized, fitted on a ``LightDataModule`` and validated afterwards.

    Args:
        config: Hyperparameter values. They are passed as keyword arguments to the
            core model; ``"initialization"``, ``"batch_size"`` and ``"patience"``
            are additionally read here.
        fun_control: Control dictionary. Keys read here: ``_L_in``, ``_L_out``,
            ``enable_progress_bar``, ``core_model``, ``data_set``, ``num_workers``,
            ``test_size``, ``test_seed``, ``CHECKPOINT_PATH``, ``accelerator``,
            ``devices`` and ``TENSORBOARD_PATH``.

    Returns:
        The ``"val_loss"`` entry of the first result dict returned by
        ``trainer.validate``.
    """
    _L_in = fun_control["_L_in"]
    _L_out = fun_control["_L_out"]
    # A stored value of None means "no progress bar".
    progress_bar_setting = fun_control["enable_progress_bar"]
    enable_progress_bar = False if progress_bar_setting is None else progress_bar_setting

    # The model is not loaded from a checkpoint, so a fresh id (with timestamp) is
    # generated for this run.
    config_id = generate_config_id(config, timestamp=True)
    model = fun_control["core_model"](**config, _L_in=_L_in, _L_out=_L_out)

    # Re-initialize the weights only for the two named schemes; any other value
    # leaves the model as constructed.
    initializer = {"Xavier": xavier_init, "Kaiming": kaiming_init}.get(config["initialization"])
    if initializer is not None:
        initializer(model)

    data_module = LightDataModule(
        dataset=fun_control["data_set"],
        batch_size=config["batch_size"],
        num_workers=fun_control["num_workers"],
        test_size=fun_control["test_size"],
        test_seed=fun_control["test_seed"],
    )
    data_module.setup()
    print(f"train_model(): Test set size: {len(data_module.data_test)}")
    print(f"train_model(): Train set size: {len(data_module.data_train)}")
    print(f"train_model(): Batch size: {config['batch_size']}")

    trainer = L.Trainer(
        # Where to save models
        default_root_dir=os.path.join(fun_control["CHECKPOINT_PATH"], config_id),
        max_epochs=model.hparams.epochs,
        accelerator=fun_control["accelerator"],
        devices=fun_control["devices"],
        logger=TensorBoardLogger(
            save_dir=fun_control["TENSORBOARD_PATH"],
            version=config_id,
            default_hp_metric=True,
            log_graph=True,
        ),
        callbacks=[
            # Stop once "val_loss" has not improved for `patience` validation checks.
            EarlyStopping(
                monitor="val_loss",
                patience=config["patience"],
                mode="min",
                strict=False,
                verbose=False,
            ),
            ModelSummary(max_depth=-1),
        ],
        enable_progress_bar=enable_progress_bar,
    )

    # The data module supplies the data loaders for fitting and validation.
    trainer.fit(model=model, datamodule=data_module)
    # trainer.validate returns a list with one metrics dict; unwrap it.
    validation_metrics = trainer.validate(model=model, datamodule=data_module)[0]
    print(f"train_model result: {validation_metrics}")
    return validation_metrics["val_loss"]

In [12]:
from spotpython.utils.device import getDevice
from math import inf
# --- Tuning budget (forwarded to fun_control_init in the next cell) ---
MAX_TIME = 10  # passed as max_time; unit is not visible here - TODO confirm
FUN_EVALS = inf  # passed as fun_evals; inf means no evaluation limit is set here
FUN_REPEATS = 2  # passed as fun_repeats
OCBA_DELTA = 1  # passed as ocba_delta

# --- Defined here but not referenced in the visible cells ---
REPEATS = 2
INIT_SIZE = 10
DEVICES = 1

# --- Data handling ---
WORKERS = 0  # passed as num_workers
TEST_SIZE = 0.3  # passed as test_size

# --- Run identification and hardware ---
PREFIX = "032"
DEVICE = getDevice()

In [13]:
from spotpython.utils.init import fun_control_init
import numpy as np
# Settings for the control dictionary, collected first and then passed on unchanged.
control_settings = dict(
    # Network input/output sizes; 133 matches the feature count of the data set
    # (see the "Inputs Shape" output of the batch inspection above).
    _L_in=133,
    _L_out=1,
    PREFIX=PREFIX,
    TENSORBOARD_CLEAN=True,
    device=DEVICE,
    enable_progress_bar=False,
    fun_evals=FUN_EVALS,
    fun_repeats=FUN_REPEATS,
    log_level=10,
    max_time=MAX_TIME,
    num_workers=WORKERS,
    ocba_delta=OCBA_DELTA,
    show_progress=True,
    test_size=TEST_SIZE,
    # Square root of the floating-point spacing at 1.0.
    tolerance_x=np.sqrt(np.spacing(1)),
    verbosity=1,
)
# This rebinds fun_control; the data set therefore has to be registered again afterwards.
fun_control = fun_control_init(**control_settings)

Seed set to 123


Moving TENSORBOARD_PATH: runs/ to TENSORBOARD_PATH_OLD: runs_OLD/runs_2024_01_12_11_07_29
Created spot_tensorboard_path: runs/spot_logs/032_p040025_2024-01-12_11-07-29 for SummaryWriter()


In [14]:
from spotpython.hyperparameters.values import set_control_key_value
from spotpython.data.pkldataset import PKLDataset
import torch
# Load the data set again and attach it to the freshly initialized control dictionary
# under the key "data_set", which train_model reads.
dataset = PKLDataset(
    directory="./userData/",
    filename="data_sensitive.pkl",
    target_column='N',
    feature_type=torch.float32,
    target_type=torch.float32,
    rmNA=True,
)
set_control_key_value(control_dict=fun_control, key="data_set", value=dataset, replace=True)
# Number of samples in the loaded data set.
print(len(dataset))

2381


In [15]:
from spotpython.hyperparameters.values import add_core_model_to_fun_control
import sys

# Put the local ./userModel directory first on the import path so that the two
# user modules below can be imported. The path entry must be added before the imports.
sys.path.insert(0, './userModel')
import netlightregression
import light_hyper_dict

# Register the model class and its hyperparameter dictionary in the control dictionary.
# Note: the class used here comes from the userModel module, not from the
# NetLightRegression class defined in a notebook cell above.
add_core_model_to_fun_control(
    core_model=netlightregression.NetLightRegression,
    fun_control=fun_control,
    hyper_dict=light_hyper_dict.LightHyperDict,
)

In [16]:
from spotpython.utils.eda import gen_design_table
# Print the hyperparameter table (name, type, default, bounds, transform) that is
# derived from the control dictionary.
print(gen_design_table(fun_control))

| name           | type   | default   |   lower |   upper | transform             |
|----------------|--------|-----------|---------|---------|-----------------------|
| l1             | int    | 3         |     3   |    8    | transform_power_2_int |
| epochs         | int    | 4         |     4   |    9    | transform_power_2_int |
| batch_size     | int    | 4         |     1   |    4    | transform_power_2_int |
| act_fn         | factor | ReLU      |     0   |    5    | None                  |
| optimizer      | factor | SGD       |     0   |   11    | None                  |
| dropout_prob   | float  | 0.01      |     0   |    0.25 | None                  |
| lr_mult        | float  | 1.0       |     0.1 |   10    | None                  |
| patience       | int    | 2         |     2   |    6    | transform_power_2_int |
| initialization | factor | Default   |     0   |    2    | None                  |


## Get Default Values

In [17]:
# from spotpython.hyperparameters.values import get_default_hyperparameters_as_array
# import logging
# import numpy as np
# import pprint
# from numpy.random import default_rng
# # from spotpython.light.trainmodel import train_model
# from spotpython.hyperparameters.values import assign_values, generate_one_config_from_var_dict, get_var_name, get_one_config_from_X

# X = get_default_hyperparameters_as_array(fun_control)
# var_dict = assign_values(X, get_var_name(fun_control))
# for config in generate_one_config_from_var_dict(var_dict, fun_control):
#     print(config)
#     train_model(config, fun_control)


## Get the Tuned Values

In [18]:
# One tuned configuration in encoded form. The order follows the design table above;
# the decoded values are shown by the next cell.
tuned_encoding = {
    "l1": 8.0,
    "epochs": 8.0,
    "batch_size": 6.0,
    "act_fn": 2.0,
    "optimizer": 2.0,
    "dropout_prob": 0.05448797,
    "lr_mult": 1.51706109,
    "patience": 4.0,
    "initialization": 0.0,
}
# Shape (1, 9): a single row holding the nine encoded values.
X = np.array([list(tuned_encoding.values())])

In [19]:
from spotpython.hyperparameters.values import get_one_config_from_X

# Decode the encoded row X into a hyperparameter dictionary using the control
# dictionary (in the displayed output, e.g., the encoded l1 value 8 becomes 256).
cfg = get_one_config_from_X(X, fun_control)
# Display the decoded configuration as the cell output.
cfg

{'l1': 256,
 'epochs': 256,
 'batch_size': 64,
 'act_fn': ReLU(),
 'optimizer': 'Adam',
 'dropout_prob': 0.05448797,
 'lr_mult': 1.51706109,
 'patience': 16,
 'initialization': 'Default'}

In [20]:
# Reseed right before training so that this run starts from a fixed seed.
L.seed_everything(42)
# Train and validate the decoded configuration; the value returned (and shown as
# the cell output) is the validation loss.
train_model(config=cfg, fun_control=fun_control)

Seed set to 42
/Users/bartz/miniforge3/envs/spotCondaEnv/lib/python3.11/site-packages/lightning/pytorch/utilities/parsing.py:198: Attribute 'act_fn' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['act_fn'])`.


train_model(): Test set size: 715
train_model(): Train set size: 1167
train_model(): Batch size: 64


Trainer already configured with model summary callbacks: [<class 'lightning.pytorch.callbacks.model_summary.ModelSummary'>]. Skipping setting a default `ModelSummary` callback.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

   | Name      | Type       | Params | In sizes  | Out sizes
------------------------------------------------------------------
0  | layers    | Sequential | 92.0 K | [64, 133] | [64, 1]  
1  | layers.0  | Linear     | 34.3 K | [64, 133] | [64, 256]
2  | layers.1  | ReLU       | 0      | [64, 256] | [64, 256]
3  | layers.2  | Dropout    | 0      | [64, 256] | [64, 256]
4  | layers.3  | Linear     | 32.9 K | [64, 256] | [64, 128]
5  | layers.5  | Dropout    | 0      | [64, 128] | [64, 128]
6  | layers.6  | Linear     | 16.5 K | [64, 128] | [64, 128]
7  | layers.8  | Dropout    | 0      | [64, 128] | [64, 128]
8  | layers.9  | Linear     | 8.3 K  | [64, 128] | [64,

LightDataModule: train_dataloader(). Training set size: 1167
LightDataModule: train_dataloader(). batch_size: 64
LightDataModule: train_dataloader(). num_workers: 0


train_model result: {'val_loss': 2880.50732421875, 'hp_metric': 2880.50732421875}


2880.50732421875