In [1]:
import random
import logging

import lightning as pl
from lightning.pytorch.loggers.csv_logs import CSVLogger
import torch.nn as nn
from torch.utils.data import DataLoader, Subset

from neural_bandits.bandits.neural_ucb_bandit import NeuralUCBBandit
from neural_bandits.benchmark.datasets.mnist import MNISTDataset
from neural_bandits.utils.data_storage import InMemoryDataBuffer, AllDataBufferStrategy
from neural_bandits.utils.data_sampler import SortedDataSampler

from neural_bandits.benchmark.environment import BanditBenchmarkEnvironment
from neural_bandits.benchmark.logger_decorator import OnlineBanditLoggerDecorator

In [None]:
class Network(nn.Module):
    """Two-layer perceptron: linear -> ReLU -> linear, producing one value per input row."""

    def __init__(self, dim: int, hidden_size: int = 100):
        """Create the layers.

        Args:
            dim: Number of input features expected by the first linear layer.
            hidden_size: Width of the hidden layer.
        """
        super().__init__()
        # Attribute names are part of the state-dict keys, so they are kept as-is.
        self.fc1 = nn.Linear(in_features=dim, out_features=hidden_size)
        self.activate = nn.ReLU()
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, x):
        """Run ``x`` through ``fc1``, the ReLU activation and ``fc2``; return the result."""
        hidden = self.fc1(x)
        hidden = self.activate(hidden)
        return self.fc2(hidden)

In [None]:
# Load the dataset and report its context size and number of samples.
dataset = MNISTDataset()
for dataset_stat in (dataset.context_size, len(dataset)):
    print(dataset_stat)

# Shared settings; later cells read these names.
accelerator = "cpu"
batch_size = 1
n_samples = 1000

network = Network(dataset.context_size, hidden_size=100)

# Random subset of `n_samples` dataset indices (shuffle the full index list, keep the head).
indices = [*range(len(dataset))]
random.shuffle(indices)
subset_indices = indices[:n_samples]
mini_dataset = Subset(dataset, subset_indices)
print("Subset size:", len(mini_dataset))


def _sort_key(idx):
    """Sort key handed to the sampler: the integer value of ``dataset.y[idx]``."""
    return int(dataset.y[idx])


# Sampler over the first 10000 dataset entries, keyed by `_sort_key`.
sorted_sampler = SortedDataSampler(
    data_source=Subset(dataset, range(10000)),
    key_fn=_sort_key,
)

In [None]:
# In-memory buffer using the "all data" strategy, capped at 10000 entries, on the `accelerator` device.
all_data_strategy = AllDataBufferStrategy()
buffer = InMemoryDataBuffer(buffer_strategy=all_data_strategy, max_size=10_000, device=accelerator)

In [5]:
accelerator = "cpu"

# Environment: the first 10000 dataset entries, ordered by `sorted_sampler`, served in batches of 32.
train_subset = Subset(dataset, range(10000))
train_loader = DataLoader(train_subset, sampler=sorted_sampler, batch_size=32)
env = BanditBenchmarkEnvironment(train_loader, device=accelerator)

# Hyperparameters of the NeuralUCB bandit, collected in one place for easy tuning.
bandit_hparams = dict(
    n_features=dataset.context_size * dataset.num_actions,
    network=network,
    buffer=buffer,
    train_batch_size=batch_size,
    early_stop_threshold=1e-3,
    num_grad_steps=1000,
    lambda_=0.00001,
    nu=0.00001,
    learning_rate=1e-4,
    train_interval=50,
    initial_train_steps=2000,
    max_grad_norm=20.0,
)
bandit_module = NeuralUCBBandit(**bandit_hparams).to(accelerator)

# Raise the threshold of Lightning's rank-zero logger so only FATAL records are emitted.
rank_zero_logger = logging.getLogger("lightning.pytorch.utilities.rank_zero")
rank_zero_logger.setLevel(logging.FATAL)

# Metrics go to a CSV logger under "logs/neural_ucb_bandit", wrapped by the bandit logger decorator.
csv_logger = CSVLogger("logs", name="neural_ucb_bandit", flush_logs_every_n_steps=100)
logger = OnlineBanditLoggerDecorator(csv_logger)

Step: 6 {'training_run': 0, 'loss': 0.006282281130552292, 'reward': 1.0, 'epoch': 0, 'regret': tensor(0.)}}

c:\Users\Philipp\miniconda3\envs\neural_bandits\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Step: 100 {'training_run': 3, 'loss': 0.20809854567050934, 'reward': 0.0, 'epoch': 0, 'regret': tensor(1.)}

In [None]:
# Running totals used for the "average_regret" metric.
total_regret = 0
n_samples = 0

# Online loop: choose actions for a batch of contexts, collect feedback and regret,
# log the regret, then fit the bandit on the feedback of that batch.
for contextualized_actions in env:
    chosen_actions, _ = bandit_module.forward(contextualized_actions)

    # A fresh Trainer is created for every environment batch; each one runs a single epoch.
    trainer = pl.Trainer(
        max_epochs=1,
        logger=logger,
        log_every_n_steps=1,
        enable_progress_bar=False,
        enable_model_summary=False,
        enable_checkpointing=False,
        accelerator=accelerator,
    )
    feedback = env.get_feedback(chosen_actions)
    batch_regret = env.compute_regret(chosen_actions)

    # Compute the batch total once and reuse it for both the running sum and the log entry.
    batch_regret_sum = batch_regret.sum().item()
    total_regret += batch_regret_sum
    # Count the regret entries that were actually summed. The previous `n_samples += batch_size`
    # used the bandit's training batch size (1), while the environment loader yields batches of 32
    # (and possibly a shorter last batch), which inflated "average_regret".
    # NOTE(review): assumes `compute_regret` returns a torch tensor with one entry per sample — confirm.
    n_samples += batch_regret.numel()

    logger.pre_training_log(
        {
            "regret": batch_regret_sum,
            "average_regret": total_regret / n_samples,
        }
    )

    batch_dataloader = DataLoader(feedback, batch_size=32)
    trainer.fit(bandit_module, batch_dataloader)
    # Move the module back to the configured device after fitting.
    bandit_module = bandit_module.to(accelerator)

In [7]:
# Read the metrics CSV written by the wrapped logger and derive the cumulative curves.
import numpy as np
import pandas as pd

log_dir = logger._logger_wrappee.log_dir
metrics_csv = log_dir + "/metrics.csv"
print(metrics_csv)
metrics = pd.read_csv(metrics_csv)

# Only the first 10000 rows are considered; missing regret values are dropped before summing.
reward_head = metrics["reward"][:10000]
regret_head = metrics["regret"][:10000].dropna()
cumulative_reward = np.cumsum(reward_head)
cumulative_regret = np.cumsum(regret_head)

In [None]:
import matplotlib.pyplot as plt

# Plot both cumulative curves into the same axes, one labelled line per curve.
labelled_curves = (
    (cumulative_reward, "Cumulative Reward"),
    (cumulative_regret, "Cumulative Regret"),
)
for curve, curve_label in labelled_curves:
    plt.plot(curve, label=curve_label)

cumulative_axes = plt.gca()
cumulative_axes.set_xlabel("steps")
cumulative_axes.set_ylabel("cumulative reward/regret")
cumulative_axes.legend()
plt.show()

In [None]:
# Average reward, then average regret, over the first 10, 100 and 10000 rows of the metrics table.
#
# The mean is taken over the values that are actually present in each slice. The previous
# version divided by the fixed window size, which under-reported the average whenever
# `dropna()` removed rows or the table had fewer rows than the window, and produced NaN for
# "reward" as soon as a single value in the slice was missing.
for column in ("reward", "regret"):
    for window in (10, 100, 10000):
        print(metrics[column][:window].dropna().mean())

In [None]:
# Loss curve over the first 10000 metric rows, skipping rows without a loss value.
loss_curve = metrics["loss"][:10000].dropna()

loss_axes = plt.gca()
loss_axes.plot(loss_curve, label="Loss")
loss_axes.set_xlabel("steps")
loss_axes.set_ylabel("loss")
loss_axes.legend()
plt.show()