In [1]:
import logging
import os

import lightning as pl
import numpy as np
import pandas as pd
import torch
from lightning.pytorch.loggers.csv_logs import CSVLogger
from torch.utils.data import DataLoader, Subset

from neural_bandits.bandits.neural_linear_bandit import NeuralLinearBandit
from neural_bandits.benchmark.datasets.statlog import StatlogDataset
from neural_bandits.benchmark.environment import BanditBenchmarkEnvironment
from neural_bandits.benchmark.logger_decorator import OnlineBanditLoggerDecorator

In [2]:
class Encoder(torch.nn.Module):
    """Fully connected network with two ReLU hidden layers and a linear output layer.

    Both hidden layers share the same width (``hidden_dim``); the last layer
    projects to ``output_dim`` without an activation.
    """

    def __init__(self, input_dim: int, hidden_dim: int, output_dim: int) -> None:
        """Build the three linear layers.

        Args:
            input_dim: Size of the last dimension of the input tensor.
            hidden_dim: Width of both hidden layers.
            output_dim: Size of the last dimension of the returned tensor.
        """
        super().__init__()
        # Layers are created in forward order; the attribute names define the
        # state_dict keys, so they are part of the class's interface.
        self.fc1 = torch.nn.Linear(input_dim, hidden_dim)
        self.fc2 = torch.nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = torch.nn.Linear(hidden_dim, output_dim)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run ``x`` through fc1 -> ReLU -> fc2 -> ReLU -> fc3 and return the result."""
        hidden = self.fc1(x).relu()
        hidden = self.fc2(hidden).relu()
        return self.fc3(hidden)

In [3]:
# Instantiate the Statlog benchmark dataset, then print its context size
# followed by its length, one value per line.
dataset = StatlogDataset()
print(dataset.context_size, len(dataset), sep="\n")

7
58000


In [None]:
# Use Apple's Metal backend when it is available and fall back to the CPU
# otherwise, so the notebook also runs on machines without MPS support.
accelerator = "mps" if torch.backends.mps.is_available() else "cpu"

# Experiment configuration (tune here rather than inline below).
SEED = 42
N_TRAIN_SAMPLES = 5000  # number of leading dataset rows used for the run
BATCH_SIZE = 32
HIDDEN_DIM = 64
EMBEDDING_DIM = 32  # encoder output size; must equal the bandit's n_embedding_size

# Seed torch's global RNG so the shuffled sample order and the encoder's
# initial weights are repeatable across "Restart & Run All".
torch.manual_seed(SEED)

# The encoder input size is passed to both the encoder and the bandit, so it is
# computed once to keep the two in sync.
encoder_input_dim = dataset.context_size * dataset.num_actions

train_loader = DataLoader(
    Subset(dataset, range(N_TRAIN_SAMPLES)),
    batch_size=BATCH_SIZE,
    shuffle=True,
)
env = BanditBenchmarkEnvironment(train_loader, device=accelerator)
bandit_module = NeuralLinearBandit(
    encoder=Encoder(
        input_dim=encoder_input_dim,
        hidden_dim=HIDDEN_DIM,
        output_dim=EMBEDDING_DIM,
    ),
    n_encoder_input_size=encoder_input_dim,
    n_embedding_size=EMBEDDING_DIM,
).to(accelerator)

# Silence Lightning's rank-zero messages; a Trainer is created for every batch
# in the training loop, which would otherwise flood the cell output.
logging.getLogger("lightning.pytorch.utilities.rank_zero").setLevel(logging.FATAL)
logger = OnlineBanditLoggerDecorator(
    CSVLogger("logs", name="neural_linear_bandit", flush_logs_every_n_steps=100)
)

: 

In [None]:
# Keyword arguments shared by the Trainer that is built for every batch:
# a single epoch, logging on every step, and no progress bar, model summary
# or checkpoints.
trainer_options = dict(
    max_epochs=1,
    logger=logger,
    log_every_n_steps=1,
    enable_progress_bar=False,
    enable_model_summary=False,
    enable_checkpointing=False,
    accelerator=accelerator,
)

# Online loop: for each batch from the environment, pick actions, collect the
# feedback for them, log the batch regret and fit the bandit on that feedback.
for contextualized_actions in env:
    chosen_actions, _ = bandit_module.forward(contextualized_actions)

    # A fresh Trainer is created per batch, before the environment is queried.
    trainer = pl.Trainer(**trainer_options)
    feedback = env.get_feedback(chosen_actions)
    batch_regret = env.compute_regret(chosen_actions)
    # The regret of the whole batch is logged as a single summed scalar.
    logger.pre_training_log({"regret": batch_regret.sum().item()})

    batch_dataloader = DataLoader(feedback, batch_size=16)
    trainer.fit(bandit_module, batch_dataloader)
    # NOTE(review): presumably fit() may leave the module on another device,
    # hence the move back to the accelerator - confirm.
    bandit_module = bandit_module.to(accelerator)

updated head


/Users/robert/miniconda3/envs/neural_bandits/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
  return torch.nn.functional.mse_loss(y_pred, y)


Step: 0 {'training_run': 0, 'reward': 10.0, 'epoch': 0, 'regret': 12.0}trained nn
updated head
Step: 1 {'training_run': 0, 'reward': 10.0, 'nn_loss': 0.2844904661178589, 'epoch': 0}updated head
Step: 1 {'training_run': 1, 'reward': 12.0, 'epoch': 0, 'regret': 7.0}trained nn
updated head
Step: 2 {'training_run': 1, 'reward': 13.0, 'nn_loss': 0.22689619660377502, 'epoch': 0}updated head
Step: 2 {'training_run': 2, 'reward': 11.0, 'epoch': 0, 'regret': 8.0}trained nn
updated head
Step: 3 {'training_run': 2, 'reward': 13.0, 'nn_loss': 0.2791798710823059, 'epoch': 0}updated head
Step: 3 {'training_run': 3, 'reward': 13.0, 'epoch': 0, 'regret': 7.0}trained nn
updated head
Step: 4 {'training_run': 3, 'reward': 12.0, 'nn_loss': 0.4391842484474182, 'epoch': 0}updated head
Step: 4 {'training_run': 4, 'reward': 15.0, 'epoch': 0, 'regret': 4.0}trained nn
updated head
Step: 5 {'training_run': 4, 'reward': 13.0, 'nn_loss': 0.11992986500263214, 'epoch': 0}updated head
Step: 5 {'training_run': 5, 'rew

In [None]:
# load metrics from the logger and plot

# Number of leading rows of the metrics table used for the cumulative curves.
N_PLOT_ROWS = 5000

# metrics.csv is read from the log directory of the logger held in the
# decorator's `_logger_wrappee` attribute (presumably the wrapped CSVLogger).
# Joining with os.path.join avoids hand-built separators.
metrics_csv = os.path.join(logger._logger_wrappee.log_dir, "metrics.csv")
print(metrics_csv)
metrics = pd.read_csv(metrics_csv)

# Running totals over the first N_PLOT_ROWS rows. Rows without a regret value
# are dropped before accumulating.
cumulative_reward = np.cumsum(metrics["reward"][:N_PLOT_ROWS])
cumulative_regret = np.cumsum(metrics["regret"][:N_PLOT_ROWS].dropna())

In [None]:
import matplotlib.pyplot as plt

# Draw the cumulative reward and cumulative regret curves on the same axes.
ax = plt.gca()
for curve, curve_label in ((cumulative_reward, "reward"), (cumulative_regret, "regret")):
    ax.plot(curve, label=curve_label)
ax.set_xlabel("steps")
ax.set_ylabel("cumulative reward/regret")
ax.legend()
plt.show()

In [None]:
# average reward (and regret, when logged) over the first N rows of the metrics table
def _mean_of_first_rows(values, n_rows):
    """Return the mean of the non-missing entries among the first ``n_rows`` of ``values``.

    The mean is taken over the values actually present, so the result stays a
    true average when the series is shorter than ``n_rows`` or contains
    missing entries. Returns NaN when no values are present.
    """
    return values[:n_rows].dropna().mean()


# Row limits over which the averages are reported, in printing order.
AVERAGE_WINDOWS = (100, 1000, 10000)

for n_rows in AVERAGE_WINDOWS:
    print(_mean_of_first_rows(metrics["reward"], n_rows))
if "regret" in metrics:
    for n_rows in AVERAGE_WINDOWS:
        print(_mean_of_first_rows(metrics["regret"], n_rows))

In [None]:
# Display the metrics DataFrame read from the CSV log for inspection.
metrics

In [None]:
# Plot the network loss over the first 1000 rows of the metrics table.
# Rows without an nn_loss value are dropped first: missing values between
# points would otherwise break the line into invisible isolated points.
nn_loss = metrics["nn_loss"][:1000].dropna()
plt.plot(nn_loss, label='Loss')
plt.xlabel("steps")
plt.ylabel("loss")
plt.legend()
plt.show()