In [2]:
from neural_bandits.bandits.linear_ts_bandit import LinearTSBandit
from neural_bandits.benchmark.datasets.statlog import StatlogDataset

import lightning as pl
from torch.utils.data import DataLoader
import torch

In [3]:
dataset = StatlogDataset()
print(dataset.context_size)
print(len(dataset))

train_loader = DataLoader(dataset, batch_size=4, shuffle=True)
model = LinearTSBandit(
        n_features = dataset.context_size * dataset.num_actions,
)
logger = pl.pytorch.loggers.CSVLogger("logs", name="linear_bandit", flush_logs_every_n_steps=100)

7
58000


In [4]:
class BanditDataset(torch.utils.data.Dataset):
    def __init__(self, contextualized_actions, all_rewards, chosen_actions):
        super().__init__()
        self.contextualized_actions = contextualized_actions
        self.all_rewards = all_rewards
        self.chosen_actions = chosen_actions
        mask = self.chosen_actions.bool()
        
        self.chosen_contextualized_actions = torch.masked_select(
            self.contextualized_actions, mask.unsqueeze(-1).expand_as(self.contextualized_actions)
        ).view(self.contextualized_actions.size(0), -1, self.contextualized_actions.size(-1))

        self.realized_rewards = (self.all_rewards * self.chosen_actions.float()).masked_select(mask).view(self.all_rewards.size(0), -1)

    def __len__(self):
        return self.realized_rewards.size(0)

    def __getitem__(self, idx):
        return self.chosen_contextualized_actions[idx], self.realized_rewards[idx]

In [7]:
for contextualized_actions, all_rewards in train_loader:
    chosen_actions, _ = model.forward(contextualized_actions)

    trainer = pl.Trainer(max_epochs=1, logger=logger, log_every_n_steps=1, enable_progress_bar=False)
    update = BanditDataset(contextualized_actions, all_rewards, chosen_actions)
    batch_dataloader = DataLoader(update, batch_size=1)
    trainer.fit(model, batch_dataloader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
c:\Users\Philipp\miniconda3\envs\neural_bandits\lib\site-packages\lightning\pytorch\core\optimizer.py:183: `LightningModule.configure_optimizers` returned `None`, this fit will run with no optimizer

  | Name | Type | Params | Mode
-------------------------------------
-------------------------------------
0         Trainable params
0         Non-trainable params
0         Total params
0.000     Total estimated model params size (MB)
0         Modules in train mode
0         Modules in eval mode
c:\Users\Philipp\miniconda3\envs\neural_bandits\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
`Trainer.fit` stopped: `max_epochs=1` reached.
GPU available

KeyboardInterrupt: 

In [None]:
# load metrics from the logger and plot
import pandas as pd
import numpy as np

metrics_csv = logger.log_dir + "/metrics.csv"
print(metrics_csv)
metrics = pd.read_csv(metrics_csv)
cumulative_reward = np.cumsum(metrics["reward"][:1000])
cumulative_regret = np.cumsum(metrics["regret"][:1000])

In [None]:
import matplotlib.pyplot as plt
plt.plot(cumulative_reward)
plt.plot(cumulative_regret)
plt.xlabel("steps")
plt.ylabel("cumulative reward/regret")
plt.show()

In [None]:
# average reward
print(sum(metrics["reward"][:100]) / 100)
print(sum(metrics["reward"][:1000]) / 1000)
print(sum(metrics["reward"][:10000]) / 10000)

print(sum(metrics["regret"][:100]) / 100)
print(sum(metrics["regret"][:1000]) / 1000)
print(sum(metrics["regret"][:10000]) / 10000)
