In [None]:
from neural_bandits.modules.neural_ucb_module import NeuralUCBBanditModule
from neural_bandits.datasets.statlog import StatlogDataset

import lightning as pl
from torch.utils.data import DataLoader
import torch.nn as nn
from torch.utils.data import Subset
import random

In [None]:
class Network(nn.Module):
    """Small feed-forward network: linear -> ReLU -> linear with one output unit.

    Args:
        dim: Number of input features expected by the first linear layer.
        hidden_size: Width of the hidden layer (defaults to 100).
    """

    def __init__(self, dim, hidden_size=100):
        super().__init__()
        self.fc1 = nn.Linear(dim, hidden_size)
        self.activate = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return the single-unit output of the network for input ``x``."""
        hidden = self.fc1(x)
        hidden = self.activate(hidden)
        return self.fc2(hidden)

In [None]:
# Seed every RNG this cell relies on *before* anything random happens: the
# network's weight initialisation, the subset shuffle and the DataLoader's
# shuffling order all draw from these generators. Without this, a fresh
# Restart & Run All yields a different subset and different curves each time.
SEED = 42
pl.seed_everything(SEED, workers=True)

dataset = StatlogDataset()
print(dataset.context_size)
print(len(dataset))

# The network's input width and the bandit module's `n_features` are the same
# quantity; compute it once so the two values cannot drift apart.
n_features = dataset.context_size * dataset.num_actions
network = Network(n_features, hidden_size=100)

# Train on a random subset of at most `n_samples` items. The slice simply
# returns every index when the dataset is smaller than `n_samples`.
n_samples = 15000
indices = list(range(len(dataset)))
random.shuffle(indices)
subset_indices = indices[:n_samples]
mini_dataset = Subset(dataset, subset_indices)
print("Subset size:", len(mini_dataset))

# One sample per batch, visited in shuffled order.
train_loader = DataLoader(mini_dataset, batch_size=1, shuffle=True)
model = NeuralUCBBanditModule(
    n_features=n_features,
    network=network,
    early_stop_threshold=1e-3,
    num_grad_steps=1000,
    lambda_=0.00001,
    nu=0.00001,
    learning_rate=1e-3,
    train_freq=100,
    initial_train_steps=2000,
    max_grad_norm=20.0,
)

# Metrics go to a CSV file under logs/neural_ucb/; later cells read it back
# through `logger.log_dir`.
logger = pl.pytorch.loggers.CSVLogger("logs", name="neural_ucb", flush_logs_every_n_steps=100)
trainer = pl.Trainer(max_epochs=1, logger=logger, log_every_n_steps=1)

In [None]:
# Train the bandit module on the subset loader using the trainer configured above.
trainer.fit(model=model, train_dataloaders=train_loader)

In [None]:
# Read back the CSV written by the logger and accumulate the `reward` and
# `regret` columns over the first 1000 logged rows.
import numpy as np
import pandas as pd

metrics_csv = f"{logger.log_dir}/metrics.csv"
print(metrics_csv)
metrics = pd.read_csv(metrics_csv)

# Slice the frame once, then take running sums of the two columns.
metrics_head = metrics.iloc[:1000]
cumulative_reward = np.cumsum(metrics_head["reward"])
cumulative_regret = np.cumsum(metrics_head["regret"])

In [None]:
import matplotlib.pyplot as plt

# Draw both cumulative curves on a single shared axis.
fig, ax = plt.subplots()
curves = (
    (cumulative_reward, 'Cumulative Reward'),
    (cumulative_regret, 'Cumulative Regret'),
)
for series, label in curves:
    ax.plot(series, label=label)
ax.set_xlabel("steps")
ax.set_ylabel("cumulative reward/regret")
ax.legend()
plt.show()

In [None]:
# For `reward` and then `regret`: sum of the first N logged values divided by N,
# for N = 10, 100 and 313.
for column in ("reward", "regret"):
    for window in (10, 100, 313):
        print(sum(metrics[column][:window]) / window)

In [None]:
# Plot the logged `loss` column for the first 1000 rows.
loss_fig, loss_ax = plt.subplots()
loss_ax.plot(metrics["loss"].iloc[:1000], label='Loss')
loss_ax.set_xlabel("steps")
loss_ax.set_ylabel("loss")
loss_ax.legend()
plt.show()