### Install Dependencies

In [None]:
%pip install torch --quiet --quiet
%pip install coba --quiet --quiet
%pip install scipy --quiet --quiet
%pip install numpy --quiet --quiet
%pip install matplotlib --quiet --quiet
%pip install transformers --quiet --quiet
%pip install xformers --quiet --quiet
%pip install einops --quiet --quiet

### TODO
+ Change all 'reward' names to 'loss'
+ Move MyRewardPredictor back into this notebook

### Run Demo

In [None]:
import torch
import coba as cb
import numpy as np
from CappedIGW import CappedIGW
from ScaledL1Loss import MakeLosses
from PowerScheduler import PowerScheduler
from MyRewardPredictor import MyRewardPredictor
from UniformReferencePolicy import UniformReferencePolicy

# --- Configuration -----------------------------------------------------------
# The two dimensions below are specific to OpenML dataset 41540.
n_context_dim = 22
n_action_dim = 1

tzero = 100
lr = 1e-2
batch_size = 8
n_batches = 2_500
n_processes = 4

# When the experiment is spread over several processes, restrict torch to a
# single thread (presumably to avoid CPU oversubscription -- confirm).
if n_processes > 1:
    torch.set_num_threads(1)

# --- Predictor shared by all learners ----------------------------------------
# The input size is the context and action dimensions combined. The optimizer
# and LR scheduler are passed as factories rather than as built objects.
fhat = MyRewardPredictor(
    numrff=1024,
    sigma=1e-1,
    in_features=n_context_dim + n_action_dim,
    opt_factory=lambda params: torch.optim.Adam(params, lr=lr),
    sched_factory=lambda opt: torch.optim.lr_scheduler.LambdaLR(
        opt,
        lr_lambda=PowerScheduler(tzero, -0.5),
    ),
)

# --- Environment -------------------------------------------------------------
cb.Environments.cache_dir('.coba_cache')

# Take exactly batch_size * n_batches examples, then scale them, apply the
# MakeLosses filter, and group them into batches of batch_size.
env = (
    cb.Environments
    .from_openml(data_id=41540, take=batch_size * n_batches)
    .scale()
    .filter(MakeLosses())
    .batch(batch_size)
)

# --- Learners ----------------------------------------------------------------
# Four CappedIGW learners that differ only in the first argument of their gamma
# PowerScheduler, geometrically spaced between 1e-6 and 1e-3.
lrn = [
    CappedIGW(
        mu=UniformReferencePolicy(),
        fhat=fhat,
        tau=50,
        gamma_scheduler=PowerScheduler(tz, 0.5),
    )
    for tz in np.geomspace(1e-6, 1e-3, 4)
]

# --- Run and plot ------------------------------------------------------------
result = cb.Experiment(env, lrn).run(processes=n_processes)

# First the default metric, then the two predictor diagnostics; every plot
# uses span=100.
result.plot_learners(span=100)
result.plot_learners(y='reward_prediction_loss', span=100)
result.plot_learners(y='reward_prediction_regret', span=100)

2023-07-19 11:07:02 -- pid-18980  -- Experiment Started
2023-07-19 11:07:02 -- pid-29792  -- Recording Evaluator 0 parameters... (0.0 seconds) (completed)
2023-07-19 11:07:04 -- pid-26532  -- Recording Learner 0 parameters... (0.0 seconds) (completed)
2023-07-19 11:07:04 -- pid-26532  -- Recording Environment 0 parameters... (0.0 seconds) (completed)
2023-07-19 11:07:04 -- pid-29792  -- Recording Learner 2 parameters... (0.0 seconds) (completed)
2023-07-19 11:07:04 -- pid-7608   -- Recording Learner 1 parameters... (0.0 seconds) (completed)
2023-07-19 11:07:04 -- pid-23276  -- Recording Learner 3 parameters... (0.0 seconds) (completed)
2023-07-19 11:07:05 -- pid-26532  -- Peeking at Environment 0... (0.78 seconds) (completed)
2023-07-19 11:07:05 -- pid-23276  -- Peeking at Environment 0... (0.8 seconds) (completed)
2023-07-19 11:07:05 -- pid-29792  -- Peeking at Environment 0... (0.82 seconds) (completed)
2023-07-19 11:07:05 -- pid-7608   -- Peeking at Environment 0... (0.86 seconds) (