In [1]:
from benchmarks import UCB, TS, BuyAndHold
from agent import portfolio, stream
import numpy as np

In [2]:
# Experiment setup: horizon, agents, portfolios and result buffers.

T = 30  # number of price updates drawn from the stream in each trial

# Agents, constructed in the order UCB, TS, buy-and-hold.
# NOTE(review): the meaning of the second positional argument (10) is not
# visible from this notebook -- confirm against benchmarks.py and consider
# giving it a name.
ucb, ts, bh = UCB(T, 10), TS(T, 10), BuyAndHold(T)

# One portfolio per strategy; only the two bandit portfolios are created
# with initial_position=1.
# NOTE(review): bh_p is not used by the cells visible here.
bh_p, ts_p, ucb_p = (
  portfolio(),
  portfolio(initial_position=1),
  portfolio(initial_position=1),
)

n_trials = 20

# Result buffers, all starting out as empty arrays.
bh_perf, ts_perf, ucb_perf = (np.array([]) for _ in range(3))

In [3]:
# Run n_trials price streams of T steps each. At every step both bandit
# agents pick an action, their portfolios are updated with the new price, and
# the resulting reward is fed back to the agent. After each trial the
# portfolio profit, divided by the trial's first price, is recorded.
# NOTE(review): ucb, ts, bh, ts_p and ucb_p are created once outside this
# loop and are not re-created here, so whatever state they hold carries over
# from one trial to the next -- confirm that this is intended.
# NOTE(review): re-running this cell keeps appending to the existing
# ts_perf / ucb_perf arrays rather than starting from empty ones.
for trial in range(n_trials):
  # a fresh stream per trial, seeded with the trial index
  s = stream(seed = trial)
  for t in range(T):
    new_price = s.update()
    if t == 0:
      # remember the first price of the trial; used below as the divisor
      init = new_price

    # decide what to play
    a_ucb = ucb.pull_arm()
    a_ts = ts.pull_arm()

    # see price, get reward/loss
    r_ts = ts_p.update(a_ts, new_price)
    r_ucb = ucb_p.update(a_ucb, new_price)
    bh.update(new_price)

    # learn
    ucb.update(r_ucb)
    ts.update(r_ts)

  # store performances
  # NOTE(review): bh_perf is reassigned here, whereas ts_perf and ucb_perf
  # are appended to. If get_perf() returns a single per-trial value, only the
  # last trial survives and the later "average" is not an average over
  # trials -- verify what BuyAndHold.get_perf() returns.
  bh_perf = bh.get_perf()
  ts_perf = np.append(ts_perf, ts_p.profit/init)
  ucb_perf = np.append(ucb_perf, ucb_p.profit/init)

In [4]:
# Summarise each strategy by the mean of its recorded performance values.
# NOTE(review): bh_perf is reassigned (not appended to) in the simulation
# loop, so its mean covers whatever bh.get_perf() last returned -- confirm it
# is comparable with the per-trial arrays of the other two agents.
ucb_avg = np.average(ucb_perf)
ts_avg = np.average(ts_perf)
bh_avg = np.average(bh_perf)

print(f'Average Performance for UCB Agent: {ucb_avg}.')
print(f'Average Performance for TS Agent: {ts_avg}.')
print(f'Average Performance for B&H Agent: {bh_avg}.')

Average Performance for UCB Agent: -0.14705642501471344.
Average Performance for TS Agent: -0.09504973683590993.
Average Performance for B&H Agent: -0.08336944176861631.
