In [84]:
import datetime
import numpy as np
import matplotlib.pyplot as plt
import random
import seaborn as sns

import pandas as pd

%matplotlib inline

In [85]:
%run BanditLearning.ipynb

In [86]:
NUM_ARMS = 5
NUM_ITERATIONS = 2000
environment = Env(rewards=np.random.randint(1, 50, NUM_ARMS), deviations=np.random.randint(1, 10, NUM_ARMS))

In [87]:
# Random Agent
def RA_run():
    random_agent = RandomAgent(env=environment, max_iterations=2000)
    RA_history = random_agent.act()
    
    res = {"method": "RA", "iterations": NUM_ITERATIONS, "arms (k)": NUM_ARMS,
           "reward": sum(RA_history['rewards']), "time (ms)": RA_history['time']}
    
    return res
    
    # print(f"TOTAL REWARD : {sum(RA_history['rewards'])}")
    # print(f"TIME TAKEN (ms) : {RA_history['time']}")

    # plot_history(RA_history)

In [88]:
# Thompson Sampling
def TS_run():
    TS_history = TS_act(max_iterations=2000)
    
    res = {"method": "TS", "iterations": NUM_ITERATIONS, "arms (k)": NUM_ARMS,
           "reward": sum(TS_history['rewards']), "time (ms)": TS_history['time']}
    
    return res
    
    # print(f"TOTAL REWARD : {sum(TS_history['rewards'])}")
    # print(f"TIME TAKEN (ms) : {TS_history['time']}")

    # plot_history(TS_history)

In [89]:
# Epsilon Greedy
def EG_run():
    epsilon_greedy_agent = EpsilonGreedyAgent(env=environment, max_iterations=2000, epsilon=0.1)
    EG_history = epsilon_greedy_agent.act()
    
    res = {"method": "EG", "iterations": NUM_ITERATIONS, "arms (k)": NUM_ARMS,
           "reward": sum(EG_history['rewards']), "time (ms)": EG_history['time']}
    
    return res
    
    # print(f"TOTAL REWARD : {sum(EG_history['rewards'])}")
    # print(f"TIME TAKEN (ms) : {EG_history['time']}")

    # plot_history(EG_history)

In [90]:
# Upper Confidence Bound
def UCB_run():
    UCB_agent = UpperConfidenceBoundAgent(env=environment, max_iterations=2000)
    UCB_history = UCB_agent.act()
    
    res = {"method": "UCB", "iterations": NUM_ITERATIONS, "arms (k)": NUM_ARMS,
           "reward": sum(UCB_history['rewards']), "time (ms)": UCB_history['time']}
    
    return res

    # print(f"TOTAL REWARD : {sum(UCB_history['rewards'])}")
    # print(f"TIME TAKEN (ms) : {UCB_history['time']}")

    # plot_history(UCB_history)

In [91]:
# UCB Pick and Compare
def UCB_PC_run():
    UCB_PC_agent = UCB_PickAndCompareAgent(env=environment, max_iterations=2000)
    UCB_PC_history = UCB_PC_agent.act()
    
    res = {"method": "UCB_PC", "iterations": NUM_ITERATIONS, "arms (k)": NUM_ARMS,
           "reward": sum(UCB_PC_history['rewards']), "time (ms)": UCB_PC_history['time']}
    
    return res

    # print(f"TOTAL REWARD : {sum(UCB_PC_history['rewards'])}")
    # print(f"TIME TAKEN (ms) : {UCB_PC_history['time']}")

    # plot_history(UCB_PC_history)

In [92]:
results = pd.DataFrame()

In [93]:
arm_arr = [1, 10, 100, 1000, 5000, 10000]
iter_arr = [100, 500, 1000, 2000, 5000, 10000]

for i in range(len(arm_arr)):
    for j in range(len(iter_arr)):
        NUM_ARMS = arm_arr[i]
        NUM_ITERATIONS = iter_arr[j]
        
        results = results.append(RA_run(), ignore_index=True)
        results = results.append(TS_run(), ignore_index=True)
        results = results.append(EG_run(), ignore_index=True)
        results = results.append(UCB_run(), ignore_index=True)
        results = results.append(UCB_PC_run(), ignore_index=True)

# test = {"bruh": [1,2,3], "bruh2": 2}
# results = results.append(test, ignore_index=True)

In [94]:
results.head()

Unnamed: 0,method,iterations,arms (k),reward,time (ms)
0,RA,100.0,1.0,34169.281156,52.032
1,TS,100.0,1.0,58042.690108,55.969
2,EG,100.0,1.0,55174.014106,31.966
3,UCB,100.0,1.0,58010.555448,50.999
4,UCB_PC,100.0,1.0,57910.415321,64.031


In [95]:
results.to_csv('bandit_data.csv')