In [1]:
import pickle
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
from IPython.display import display
import gc

In [2]:
def process(df, is_thompson):
    """Add per-run reward statistics to ``df`` and average them over runs.

    Two columns are written to ``df`` in place:

    * ``cumulative_reward`` -- running sum of ``reward`` within each value
      of ``sim``, stored as float.
    * ``chosen_optimal`` -- 1 where ``chosen_arm`` equals ``optimal_arm``,
      otherwise 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``sim``, ``step``, ``reward``, ``chosen_arm`` and
        ``optimal_arm``; ``param`` is additionally required when
        ``is_thompson`` is false.
    is_thompson : bool
        If true, average ``reward``, ``cumulative_reward`` and
        ``chosen_optimal`` per ``step``. Otherwise average ``reward`` and
        ``chosen_optimal`` per (``param``, ``step``) pair.

    Returns
    -------
    pandas.DataFrame
        The grouped means with the grouping keys restored as ordinary
        columns and a fresh integer index.
    """
    # groupby(...).cumsum() yields one value per input row, in the original
    # row order. Each row therefore gets the running total of its own
    # simulation even when runs are interleaved. Assigning the raw array
    # keeps the assignment positional, so a duplicated index cannot break it.
    df['cumulative_reward'] = (
        df.groupby('sim')['reward'].cumsum().astype('float').to_numpy()
    )

    # Flag the rows where the optimal arm was selected.
    df['chosen_optimal'] = (df['chosen_arm'] == df['optimal_arm']).astype('int')

    # Average across simulations.
    if is_thompson:
        grouped = df[['step', 'reward', 'cumulative_reward', 'chosen_optimal']].groupby(by='step')
    else:
        grouped = df[['param', 'step', 'reward', 'chosen_optimal']].groupby(by=['param', 'step'])

    return grouped.mean(numeric_only=False).reset_index(drop=False)

In [3]:
# Legend prefix used for each model whose curves are keyed by a 'param' value.
_PARAM_LABELS = {"ucb": "conf", "mab": "epsilon", "etc": "explore"}


def _x_values(frame):
    """Return the frame's 'step' column if it has one, else its index."""
    return frame['step'] if 'step' in frame.columns else frame.index


def plot_results(dfs, column, title, model, figsize=(12, 8)):
    """Draw one line chart of ``column`` for the given model's results.

    Parameters
    ----------
    dfs : pandas.DataFrame or iterable of pandas.DataFrame
        For ``model == "thompson"`` a single frame. For every other model an
        iterable of frames, each with a ``param`` column whose first value
        labels that frame's curve in the legend.
    column : str
        Column to plot. It also supplies the y-axis label: underscores
        become spaces and the first letter is upper-cased.
    title : str
        Figure title.
    model : str
        One of ``"thompson"``, ``"ucb"``, ``"mab"`` or ``"etc"``.
    figsize : tuple, optional
        Figure size passed to matplotlib.

    Raises
    ------
    ValueError
        If ``model`` is not one of the supported names. This is checked
        before any figure is created, so no blank figure is left behind.
    """
    if model != "thompson" and model not in _PARAM_LABELS:
        supported = ", ".join(["thompson", *_PARAM_LABELS])
        raise ValueError(f"unknown model {model!r}; expected one of: {supported}")

    # Slicing with [:1] rather than [0] keeps this safe when the first token
    # is empty (an empty column name or one that starts with '_').
    head, *rest = column.split('_')
    ylabel = ' '.join([head[:1].upper() + head[1:], *rest])

    fig, ax = plt.subplots(figsize=figsize)
    if model == "thompson":
        ax.plot(_x_values(dfs), dfs[column])
    else:
        prefix = _PARAM_LABELS[model]
        for item in dfs:
            # Plot against 'step' so a per-parameter slice that kept its
            # original index is not shifted along the x axis.
            ax.plot(_x_values(item), item[column],
                    label=f"{prefix}={item['param'].iloc[0]}")
        # Only build a legend when something was drawn; an empty one makes
        # matplotlib emit a warning.
        handles, _ = ax.get_legend_handles_labels()
        if handles:
            ax.legend()
    ax.grid(True)
    ax.set_xlabel("Step")
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    plt.show()