In [1]:
import pickle
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
from IPython.display import display

In [2]:
!ls

Bandit_Analysis.ipynb
Bandit_experiment.ipynb
UCB_BernoulliArm_1.0_Naive_results.pkl
UCB_GaussianArm_1.0_Naive_results.pkl
[1m[36m__pycache__[m[m
arms.py
base_analysis.ipynb
cfg.py
data.py
eGreedyMAB_BernoulliArm_0.5_Naive_results.pkl
eGreedyMAB_BernoulliArm_0.5_Optimistic_results.pkl
eGreedyMAB_BernoulliArm_1.0_Naive_results.pkl
eGreedyMAB_BernoulliArm_1.0_Optimistic_results.pkl
eGreedyMAB_GaussianArm_0.5_Naive_results.pkl
eGreedyMAB_GaussianArm_0.5_Optimistic_results.pkl
eGreedyMAB_GaussianArm_1.0_Naive_results.pkl
eGreedyMAB_GaussianArm_1.0_Optimistic_results.pkl
linucb.py
mab.py
main.py


In [3]:
def process(df, is_ucb=True):
    ## get cumulative reward
    cumsum_base = np.array([], dtype='float')
    for e in df['sim'].unique():
        rundf = df[df['sim'] == e]
        run_cumsum = np.cumsum(rundf['reward'])
        cumsum_base = np.append(cumsum_base, run_cumsum)
    df['cumulative_reward'] = cumsum_base
    
    ## check if the optimal arm is selected at each run
    df['chosen_optimal'] = (df['chosen_arm'] == df['optimal_arm']).astype('int')
    
    ## groupby
    if is_ucb:
        result = df[['conf', 'step', 'reward', 'chosen_optimal']].groupby(by=['epsilon', 'step']).mean(numeric_only=False)
    else:
        result = df[['epsilon', 'step', 'reward', 'chosen_optimal']].groupby(by=['epsilon', 'step']).mean(numeric_only=False)
    result.reset_index(drop=False, inplace=True)
    
    return result

In [4]:
def plot_results(dfs, column, title, figsize=(12, 8), is_ucb=True):
    split = column.split('_')
    split[0] = split[0][0].upper() + split[0][1:]
    ylabel = ' '.join(split)
    
    plt.figure(figsize=figsize)
    for item in dfs:
        if is_ucb:
            plt.plot(item[column], label=f"eps={item['conf'].iloc[0]}")
        else:
            plt.plot(item[column], label=f"eps={item['epsilon'].iloc[0]}")
    plt.grid(True)
    plt.xlabel("Step")
    plt.ylabel(ylabel)
    plt.legend()
    plt.title(title)
    plt.show()

# Naive approach

## alpha = 1

In [5]:
with open("./eGreedyMAB_BernoulliArm_1.0_Naive_results.pkl", 'rb') as f:
    bernoulli = pickle.load(f)
    
with open("./eGreedyMAB_GaussianArm_1.0_Naive_results.pkl", 'rb') as f:
    gaussian = pickle.load(f)

In [6]:
bern_results_naive_1 = []
for df in tqdm(bernoulli):
    bern_results_naive_1.append(process(df), False)
    
gaus_results_naive_1 = []
for df in tqdm(gaussian):
    gaus_results_naive_1.append(process(df), False)

  0%|                                                                                                        | 0/6 [00:02<?, ?it/s]


KeyError: "['c'] not in index"

### Optimal Action %

In [None]:
plot_results(bern_results_naive_1, column='chosen_optimal', title="Naive Approach - Bernoulli Arms, alpha=1")

In [None]:
plot_results(gaus_results_naive_1, column='chosen_optimal', title="Naive Approach - Gaussian Arms, alpha=1")

### Average Reward

In [None]:
plot_results(bern_results_naive_1, column='reward', title="Naive Approach - Bernoulli Arms, alpha=1")

In [None]:
plot_results(gaus_results_naive_1, column='reward', title="Naive Approach - Gaussian Arms, alpha=1")

## alpha=0.5

In [None]:
with open("./eGreedyMAB_BernoulliArm_0.5_Naive_results.pkl", 'rb') as f:
    bernoulli_naive_half = pickle.load(f)
    
with open("./eGreedyMAB_GaussianArm_0.5_Naive_results.pkl", 'rb') as f:
    gaussian_naive_half = pickle.load(f)

In [None]:
bern_results_naive_half = []
for df in tqdm(bernoulli_naive_half):
    bern_results_naive_half.append(process(df))
    
gaus_results_naive_half = []
for df in tqdm(gaussian_naive_half):
    gaus_results_naive_half.append(process(df))

### Optimal action %

In [None]:
plot_results(bern_results_naive_half, column='chosen_optimal', title="Naive Approach - Bernoulli Arms, alpha=0.5")

In [None]:
plot_results(gaus_results_naive_half, column='chosen_optimal', title="Naive Approach - Gaussian Arms, alpha=0.5")

### Average Reward

In [None]:
plot_results(bern_results_naive_half, column='reward', title="Naive Approach - Bernoulli Arms, alpha=0.5")

In [None]:
plot_results(gaus_results_naive_half, column='reward', title="Naive Approach - Gaussian Arms, alpha=0.5")

# Optimistic Approach

## alpha=1

In [None]:
with open("./eGreedyMAB_BernoulliArm_1.0_Optimistic_results.pkl", 'rb') as f:
    bernoulli_opt_1 = pickle.load(f)
    
with open("./eGreedyMAB_GaussianArm_1.0_Optimistic_results.pkl", 'rb') as f:
    gaussian_opt_1 = pickle.load(f)

In [None]:
bern_results_opt_1 = []
for df in tqdm(bernoulli_opt_1):
    bern_results_opt_1.append(process(df))
    
gaus_results_opt_1 = []
for df in tqdm(gaussian_opt_1):
    gaus_results_opt_1.append(process(df))

In [None]:
print(f"Bernoulli\n")
print("Greedy")
display(bern_results_opt_1[0].head())
print()
display(bern_results_opt_1[0].tail())
print()

print("egreedy")
display(bern_results_opt_1[1].head())
print()
display(bern_results_opt_1[1].tail())

### Optimal action %

In [None]:
plot_results(bern_results_opt_1, column='chosen_optimal', title="Optimistic Approach - Bernoulli Arms, alpha=1.0")

In [None]:
plot_results(gaus_results_opt_1, column='chosen_optimal', title="Optimistic Approach - Gaussian Arms, alpha=1.0")

### Average Reward

In [None]:
plot_results(bern_results_opt_1, column='reward', title="Optimistic Approach - Bernoulli Arms, alpha=1.0")

In [None]:
plot_results(gaus_results_opt_1, column='reward', title="Optimistic Approach - Gaussian Arms, alpha=1.0")

## alpha=0.5

In [None]:
with open("./eGreedyMAB_BernoulliArm_0.5_Optimistic_results.pkl", 'rb') as f:
    bernoulli_opt_half = pickle.load(f)
    
with open("./eGreedyMAB_GaussianArm_0.5_Optimistic_results.pkl", 'rb') as f:
    gaussian_opt_half = pickle.load(f)

In [None]:
bern_results_opt_half = []
for df in tqdm(bernoulli_opt_half):
    bern_results_opt_half.append(process(df))
    
gaus_results_opt_half = []
for df in tqdm(gaussian_opt_half):
    gaus_results_opt_half.append(process(df))

### Optimal action %

In [None]:
plot_results(bern_results_opt_half, column='chosen_optimal', 
             title="Optimistic Approach - Bernoulli Arms, alpha=0.5")

In [None]:
plot_results(gaus_results_opt_half, column='chosen_optimal', 
             title="Optimistic Approach - Gaussian Arms, alpha=0.5")

### Average Reward

In [None]:
plot_results(bern_results_opt_half, column='reward', 
             title="Optimistic Approach - Bernoulli Arms, alpha=0.5")

In [None]:
plot_results(gaus_results_opt_half, column='reward', 
             title="Optimistic Approach - Gaussian Arms, alpha=0.5")

# Comparison - Naive vs Optimistic

## Bernoulli alpha=1

### Optimal action %

In [None]:
## find the best epsilon
naive_mean_choices = [item['chosen_optimal'].mean() for item in bern_results_naive_1]
naive_argmax = np.argmax(naive_mean_choices)

print(f"Naive Approach\t Best epsilon - {bern_results_naive_1[naive_argmax]['epsilon'].iloc[0]}")

In [None]:
column = 'chosen_optimal'
best_epsilon = bern_results_naive_1[naive_argmax]['epsilon'].iloc[0]
figsize=(12, 8)

split = column.split('_')
split[0] = split[0][0].upper() + split[0][1:]
ylabel = ' '.join(split)

plt.figure(figsize=figsize)
plt.plot(bern_results_naive_1[naive_argmax][column], label=f"Naive approach, eps={best_epsilon}")
plt.plot(bern_results_opt_1[0][column], label=f"Optimistic approach, Greedy")
plt.grid(True)
plt.xlabel("Step")
plt.ylabel(ylabel)
plt.legend()
plt.title(f"Comparison {ylabel} - Naive vs Optimistic")
plt.show()

### Average reward

In [None]:
## find the best epsilon
naive_mean_reward = [item['reward'].mean() for item in bern_results_naive_1]
naive_argmax = np.argmax(naive_mean_reward)

print(f"Naive Approach\t Best epsilon - {bern_results_naive_1[naive_argmax]['epsilon'].iloc[0]}")

In [None]:
column = 'reward'
best_epsilon = bern_results_naive_1[naive_argmax]['epsilon'].iloc[0]
figsize=(12, 8)

split = column.split('_')
split[0] = split[0][0].upper() + split[0][1:]
ylabel = ' '.join(split)

plt.figure(figsize=figsize)
plt.plot(bern_results_naive_1[naive_argmax][column], label=f"Naive approach, eps={best_epsilon}")
plt.plot(bern_results_opt_1[0][column], label=f"Optimistic approach, Greedy")
plt.grid(True)
plt.xlabel("Step")
plt.ylabel(ylabel)
plt.legend()
plt.title(f"Comparison {ylabel} - Naive vs Optimistic")
plt.show()

## Bernoulli alpha=0.5

### Optimal action %

In [None]:
## find the best epsilon
naive_mean_choices = [item['chosen_optimal'].mean() for item in bern_results_naive_half]
naive_argmax = np.argmax(naive_mean_choices)

print(f"Naive Approach\t Best epsilon - {bern_results_naive_half[naive_argmax]['epsilon'].iloc[0]}")

In [None]:
column = 'chosen_optimal'
best_epsilon = bern_results_naive_half[naive_argmax]['epsilon'].iloc[0]
figsize=(12, 8)

split = column.split('_')
split[0] = split[0][0].upper() + split[0][1:]
ylabel = ' '.join(split)

plt.figure(figsize=figsize)
plt.plot(bern_results_naive_half[naive_argmax][column], label=f"Naive approach, eps={best_epsilon}")
plt.plot(bern_results_opt_half[0][column], label=f"Optimistic approach, Greedy")
plt.grid(True)
plt.xlabel("Step")
plt.ylabel(ylabel)
plt.legend()
plt.title(f"Comparison {ylabel} - Naive vs Optimistic")
plt.show()

### Average reward

In [None]:
## find the best epsilon
naive_mean_reward = [item['reward'].mean() for item in bern_results_naive_half]
naive_argmax = np.argmax(naive_mean_reward)

print(f"Naive Approach\t Best epsilon - {bern_results_naive_half[naive_argmax]['epsilon'].iloc[0]}")

In [None]:
column = 'reward'
best_epsilon = bern_results_naive_half[naive_argmax]['epsilon'].iloc[0]
figsize=(12, 8)

split = column.split('_')
split[0] = split[0][0].upper() + split[0][1:]
ylabel = ' '.join(split)

plt.figure(figsize=figsize)
plt.plot(bern_results_naive_half[naive_argmax][column], label=f"Naive approach, eps={best_epsilon}")
plt.plot(bern_results_opt_half[0][column], label=f"Optimistic approach, Greedy")
plt.grid(True)
plt.xlabel("Step")
plt.ylabel(ylabel)
plt.legend()
plt.title(f"Comparison {ylabel} - Naive vs Optimistic")
plt.show()

## Gaussian alpha=1

### Optimal action %

In [None]:
## find the best epsilon
naive_mean_choices = [item['chosen_optimal'].mean() for item in gaus_results_naive_1]
naive_argmax = np.argmax(naive_mean_choices)

print(f"Naive Approach\t Best epsilon - {gaus_results_naive_1[naive_argmax]['epsilon'].iloc[0]}")

In [None]:
column = 'chosen_optimal'
best_epsilon = gaus_results_naive_1[naive_argmax]['epsilon'].iloc[0]
figsize=(12, 8)

split = column.split('_')
split[0] = split[0][0].upper() + split[0][1:]
ylabel = ' '.join(split)

plt.figure(figsize=figsize)
plt.plot(gaus_results_naive_1[naive_argmax][column], label=f"Naive approach, eps={best_epsilon}")
plt.plot(gaus_results_opt_1[0][column], label=f"Optimistic approach, Greedy")
plt.grid(True)
plt.xlabel("Step")
plt.ylabel(ylabel)
plt.legend()
plt.title(f"Comparison {ylabel} - Naive vs Optimistic")
plt.show()

### Average reward

In [None]:
## find the best epsilon
naive_mean_reward = [item['reward'].mean() for item in gaus_results_naive_1]
naive_argmax = np.argmax(naive_mean_reward)

print(f"Naive Approach\t Best epsilon - {gaus_results_naive_1[naive_argmax]['epsilon'].iloc[0]}")

In [None]:
column = 'reward'
best_epsilon = gaus_results_naive_1[naive_argmax]['epsilon'].iloc[0]
figsize=(12, 8)

split = column.split('_')
split[0] = split[0][0].upper() + split[0][1:]
ylabel = ' '.join(split)

plt.figure(figsize=figsize)
plt.plot(gaus_results_naive_1[naive_argmax][column], label=f"Naive approach, eps={best_epsilon}")
plt.plot(gaus_results_opt_1[0][column], label=f"Optimistic approach, Greedy")
plt.grid(True)
plt.xlabel("Step")
plt.ylabel(ylabel)
plt.legend()
plt.title(f"Comparison {ylabel} - Naive vs Optimistic")
plt.show()

## Bernoulli alpha=0.5

### Optimal action %

In [None]:
## find the best epsilon
naive_mean_choices = [item['chosen_optimal'].mean() for item in gaus_results_naive_half]
naive_argmax = np.argmax(naive_mean_choices)

print(f"Naive Approach\t Best epsilon - {gaus_results_naive_half[naive_argmax]['epsilon'].iloc[0]}")

In [None]:
column = 'chosen_optimal'
best_epsilon = gaus_results_naive_half[naive_argmax]['epsilon'].iloc[0]
figsize=(12, 8)

split = column.split('_')
split[0] = split[0][0].upper() + split[0][1:]
ylabel = ' '.join(split)

plt.figure(figsize=figsize)
plt.plot(gaus_results_naive_half[naive_argmax][column], label=f"Naive approach, eps={best_epsilon}")
plt.plot(gaus_results_opt_half[0][column], label=f"Optimistic approach, Greedy")
plt.grid(True)
plt.xlabel("Step")
plt.ylabel(ylabel)
plt.legend()
plt.title(f"Comparison {ylabel} - Naive vs Optimistic")
plt.show()

### Average reward

In [None]:
## find the best epsilon
naive_mean_reward = [item['reward'].mean() for item in gaus_results_naive_half]
naive_argmax = np.argmax(naive_mean_reward)

print(f"Naive Approach\t Best epsilon - {gaus_results_naive_half[naive_argmax]['epsilon'].iloc[0]}")

In [None]:
column = 'reward'
best_epsilon = gaus_results_naive_half[naive_argmax]['epsilon'].iloc[0]
figsize=(12, 8)

split = column.split('_')
split[0] = split[0][0].upper() + split[0][1:]
ylabel = ' '.join(split)

plt.figure(figsize=figsize)
plt.plot(gaus_results_naive_half[naive_argmax][column], label=f"Naive approach, eps={best_epsilon}")
plt.plot(gaus_results_opt_half[0][column], label=f"Optimistic approach, Greedy")
plt.grid(True)
plt.xlabel("Step")
plt.ylabel(ylabel)
plt.legend()
plt.title(f"Comparison {ylabel} - Naive vs Optimistic")
plt.show()

# UCB

In [None]:
with open("./UCB_BernoulliArm_1.0_Naive_results.pkl", 'rb') as f:
    ucb_bernoulli = pickle.load(f)
    
with open("./UCB_GaussianArm_1.0_Naive_results.pkl", 'rb') as f:
    ucb_gaussian = pickle.load(f)

In [None]:
ucb_bern_results = []
for df in tqdm(ucb_bernoulli):
    ucb_bern_results.append(process(df))
    
ucb_gaus_results = []
for df in tqdm(ucb_gaussian):
    ucb_gaus_results.append(process(df))

### Optimal Action %

In [None]:
plot_results(ucb_bern_results, column='chosen_optimal', title="Naive Approach - Bernoulli Arms, alpha=1")

In [None]:
plot_results(ucb_gaus_results, column='chosen_optimal', title="Naive Approach - Gaussian Arms, alpha=1")

### Average Reward

In [None]:
plot_results(ucb_bern_results, column='reward', title="Naive Approach - Bernoulli Arms, alpha=1")

In [None]:
plot_results(ucb_gaus_results, column='reward', title="Naive Approach - Gaussian Arms, alpha=1")