In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


from simulator import ABTestSimulator, EpsilonGreedySimulator, ThompsonSamplingSimulator,\
                       UCBSimulator, UCBBayesSimulator, UpperCredibleChoiceSimulator,\
                       DiscountedThompsonSamplingSimulator, SlidingWidowUCBSimulator,\
                       SlidingWidowThompsonSamplingSimulator

import warnings
# Silence every warning category for the rest of the notebook session.
# NOTE(review): this is a blanket filter, so deprecation and numerical
# warnings raised by later cells will not be shown.
warnings.filterwarnings("ignore")

### Run and save results 

####  You can skip this section and import the previously saved results in the next section

In [None]:
# Shared configuration for the simulator runs in the cells below.
bandit_probs = [0.35, 0.40, 0.30, 0.25]  # passed to every simulator

n_rounds = 100_000   # passed to every simulator
n_simulations = 50   # passed to every simulator; also shown in plot titles
n_tests = 1_000      # first argument of ABTestSimulator

epsilon = 0.1        # first argument of EpsilonGreedySimulator
gamma = 0.1          # first argument of Exp3Simulator
discount = 0.95      # first argument of DiscountedThompsonSamplingSimulator

In [None]:
# Run the epsilon-greedy simulator with the shared configuration.
egreedy_results = EpsilonGreedySimulator(
    epsilon,
    n_simulations,
    n_rounds,
    bandit_probs,
).simulate()

In [None]:
# Exp3Simulator is not among the names imported from `simulator` in the first
# cell, so this cell raised NameError on a fresh kernel. Import it here.
# NOTE(review): assumes `simulator` exports Exp3Simulator — confirm.
from simulator import Exp3Simulator

# Run the Exp3 simulator with the shared configuration.
exp3_results = Exp3Simulator(gamma, n_simulations, n_rounds, bandit_probs).simulate()

In [None]:
# Run the Thompson Sampling simulator with the shared configuration.
ts_results = ThompsonSamplingSimulator(
    n_simulations,
    n_rounds,
    bandit_probs,
).simulate()

In [None]:
# Run the UCB simulator with the shared configuration.
ucb_results = UCBSimulator(
    n_simulations,
    n_rounds,
    bandit_probs,
).simulate()

In [None]:
# Run the UCB-Bayes simulator with the shared configuration.
ucb_bayes_results = UCBBayesSimulator(
    n_simulations,
    n_rounds,
    bandit_probs,
).simulate()

In [None]:
# Run the Upper Credible Choice simulator with the shared configuration.
ucc_results = UpperCredibleChoiceSimulator(
    n_simulations,
    n_rounds,
    bandit_probs,
).simulate()

In [None]:
# Run the Discounted Thompson Sampling simulator with the shared configuration.
dts_results = DiscountedThompsonSamplingSimulator(
    discount,
    n_simulations,
    n_rounds,
    bandit_probs,
).simulate()

In [None]:
# Run the sliding-window UCB simulator (window enabled, length 100).
sw_ucb_results = SlidingWidowUCBSimulator(
    n_simulations,
    n_rounds,
    bandit_probs,
    sliding_window=True,
    window_length=100,
).simulate()

In [None]:
# Run the sliding-window Thompson Sampling simulator (window enabled, length 100).
sw_ts_results = SlidingWidowThompsonSamplingSimulator(
    n_simulations,
    n_rounds,
    bandit_probs,
    sliding_window=True,
    window_length=100,
).simulate()

In [None]:
# Run the A/B-test simulator with the shared configuration.
abtest_results = ABTestSimulator(
    n_tests,
    n_simulations,
    n_rounds,
    bandit_probs,
).simulate()

In [None]:
# Tabulate the epsilon-greedy output, average the remaining columns per
# 'round id', and write the averaged table to the results folder.
egreedy_results_df = pd.DataFrame(data=egreedy_results)
egreedy_avg_results_df = (
    egreedy_results_df
    .groupby(by='round id', as_index=False)
    .mean()
)
egreedy_avg_results_df.to_csv(path_or_buf=r'results/egreedy_results.csv')

In [None]:
# Tabulate the Exp3 output, average the remaining columns per 'round id',
# and write the averaged table to the results folder.
exp3_results_df = pd.DataFrame(data=exp3_results)
exp3_avg_results_df = (
    exp3_results_df
    .groupby(by='round id', as_index=False)
    .mean()
)
exp3_avg_results_df.to_csv(path_or_buf=r'results/exp3_results.csv')

In [None]:
# Tabulate the Thompson Sampling output, average the remaining columns per
# 'round id', and write the averaged table to the results folder.
ts_results_df = pd.DataFrame(data=ts_results)
ts_avg_results_df = (
    ts_results_df
    .groupby(by='round id', as_index=False)
    .mean()
)
ts_avg_results_df.to_csv(path_or_buf=r'results/ts_results.csv')

In [None]:
# Tabulate the UCB output, average the remaining columns per 'round id',
# and write the averaged table to the results folder.
ucb_results_df = pd.DataFrame(data=ucb_results)
ucb_avg_results_df = (
    ucb_results_df
    .groupby(by='round id', as_index=False)
    .mean()
)
ucb_avg_results_df.to_csv(path_or_buf=r'results/ucb_results.csv')

In [None]:
# Tabulate the UCB-Bayes output, average the remaining columns per
# 'round id', and write the averaged table to the results folder.
ucb_bayes_results_df = pd.DataFrame(data=ucb_bayes_results)
ucb_bayes_avg_results_df = (
    ucb_bayes_results_df
    .groupby(by='round id', as_index=False)
    .mean()
)
ucb_bayes_avg_results_df.to_csv(path_or_buf=r'results/ucb_bayes_results.csv')

In [None]:
# Tabulate the Upper Credible Choice output, average the remaining columns
# per 'round id', and write the averaged table to the results folder.
ucc_results_df = pd.DataFrame(data=ucc_results)
ucc_avg_results_df = (
    ucc_results_df
    .groupby(by='round id', as_index=False)
    .mean()
)
ucc_avg_results_df.to_csv(path_or_buf=r'results/ucc_results.csv')

In [None]:
# Tabulate the Discounted Thompson Sampling output, average the remaining
# columns per 'round id', and write the averaged table to the results folder.
dts_results_df = pd.DataFrame(data=dts_results)
dts_avg_results_df = (
    dts_results_df
    .groupby(by='round id', as_index=False)
    .mean()
)
dts_avg_results_df.to_csv(path_or_buf=r'results/dts_results.csv')

In [None]:
# Tabulate the sliding-window UCB output, average the remaining columns per
# 'round id', and write the averaged table to the results folder.
sw_ucb_results_df = pd.DataFrame(data=sw_ucb_results)
sw_ucb_avg_results_df = (
    sw_ucb_results_df
    .groupby(by='round id', as_index=False)
    .mean()
)
sw_ucb_avg_results_df.to_csv(path_or_buf=r'results/sw_ucb_results.csv')

In [None]:
# Tabulate the sliding-window Thompson Sampling output, average the remaining
# columns per 'round id', and write the averaged table to the results folder.
sw_ts_results_df = pd.DataFrame(data=sw_ts_results)
sw_ts_avg_results_df = (
    sw_ts_results_df
    .groupby(by='round id', as_index=False)
    .mean()
)
sw_ts_avg_results_df.to_csv(path_or_buf=r'results/sw_ts_results.csv')

In [None]:
# Tabulate the A/B-test output, average the remaining columns per 'round id',
# and write the averaged table to the results folder.
abtest_results_df = pd.DataFrame(data=abtest_results)
abtest_avg_results_df = (
    abtest_results_df
    .groupby(by='round id', as_index=False)
    .mean()
)
abtest_avg_results_df.to_csv(path_or_buf=r'results/abtest_results.csv')

### Import results

In [None]:
# Reload the per-round averaged tables written by the save cells above.
# The first CSV column is the index written by to_csv, so it is read back
# as the index instead of as a data column.

# Methods compared in the stationary plots.
abtest_avg_results_df = pd.read_csv('results/abtest_results.csv', index_col=0)
egreedy_avg_results_df = pd.read_csv('results/egreedy_results.csv', index_col=0)
ucb_avg_results_df = pd.read_csv('results/ucb_results.csv', index_col=0)
ucc_avg_results_df = pd.read_csv('results/ucc_results.csv', index_col=0)
ucb_bayes_avg_results_df = pd.read_csv('results/ucb_bayes_results.csv', index_col=0)
ts_avg_results_df = pd.read_csv('results/ts_results.csv', index_col=0)

# Methods compared in the non-stationary plots.
sw_ucb_avg_results_df = pd.read_csv('results/sw_ucb_results.csv', index_col=0)
sw_ts_avg_results_df = pd.read_csv('results/sw_ts_results.csv', index_col=0)
dts_avg_results_df = pd.read_csv('results/dts_results.csv', index_col=0)

### Plot cumulative regret and rewards for stationary bandits 

In [None]:
# Global seaborn styling for every figure below: white grid, enlarged fonts.
sns.set(
    style='whitegrid',
    font_scale=2.5,
)

In [None]:
# Cumulative expected reward per round for each stationary-bandit method,
# with a dotted reference line at the largest value in bandit_probs.
#
# Each curve carries its own label so the legend cannot drift out of step
# with the plotting order. The epsilon label is a raw string because '\e'
# is an invalid escape sequence in a normal string literal.
labelled_results = [
    ('A/B Test (1k test)', abtest_avg_results_df),
    (r'$\epsilon$-Greedy = 0.1', egreedy_avg_results_df),
    ('UCB', ucb_avg_results_df),
    ('Upper Credible Choice', ucc_avg_results_df),
    ('UCB-Bayes', ucb_bayes_avg_results_df),
    ('Thompson Sampling', ts_avg_results_df),
]

fig, ax = plt.subplots(figsize=(12, 10))

for method_label, avg_results_df in labelled_results:
    ax.plot(avg_results_df['round id'],
            avg_results_df['cumulative expected reward'],
            linewidth=3.5,
            label=method_label)

# Reference line for the optimal value.
ax.axhline(y=max(bandit_probs), color='k', linestyle=':', linewidth=2.5, label='Optimal')

ax.set_title('Comparison of cumulative expected reward \n for each method in {} simulations'.format(n_simulations))
ax.set_xlabel('Rounds #')
ax.set_ylabel('Cumulative Expected Reward')

ax.legend(loc='best', ncol=1, fontsize=20)

plt.tight_layout()
# Saved next to the CSVs: the rest of the notebook uses the lowercase
# 'results/' folder, and 'Results/' is a different directory on
# case-sensitive filesystems.
plt.savefig(r'results/cumulative expected-reward stationary.png')
plt.show()

In [None]:
# Cumulative regret per round for each stationary-bandit method.
#
# Each curve carries its own label so the legend cannot drift out of step
# with the plotting order. No 'Optimal' entry is listed because this figure
# draws no reference line for it. The epsilon label is a raw string because
# '\e' is an invalid escape sequence in a normal string literal.
labelled_results = [
    ('A/B Test (1k test)', abtest_avg_results_df),
    (r'$\epsilon$-Greedy = 0.1', egreedy_avg_results_df),
    ('UCB', ucb_avg_results_df),
    ('Upper Credible Choice', ucc_avg_results_df),
    ('UCB-Bayes', ucb_bayes_avg_results_df),
    ('Thompson Sampling', ts_avg_results_df),
]

fig, ax = plt.subplots(figsize=(12, 10))

for method_label, avg_results_df in labelled_results:
    ax.plot(avg_results_df['round id'],
            avg_results_df['cumulative regret'],
            linewidth=3.5,
            label=method_label)

ax.set_title('Comparison of cumulative regret \n for each method in {} simulations'.format(n_simulations))
ax.set_xlabel('Rounds #')
ax.set_ylabel('Cumulative Regret')

ax.legend(loc='best', fontsize=20)

plt.tight_layout()
# Saved next to the CSVs: the rest of the notebook uses the lowercase
# 'results/' folder, and 'Results/' is a different directory on
# case-sensitive filesystems.
plt.savefig(r'results/cumulative regret stationary.png')
plt.show()

### Plot cumulative regret and rewards for non-stationary bandits 

In [None]:
# Cumulative expected reward per round for the sliding-window and discounted
# methods, with a dotted reference line at the largest value in bandit_probs.
#
# The three tables are produced by the save cells or loaded by the
# "Import results" cell. They are not re-read here: this cell already needed
# dts_avg_results_df from an earlier cell, and the A/B-test table it used to
# reload was never plotted.
labelled_results = [
    ('Sliding Window UCB', sw_ucb_avg_results_df),
    ('Sliding Window Thompson Sampling', sw_ts_avg_results_df),
    ('Discounted Thompson Sampling', dts_avg_results_df),
]

fig, ax = plt.subplots(figsize=(12, 10))

# Label each curve directly so the legend always matches the plotted lines.
for method_label, avg_results_df in labelled_results:
    ax.plot(avg_results_df['round id'],
            avg_results_df['cumulative expected reward'],
            linewidth=3.5,
            label=method_label)

# Reference line for the optimal value.
ax.axhline(y=max(bandit_probs), color='k', linestyle=':', linewidth=2.5, label='Optimal')

ax.set_title('Comparison of cumulative expected reward \n for each method in {} simulations'.format(n_simulations))
ax.set_xlabel('Rounds #')
ax.set_ylabel('Cumulative Expected Reward')

ax.legend(loc='best', ncol=1, fontsize=20)

plt.tight_layout()
# Saved next to the CSVs: the rest of the notebook uses the lowercase
# 'results/' folder, and 'Results/' is a different directory on
# case-sensitive filesystems.
plt.savefig(r'results/cumulative expected-reward non-stationary.png')
plt.show()

In [None]:
# Cumulative regret per round for the sliding-window and discounted methods.
#
# The three tables are produced by the save cells or loaded by the
# "Import results" cell. They are not re-read here: this cell already needed
# dts_avg_results_df from an earlier cell, and the A/B-test table it used to
# reload was never plotted.
labelled_results = [
    ('Sliding Window UCB', sw_ucb_avg_results_df),
    ('Sliding Window Thompson Sampling', sw_ts_avg_results_df),
    ('Discounted Thompson Sampling', dts_avg_results_df),
]

fig, ax = plt.subplots(figsize=(12, 10))

# Label each curve directly so the legend always matches the plotted lines.
# No 'Optimal' entry is listed because this figure draws no reference line.
for method_label, avg_results_df in labelled_results:
    ax.plot(avg_results_df['round id'],
            avg_results_df['cumulative regret'],
            linewidth=3.5,
            label=method_label)

ax.set_title('Comparison of cumulative regret \n for each method in {} simulations'.format(n_simulations))
ax.set_xlabel('Rounds #')
# The plotted column is 'cumulative regret', so the axis is labelled as regret.
ax.set_ylabel('Cumulative Regret')

ax.legend(loc='best', ncol=1, fontsize=20)

plt.tight_layout()
# Saved next to the CSVs: the rest of the notebook uses the lowercase
# 'results/' folder, and 'Results/' is a different directory on
# case-sensitive filesystems.
plt.savefig(r'results/cumulative regret non-stationary.png')
plt.show()

### Plot and animate the evolution of the estimated reward-probability distributions

In [None]:
# Settings for the single-experiment plots below.
# NOTE: this rebinds n_rounds and n_simulations from the earlier configuration
# cell, so re-running earlier plot cells afterwards will pick up these values.
bandit_probs = [0.35, 0.40, 0.30, 0.25]
bandit_colors = ['red', 'green', 'blue', 'purple']  # passed to plot_MAB_experiment

epsilon = 0.1
n_rounds = 300
n_simulations = None


In [None]:
# Build a Thompson Sampling simulator and draw its experiment plot, passing
# the colour list and the title below.
plot_title = 'Thompson Sampling decision policy: probability matching'
(
    ThompsonSamplingSimulator(n_simulations, n_rounds, bandit_probs)
    .plot_MAB_experiment(bandit_colors, plot_title)
)

In [None]:
# Build an epsilon-greedy simulator and draw its experiment plot, passing
# the colour list and the title below.
plot_title = 'Epsilon-Greedy'
(
    EpsilonGreedySimulator(epsilon, n_simulations, n_rounds, bandit_probs)
    .plot_MAB_experiment(bandit_colors, plot_title)
)