
## Multiple Armed Bandit.

### 3 Restaurants in Aachen with each with value as follows on expenditure of 5. Total no of tries is 300:-

a. mean 10 and std 2.5

b. mean 8 and std 4

c. mean 5 and std 2.5

In [142]:
import numpy as np
import pandas as pd
import random
from numba import jit

In [2]:
rest=['a','b','c']
mean=[10,8,5]
std=[2.5,4,2.5]

In [3]:
num_iter=300

In [4]:
Max_regret=num_iter*max(mean)
Max_avg_regret=Max_regret/num_iter

In [5]:
Max_regret

3000

In [6]:
Max_avg_regret

10.0

## NAIVE METHOD

In [7]:

def Naive_Method(items,mu,sigma,chances):
    total_naive=0.0
    for i in range(chances):
        naive_pick=np.random.randint(0,items)
        total_naive=total_naive+np.random.normal(mu[naive_pick],sigma[naive_pick])
    
    return(total_naive)

In [8]:
list_naive=np.array([])

for i in range(1000):
    
    list_naive=np.append(list_naive,Naive_Method(len(rest),mean,std,num_iter))

In [9]:
np.average(list_naive)

2298.041432573774

In [10]:
np.max(list_naive)

2553.8600165896305

In [11]:
np.min(list_naive)

2084.8768237814224

## Exploit only

In [12]:
def Exploit_Only_Method(items,mu,sigma,chances):
    explor=0.0
    selected=0.0
    total_exploit=0.0
    #exploration
    for i in range(items):
        tmp_exploit=np.random.normal(mu[i],sigma[i])
        total_exploit+=tmp_exploit
        if explor<tmp_exploit:
            explor=tmp_exploit
            selected=i
    #exploit        
    for i in range(chances-items):
            total_exploit+=np.random.normal(mu[selected],sigma[selected])
    
    return(total_exploit)

In [13]:
list_exploit=np.array([])

for i in range(1000):
    
    list_exploit=np.append(list_exploit,Exploit_Only_Method(len(rest),mean,std,num_iter))

In [14]:
np.average(list_exploit)

2733.675493207896

In [15]:
np.max(list_exploit)

3105.0377915537165

In [16]:
np.min(list_exploit)

1445.0556792475322

## 1-epsilon Exploit

In [187]:
def one_eps_exploit(items,mu,sigma,eps,chances):
    explor=0.0
    selected=0
    total_exploit=0.0
    tmp_all_exploit=[[] for j in range(items)]
    tmp_best_mean=0.0
    
    #exploration
    for i in range(int(eps*chances)):
        tmp_count=np.random.randint(0,items,size=1)[0]
        tmp_exploit=np.random.normal(mu[tmp_count],sigma[tmp_count])
        tmp_all_exploit[tmp_count].append(tmp_exploit)
        total_exploit+=tmp_exploit
    
    
    for i in range(items):
        explor=np.mean(tmp_all_exploit[i])
        if explor>tmp_best_mean:
            tmp_best_mean=explor
            selected=i
            

    #exploit        
    for i in range(chances-int(eps*chances)):
            total_exploit+=np.random.normal(mu[selected],sigma[selected])
    return(total_exploit)       

In [188]:
list_one_eps_exploit=np.array([])
for i in range(1000):
    list_one_eps_exploit=np.append(list_one_eps_exploit,one_eps_exploit(len(rest),mean,std,0.1,num_iter))

In [58]:
list_one_eps_exploit=np.append(list_one_eps_exploit,one_eps_exploit(len(rest),mean,std,0.1,num_iter))

In [189]:
np.average(list_one_eps_exploit)

2879.273247761452

In [190]:
np.max(list_one_eps_exploit)

3081.1275418255814

In [191]:
np.min(list_one_eps_exploit)

2249.2456080746942

## UCB 1

In [212]:
def UCB1(items,mu,sigma,chances):
    items=int(items)
    explor=0.0
    selected=0
    total_exploit=0.0
    tmp_all_exploit=[[] for j in range(items)]
    tmp_all_mean=np.zeros(items)
    tmp_all_atmpt_counts=np.zeros(items)
    
    
    #1st set of exploration
    ex_lim=1*int(items)
    for i in range(ex_lim):
        tmp_count=i%items
        tmp_exploit=np.random.normal(mu[tmp_count],sigma[tmp_count])
        tmp_all_exploit[tmp_count].append(tmp_exploit)
        tmp_all_mean[tmp_count]=np.mean(tmp_all_exploit[tmp_count])
        tmp_all_atmpt_counts[tmp_count]=len(tmp_all_exploit[tmp_count])
        total_exploit+=tmp_exploit
    
     

    #exploit        
    for i in range(chances-ex_lim):
        tmp_best_mean=0.0
        for j in range(items):
            explor=tmp_all_mean[j] + np.sqrt((2*np.log(ex_lim+i))/tmp_all_atmpt_counts[j])
            if explor>tmp_best_mean:
                tmp_best_mean=explor
                selected=j
        
        total_exploit+=np.random.normal(mu[selected],sigma[selected])
    return(total_exploit)


In [213]:
list_UCB1=np.array([])
for i in range(1000):
    list_UCB1=np.append(list_UCB1,UCB1(len(rest),mean,std,300))

In [214]:
np.average(list_UCB1)

2732.677400312021

In [215]:
np.max(list_UCB1)

3105.303777778429

In [216]:
np.min(list_UCB1)

1404.0153770557552

## New algo based on mean and variance

In [27]:
np.std([1])

0.0

In [195]:
def New_algo(items,mu,sigma,eps,chances):
    items=int(items)
    explor=0.0
    selected=0
    selected_up=0
    selected_down=0
    total_exploit=0.0
    tmp_all_exploit=[[] for j in range(items)]
    tmp_all_mean=np.zeros(items)
    tmp_all_atmpt_std=np.zeros(items)
    tmp_all_upp_lim=np.zeros(items)
    tmp_all_low_lim=np.zeros(items)
    
    
    #2 set of exploration
    ex_lim=int(eps*chances)
    for i in range(ex_lim):
        tmp_count=np.random.randint(0,items,size=1)[0]
        tmp_exploit=np.random.normal(mu[tmp_count],sigma[tmp_count])
        tmp_all_exploit[tmp_count].append(tmp_exploit)
        tmp_all_mean[tmp_count]=np.mean(tmp_all_exploit[tmp_count])
        tmp_all_atmpt_std[tmp_count]=np.std(tmp_all_exploit[tmp_count])
        tmp_all_upp_lim[tmp_count]=tmp_all_mean[tmp_count]+1*tmp_all_atmpt_std[tmp_count]
        tmp_all_low_lim[tmp_count]=tmp_all_mean[tmp_count]-1*tmp_all_atmpt_std[tmp_count]
        total_exploit+=tmp_exploit
    
     

    #exploit        
    for i in range(chances-ex_lim):
        tmp_best_low=0.0
        tmp_best_up=0.0
        for j in range(items):
            if tmp_all_upp_lim[j]>tmp_best_up:
                tmp_best_up=tmp_all_upp_lim[j]
                selected_up=j
            if tmp_all_low_lim[j]>tmp_best_low:
                tmp_best_low=tmp_all_low_lim[j]
                selected_up=j
        
        if tmp_best_up>tmp_best_low:
            selected=selected_up
        else:
            selected=selected_down
            
        
        inst_exploit=np.random.normal(mu[selected],sigma[selected])
        tmp_all_exploit[selected].append(inst_exploit)
        tmp_all_mean[selected]=np.mean(tmp_all_exploit[selected])
        tmp_all_atmpt_std[selected]=np.std(tmp_all_exploit[selected])
        tmp_all_upp_lim[selected]=tmp_all_mean[selected]+1*tmp_all_atmpt_std[selected]
        tmp_all_low_lim[selected]=tmp_all_mean[selected]-1*tmp_all_atmpt_std[selected]
        total_exploit+=inst_exploit
        
        
    return(total_exploit)


In [207]:
New_algo(len(rest),mean,std,0.2,num_iter)

2850.116851202286

In [208]:
list_new_algo=np.array([])
for i in range(1000):
    list_new_algo=np.append(list_new_algo,New_algo(len(rest),mean,std,0.1,num_iter))

In [209]:
np.average(list_new_algo)

2818.5974829454995

In [210]:
np.max(list_new_algo)

3101.4981323798884

In [211]:
np.min(list_new_algo)

2251.1283009706103

## An Instance generator for the problem based on the video

In [101]:
def Instancetest(inp_items,inp_std):
    inst_mean=np.zeros(inp_items)
    inst_std=np.zeros(inp_items)
    for i in range(inp_items):
        
        inst_mean[i]=3*(i+1)
        inst_std[i]=(3*(i+1))*inp_std
        
    return(inst_mean,inst_std)

In [102]:
[a,b]=Instancetest(10,0.1)

## Check of all the types

In [220]:
no_of_items=[3,10,30,100]
std_range=[0.1,0.3,0.5]
num_of_days=300

list_naive_t=np.array([])
list_exploit_t=np.array([])
list_one_eps_exploit_t=np.array([])
list_UCB1_t=np.array([])
list_new_algo_t=np.array([])
list_of_std=np.array([])
list_no_items=np.array([])
list_max_gain=np.array([])
for i in range(10):
    for j in no_of_items:
        for k in std_range:
            
            [Instance_mean,Instance_std]=Instancetest(j,k)
            
            list_no_items=np.append(list_no_items,j)
            list_of_std=np.append(list_of_std,k)
            temp_regret=np.max(Instance_mean)*num_of_days
            list_max_gain=np.append(list_max_gain,temp_regret)
            list_naive_t=np.append(list_naive_t,Naive_Method(j,Instance_mean,Instance_std,num_of_days)/temp_regret)
            list_exploit_t=np.append(list_exploit_t,Exploit_Only_Method(j,Instance_mean,Instance_std,num_of_days)/temp_regret)
            list_one_eps_exploit_t=np.append(list_one_eps_exploit_t,one_eps_exploit(j,Instance_mean,Instance_std,0.2,num_of_days)/temp_regret)
            list_UCB1_t=np.append(list_UCB1_t,UCB1(j,Instance_mean,Instance_std,num_of_days)/temp_regret)
            list_new_algo_t=np.append(list_new_algo_t,New_algo(j,Instance_mean,Instance_std,0.1,num_of_days)/temp_regret)


final_data=pd.DataFrame(columns=['No_of_items','std','Minimum_regret','Naive_algo','Max_Exploit','one-eps','UCB1','New_algo'])
final_data['No_of_items']=list_no_items
final_data['std']=list_of_std
final_data['Minimum_regret']=list_max_gain
final_data['Naive_algo']=list_naive_t
final_data['Max_Exploit']=list_exploit_t
final_data['one-eps']=list_one_eps_exploit_t
final_data['UCB1']=list_UCB1_t
final_data['New_algo']=list_new_algo_t



  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


In [221]:
final_data

Unnamed: 0,No_of_items,std,Minimum_regret,Naive_algo,Max_Exploit,one-eps,UCB1,New_algo
0,3.0,0.1,2700.0,0.668657,0.983768,0.939333,0.998252,0.963217
1,3.0,0.3,2700.0,0.687260,1.016423,0.933581,0.664438,0.985178
2,3.0,0.5,2700.0,0.655128,1.084041,0.939258,0.629019,1.000811
3,10.0,0.1,9000.0,0.556410,0.985598,0.912526,0.991640,0.949764
4,10.0,0.3,9000.0,0.584410,0.603216,0.910733,0.996887,0.941938
...,...,...,...,...,...,...,...,...
115,30.0,0.3,27000.0,0.517162,0.774175,0.716208,0.941770,0.078380
116,30.0,0.5,27000.0,0.504530,0.921659,0.860759,0.957454,0.074899
117,100.0,0.1,90000.0,0.540687,0.840931,0.852549,0.827167,0.048069
118,100.0,0.3,90000.0,0.499015,0.852367,0.882006,0.814984,0.054483


In [222]:
final_data.groupby(by=['No_of_items','std']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,Minimum_regret,Naive_algo,Max_Exploit,one-eps,UCB1,New_algo
No_of_items,std,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
3.0,0.1,2700.0,0.659896,0.99541,0.932578,0.997325,0.969449
3.0,0.3,2700.0,0.656684,0.926971,0.934558,0.868435,0.96296
3.0,0.5,2700.0,0.657745,0.911294,0.942689,0.90459,0.976104
10.0,0.1,9000.0,0.555287,0.967126,0.88415,0.934114,0.869762
10.0,0.3,9000.0,0.55201,0.853236,0.898529,0.931131,0.682808
10.0,0.5,9000.0,0.550937,0.87382,0.824727,0.865249,0.851219
30.0,0.1,27000.0,0.515168,0.916823,0.863119,0.906367,0.338959
30.0,0.3,27000.0,0.520988,0.876224,0.826722,0.901418,0.476336
30.0,0.5,27000.0,0.513457,0.820392,0.826986,0.900178,0.246889
100.0,0.1,90000.0,0.507655,0.813739,0.861869,0.81902,0.063141
