In [None]:
from helper_methods import run_episodes, plot_and_save, run_predefined_means
import numpy as np
import matplotlib.pyplot as plt
import pickle
import math
import random

%load_ext autoreload
%autoreload 2

In [2]:
# Experiment configuration shared by every run in this notebook.
config = {
    'k': 10,            # number of actions
    'm': 20,            # number of agents
    'iters': int(5e4),  # horizon
    'episodes': 100,    # number of repetitions for the experiment
    'var': 1,           # variance of the rewards
    # confidence coefficient based on variance
    # NOTE(review): 'c' is not passed to any runner call visible in this
    # notebook -- confirm where (or whether) it is consumed.
    'c': math.sqrt(1),
}

# Placeholder for the reward means; a later cell overwrites it with an
# explicit list before the experiments are run.
config['mu'] = 'random'

# One epsilon per agent: agents are split into consecutive groups, one group
# per level. np.array_split gives any remainder to the first groups, so the
# array always has exactly m entries. (Sizing every group as int(m / 4)
# silently produced fewer than m values when m was not a multiple of 4.)
epsilon_levels = [0.9, 0.93, 0.95, 0.99]
group_sizes = [
    len(group)
    for group in np.array_split(np.arange(config['m']), len(epsilon_levels))
]
config['epsilons'] = np.repeat(epsilon_levels, group_sizes)

# Distinct epsilon values in ascending order, rendered as e.g. "0_9-0_93"
# so they can be embedded in a folder name.
unique_epsilons = np.unique(config['epsilons']).tolist()
epsilons_str = '-'.join(map(str, unique_epsilons)).replace('.', '_')

# Folder name encodes k, m, the horizon (scientific notation), the number of
# episodes, the reward variance and the distinct epsilon values.
formatted_iters = f"{config['iters']:.0e}"
foldername = (
    f"experiment_k{config['k']}_m{config['m']}_iters{formatted_iters}"
    f"_epis{config['episodes']}_var{config['var']}_eps{epsilons_str}"
)

# One integer in [0, k) per agent. Drawn from a locally seeded generator so
# that restarting the kernel reproduces the same base_actions; this does not
# seed NumPy's global random state or the `random` module.
SEED = 0
rng = np.random.default_rng(SEED)
base_actions = rng.integers(config['k'], size=(config['m'],))

print(foldername)
print(base_actions)

experiment_k10_m20_iters5e+04_epis100_var1_eps0_9-0_93-0_95-0_99
[4 8 5 4 3 6 2 3 7 7 5 5 6 8 8 0 5 3 4 9]


In [3]:
# Tag the output folder for this experiment variant. The guard keeps the cell
# idempotent: re-running it does not append the suffix a second time.
# NOTE(review): the tag reads like c = 0.5, while config['c'] is set to
# math.sqrt(1) -- confirm which value this run is meant to describe.
variant_suffix = '_c_0-5_prerec'
if not foldername.endswith(variant_suffix):
    foldername += variant_suffix

In [4]:
# Loaded results, keyed by the name each run returns.
# NOTE(review): this name shadows the builtin `vars`; it is kept because the
# cells below read and write it under this name.
vars = dict()

# Runs to execute, in order, as (algorithm name, flag, mode) triples. The
# loops below unpack them as (alg_name, rep, mode) and forward them
# positionally to the runner.
algorithms = [
    *[('SAE', True, sae_mode) for sae_mode in ('Weighted', 'Scheduled')],
    ('UCB', False, ''),
    ('SAE', False, ''),
    *[('SAE', True, sae_mode) for sae_mode in ('Horizontal', 'Vertical')],
]

In [5]:
# Reward means: one action at 0.8, one action at 1.0 and every other action
# at 0. The list is then shuffled in place, so the positions are random.
n_actions = config['k']
n_zeros_first = int(n_actions / 2 - 1)
n_zeros_second = n_actions - n_zeros_first - 2

mu = [0.8]
mu.extend([0] * n_zeros_first)
mu.append(1.0)
mu.extend([0] * n_zeros_second)

random.shuffle(mu)
config['mu'] = mu

[1.0, 0, 0, 0, 0.8, 0, 0, 0, 0, 0]


In [36]:
# Run with means defined above
# Each run returns a name and the location of a pickle file; the unpickled
# content is stored in `vars` under that name.
for alg_name, rep, mode in algorithms:
    name, file_loc = run_episodes(
        alg_name,
        rep,
        mode,
        iters=config['iters'],
        k=config['k'],
        episodes=config['episodes'],
        m=config['m'],
        var=config['var'],
        mu=config['mu'],
        eps=config['epsilons'],
        folder_name=foldername,
        base_actions=base_actions,
    )

    # Open the file in a `with` block so the handle is closed after loading.
    # NOTE: unpickling can execute arbitrary code; only load files written by
    # this project's own runs.
    with open(file_loc, "rb") as results_file:
        vars[name] = pickle.load(results_file)

Experiment ended: 10 20 50000 [0.9  0.9  0.9  0.9  0.9  0.93 0.93 0.93 0.93 0.93 0.95 0.95 0.95 0.95
 0.95 0.99 0.99 0.99 0.99 0.99]


In [None]:
# Run with means stored in a file
# Each run returns a name and the location of a pickle file; the unpickled
# content is stored in `vars` under that name.
for alg_name, rep, mode in algorithms:
    name, file_loc = run_predefined_means(
        alg_name,
        rep,
        mode,
        iters=config['iters'],
        k=config['k'],
        episodes=config['episodes'],
        m=config['m'],
        var=config['var'],
        # config['mu'] is not passed in this variant (the argument was
        # disabled in the original cell); the means are expected to come
        # from the stored file instead.
        eps=config['epsilons'],
        folder_name=foldername,
    )

    # Open the file in a `with` block so the handle is closed after loading.
    # NOTE: unpickling can execute arbitrary code; only load files written by
    # this project's own runs.
    with open(file_loc, "rb") as results_file:
        vars[name] = pickle.load(results_file)

In [None]:
# One plotted series per loaded run: the run's 'regret' entry summed along
# axis 1, labelled with the key it is stored under in `vars`.
labels = list(vars)
results = [np.sum(vars[label]['regret'], axis=1) for label in labels]

# Text describing the configuration, passed to the plot helper as `info`.
info = f" (K:{config['k']}, M: {config['m']}, var: {config['var']})"
plot_and_save(results, labels, info=info, f_name=f"results_{foldername}")

## 