In [1]:
import numpy as np
from environment import run_experiment, RunParameters, RunStatistics
from baifg.model.feedback_graph import FeedbackGraph
from baifg.model.reward_model import GaussianRewardModel, RewardType
from baifg.algorithms.eps_greedy import EpsilonGreedy, EpsilonGreedyParameters
from baifg.algorithms.ucb import UCB
from baifg.algorithms.base.graph_estimator import GraphEstimator
from baifg.algorithms.base.base_algorithm import BaseAlg
from baifg.utils.graphs import make_loopless_clique
from itertools import product
from typing import List
from tqdm import tqdm

def make_model(algName: BaseAlg, K: int, fg: FeedbackGraph, delta: float, informed: bool) -> BaseAlg:
    if algName == EpsilonGreedy:
        return EpsilonGreedy(
            GraphEstimator.optimistic_graph(K, informed=informed, known=False),
            fg.reward_model.reward_type,
            delta=delta,
            parameters=EpsilonGreedyParameters(exp_rate=0.3),)
    elif algName == UCB:
        return UCB(
            GraphEstimator.optimistic_graph(K, informed=informed, known=False),
            reward_type=fg.reward_model.reward_type,
            delta=delta)
    raise Exception('Algorithm not found')



In [8]:
seed = 0

envs: List[RunParameters] = []
Kvalues = [5, 10, 15]
delta = np.exp(-np.linspace(1,9,10))

for K, delta, informed in product(Kvalues, delta, [False]):
    envs.append(
        RunParameters('Loopless clique', f'p=0.5, K={K}, delta={delta}, informed={informed}', delta, informed=True,
                      known=False, fg=make_loopless_clique(p=0.5, mu=np.linspace(0, 1, K)),
                      results = {})
    )

algorithms = [EpsilonGreedy, UCB]


In [9]:

Nsims = 10

for env in envs:
    print(f'Running {env.name} - {env.description}')
    for n in tqdm(range(Nsims)):
        for algo in algorithms:
            algo = make_model(algName=algo, K=env.fg.K, fg=env.fg, delta=env.delta, informed=env.informed)
            results = run_experiment(fg=env.fg, algo=algo, seed=n)

            if algo.NAME not in env.results:
                env.results[algo.NAME] = []
            env.results[algo.NAME].append(results)


Running Loopless clique - p=0.5, K=5, delta=0.36787944117144233, informed=False


100%|██████████| 10/10 [00:07<00:00,  1.36it/s]


Running Loopless clique - p=0.5, K=5, delta=0.15123975969049577, informed=False


100%|██████████| 10/10 [00:13<00:00,  1.33s/it]


Running Loopless clique - p=0.5, K=5, delta=0.06217652402211632, informed=False


100%|██████████| 10/10 [00:13<00:00,  1.35s/it]


Running Loopless clique - p=0.5, K=5, delta=0.025561533206507402, informed=False


100%|██████████| 10/10 [00:16<00:00,  1.67s/it]


Running Loopless clique - p=0.5, K=5, delta=0.01050866046540279, informed=False


100%|██████████| 10/10 [00:23<00:00,  2.34s/it]


Running Loopless clique - p=0.5, K=5, delta=0.004320239474094066, informed=False


100%|██████████| 10/10 [00:27<00:00,  2.72s/it]


Running Loopless clique - p=0.5, K=5, delta=0.0017761035457343791, informed=False


100%|██████████| 10/10 [00:31<00:00,  3.19s/it]


Running Loopless clique - p=0.5, K=5, delta=0.0007301779968647981, informed=False


100%|██████████| 10/10 [00:53<00:00,  5.31s/it]


Running Loopless clique - p=0.5, K=5, delta=0.000300185148769037, informed=False


100%|██████████| 10/10 [01:24<00:00,  8.50s/it]


Running Loopless clique - p=0.5, K=5, delta=0.00012340980408667956, informed=False


100%|██████████| 10/10 [01:29<00:00,  8.97s/it]


Running Loopless clique - p=0.5, K=10, delta=0.36787944117144233, informed=False


100%|██████████| 10/10 [00:17<00:00,  1.79s/it]


Running Loopless clique - p=0.5, K=10, delta=0.15123975969049577, informed=False


100%|██████████| 10/10 [00:37<00:00,  3.73s/it]


Running Loopless clique - p=0.5, K=10, delta=0.06217652402211632, informed=False


100%|██████████| 10/10 [01:00<00:00,  6.07s/it]


Running Loopless clique - p=0.5, K=10, delta=0.025561533206507402, informed=False


100%|██████████| 10/10 [00:59<00:00,  5.97s/it]


Running Loopless clique - p=0.5, K=10, delta=0.01050866046540279, informed=False


100%|██████████| 10/10 [01:05<00:00,  6.55s/it]


Running Loopless clique - p=0.5, K=10, delta=0.004320239474094066, informed=False


 60%|██████    | 6/10 [00:43<00:28,  7.24s/it]


KeyboardInterrupt: 

In [7]:
env

RunParameters(name='Loopless clique', description='p=0.5, K=5, delta=0.049787068367863944, informed=False', delta=np.float64(0.049787068367863944), informed=True, known=False, fg=<baifg.model.feedback_graph.FeedbackGraph object at 0x000001D37DF85790>, results={'Epsilon-greedy': [RunStatistics(estimated_best_vertex=np.int64(4), stopping_time=1768), RunStatistics(estimated_best_vertex=np.int64(4), stopping_time=245), RunStatistics(estimated_best_vertex=np.int64(4), stopping_time=747), RunStatistics(estimated_best_vertex=np.int64(4), stopping_time=5229), RunStatistics(estimated_best_vertex=np.int64(4), stopping_time=514), RunStatistics(estimated_best_vertex=np.int64(4), stopping_time=446), RunStatistics(estimated_best_vertex=np.int64(4), stopping_time=6272), RunStatistics(estimated_best_vertex=np.int64(4), stopping_time=1921), RunStatistics(estimated_best_vertex=np.int64(4), stopping_time=4211), RunStatistics(estimated_best_vertex=np.int64(4), stopping_time=737)], 'UCB': [RunStatistics(es

In [5]:
print(algo.graph.N)
# print(algo.graph.Npair)
# print(algo.reward.M)
# print(algo.reward.mu)
print(algo.graph.G)
algo.reward.confidence

m = (algo.reward.mu + algo.reward.confidence).argmax()
algo.graph.G[:,m]

algo.graph.confidence


[2. 2. 2. 1. 0.]
[[0.5 1.5 0.5 1.  1. ]
 [1.  0.5 0.5 0.5 1. ]
 [1.  0.5 0.5 0.5 1. ]
 [2.  1.  2.  1.  2. ]
 [1.  1.  1.  1.  1. ]]


array([[0.72101344, 0.72101344, 0.72101344, 0.72101344, 0.72101344],
       [0.72101344, 0.72101344, 0.72101344, 0.72101344, 0.72101344],
       [0.72101344, 0.72101344, 0.72101344, 0.72101344, 0.72101344],
       [1.01966699, 1.01966699, 1.01966699, 1.01966699, 1.01966699],
       [1.01966699, 1.01966699, 1.01966699, 1.01966699, 1.01966699]])