In [3]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from environment import run_experiment, RunParameters, RunStatistics
from baifg.model.feedback_graph import FeedbackGraph
from baifg.model.reward_model import GaussianRewardModel, RewardType
from baifg.algorithms.eps_greedy import EpsilonGreedy, EpsilonGreedyParameters
from baifg.algorithms.ucb import UCB
from baifg.algorithms.exp3g import Exp3G, Exp3GParameters
from baifg.algorithms.base.graph_estimator import GraphEstimator
from baifg.algorithms.base.base_algorithm import BaseAlg
from baifg.utils.graphs import make_loopless_clique
from itertools import product
from typing import List, NamedTuple
from tqdm import tqdm

def make_model(algo_name: type[BaseAlg], algo_params: NamedTuple, K: int, fg: FeedbackGraph, delta: float, informed: bool) -> BaseAlg:
    """Instantiate one of the supported algorithms on an optimistic graph estimate.

    Args:
        algo_name: The algorithm *class* to build (``EpsilonGreedy``, ``UCB`` or
            ``Exp3G``) -- the class object itself, not an instance.
        algo_params: Forwarded as ``parameters`` to ``EpsilonGreedy`` and
            ``Exp3G``. It is not used when ``algo_name`` is ``UCB``.
        K: First argument of ``GraphEstimator.optimistic_graph``
            (presumably the number of vertices/arms -- confirm against that API).
        fg: Feedback graph; only ``fg.reward_model.reward_type`` is read here.
        delta: Forwarded to the algorithm constructor as ``delta``.
        informed: Forwarded to ``GraphEstimator.optimistic_graph`` as ``informed``.

    Returns:
        A newly constructed instance of the requested algorithm class.

    Raises:
        ValueError: If ``algo_name`` is not one of the supported classes.
    """
    # Reject unsupported algorithms before doing any other work, so the failure
    # path neither builds a graph estimator nor touches ``fg``.
    if algo_name not in (EpsilonGreedy, UCB, Exp3G):
        raise ValueError(f'Algorithm not found: {algo_name!r}')

    # Every supported algorithm starts from the same estimator (always built
    # with known=False) and the reward type of the supplied feedback graph.
    graph = GraphEstimator.optimistic_graph(K, informed=informed, known=False)
    reward_type = fg.reward_model.reward_type

    if algo_name == EpsilonGreedy:
        # The reward type is passed positionally here, exactly as the original
        # call did; the other two constructors receive it by keyword.
        return EpsilonGreedy(graph, reward_type, delta=delta, parameters=algo_params)
    if algo_name == UCB:
        return UCB(graph, reward_type=reward_type, delta=delta)
    # Only Exp3G remains after the guard above.
    return Exp3G(graph, reward_type=reward_type, delta=delta, parameters=algo_params)



In [5]:
K = 5
fg = make_loopless_clique(p=0.5, mu=np.linspace(0, 1, K))
# make_model's keyword is `algo_name` (not `algName`) and `algo_params` has no
# default; the UCB branch never reads it, so None is passed explicitly.
algo = make_model(algo_name=UCB, algo_params=None, K=fg.K, fg=fg, delta=1e-2, informed=False)
# Fixed seed so the displayed RunStatistics can be reproduced on re-run.
results = run_experiment(fg=fg, algo=algo, seed=0)
results

RunStatistics(estimated_best_vertex=np.int64(4), stopping_time=3006)

In [15]:
# NOTE(review): `rm` is not defined in the cells shown here -- presumably a
# reward model created in an earlier cell; confirm it exists on a fresh kernel.
# Work on a private copy so that rm.gaps itself is never modified.
gaps = rm.gaps.copy()
gaps **= 2
# Entry 0 takes the value of entry 1 (presumably entry 0 is the best arm with a
# zero gap, which would otherwise divide by zero below -- confirm).
gaps[0] = gaps[1]

# Inverse squared gaps, rescaled so the entries sum to one.
x = 1 / gaps
x /= x.sum()
x

array([0.38811881, 0.38811881, 0.12673267, 0.0970297 ])