In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from environment import run_experiment, RunParameters, RunStatistics
from baifg.model.feedback_graph import FeedbackGraph
from baifg.model.reward_model import GaussianRewardModel, RewardType
from baifg.algorithms.eps_greedy import EpsilonGreedy, EpsilonGreedyParameters
from baifg.algorithms.ucb import UCB
from baifg.algorithms.exp3g import Exp3G, Exp3GParameters
from baifg.algorithms.tas_fg import TaSFG, TaSFGParameters
from baifg.algorithms.base.graph_estimator import GraphEstimator
from baifg.algorithms.base.base_algorithm import BaseAlg
from baifg.utils.graphs import make_loopless_clique
from itertools import product
from typing import List, NamedTuple
from tqdm import tqdm

def make_model(algo_name: BaseAlg, algo_params: NamedTuple, K: int, fg: FeedbackGraph, delta: float, informed: bool) -> BaseAlg:
    if algo_name == EpsilonGreedy:
        return EpsilonGreedy(
            GraphEstimator.optimistic_graph(K, informed=informed, known=False),
            fg.reward_model.reward_type,
            delta=delta,
            parameters=algo_params)
    elif algo_name == UCB:
        return UCB(
            GraphEstimator.optimistic_graph(K, informed=informed, known=False),
            reward_type=fg.reward_model.reward_type,
            delta=delta)
    elif algo_name == Exp3G:
        return Exp3G(
            GraphEstimator.optimistic_graph(K, informed=informed, known=False),
            reward_type=fg.reward_model.reward_type,
            delta=delta,
            parameters=algo_params
        )
    elif algo_name == TaSFG:
        return TaSFG(
            GraphEstimator.optimistic_graph(K, informed=informed, known=False),
            reward_type=fg.reward_model.reward_type,
            delta=delta, parameters=TaSFGParameters(update_frequency=2 * K)
        )
    raise Exception('Algorithm not found')



In [2]:
K=5
fg=make_loopless_clique(p=0.5, mu=np.linspace(0, 1, K))
algo = make_model(algo_name=TaSFG, algo_params=TaSFGParameters(update_frequency=5),
                   K=fg.K, fg=fg, delta=1e-2, informed=False)
results = run_experiment(fg=fg, algo=algo, seed=0)
results

RunStatistics(estimated_best_vertex=np.int64(4), stopping_time=1623)

In [3]:
print(algo.N)
print(algo.reward.M)
print(algo.reward.mu)
print(algo.graph.G.round(2))
print(fg.graph.G)


[589. 294. 309. 248. 182.]
[861. 396. 899. 434. 965.]
[-0.00539617  0.19448235  0.45827099  0.77825859  0.98934371]
[[0.   0.51 0.49 0.5  0.47]
 [0.77 0.   0.74 0.26 0.74]
 [0.82 0.16 0.   0.17 0.85]
 [0.9  0.15 0.91 0.   0.85]
 [0.9  0.08 0.92 0.1  0.01]]
[[0.         0.5        0.5        0.5        0.5       ]
 [0.75       0.         0.75       0.25       0.75      ]
 [0.83333333 0.16666667 0.         0.16666667 0.83333333]
 [0.875      0.125      0.875      0.         0.875     ]
 [0.9        0.1        0.9        0.1        0.        ]]
