In [1]:
#pragma cling add_include_path("/rl/include/")
#pragma cling add_library_path("/rl/lib/")
#pragma cling load("libkbandit.so")

In [2]:
#include <kbandit/k-bandit.h>
#include <iostream>
#include <string>
#include <numeric>
#include <algorithm>

#include <fmt/core.h>

In [3]:
/**
 * Run one agent against a set of bandits and print a short summary.
 *
 * Each of the `tests` rounds asks the agent for a selection, draws a reward
 * for that selection from `bandits`, and reports the reward back to the agent.
 * Afterwards the total reward, the average reward per round and the value of
 * `agent->get_best_bandit()` are printed.
 *
 * @param agent_name Label printed as the header line of the summary.
 * @param agent      Agent under test; dereferenced unconditionally, so it
 *                   must not be null. Updated through `add_reward`.
 * @param tests      Number of rounds to play. With 0 rounds nothing is
 *                   played and the average is reported as 0 instead of NaN.
 * @param bandits    Reward source, taken by non-const reference and queried
 *                   once per round via `get_reward`.
 */
void do_test(const std::string& agent_name, KBanditsAgent* agent, unsigned int tests, KBandits& bandits) {
    std::cout << agent_name << " agent\n";

    double total_reward = 0.;
    // The counter has the same unsigned type as `tests`, avoiding a
    // signed/unsigned comparison and signed overflow on very large counts.
    for (unsigned int i = 0; i != tests; ++i) {
        const std::size_t selection = agent->get_selection();
        const double reward = bandits.get_reward(selection);
        agent->add_reward(selection, reward);

        total_reward += reward;
    }

    // Guard the division so that an empty run does not print NaN.
    const double average_reward = (tests != 0) ? total_reward / tests : 0.0;

    fmt::print("\tTotal reward: {}\n", total_reward);
    fmt::print("\tAverage reward: {}\n", average_reward);
    fmt::print("\tBest bandit to agent: {}\n", agent->get_best_bandit());
}

In [13]:
// Experiment setup: defines the shared constants and the bandit environment
// used by the agent cells below, then prints each bandit's parameters.
std::cout << "Running K-Bandits tests \n";

// Initialize
const unsigned int tests = 10000;              // rounds played per agent (passed to do_test)
const unsigned int n_bandits = 10;             // number of bandits / arms
const double initial_agent_estimate = 0.0;     // last constructor argument of every BasicGreedyAgent below

// NOTE(review): the meaning of the first three arguments (0.0, 1.0, 1.0) is not
// visible in this notebook - presumably parameters of the reward distributions.
// The trailing 42 looks like an RNG seed. Confirm both against kbandit/k-bandit.h.
auto bandits = KBandits(0.0, 1.0, 1.0, n_bandits, 42);
fmt::print("Best bandit: {}\n", bandits.best_bandit());
// One line per bandit: index :: mean_reward() (variance()), both to 3 decimals.
for (size_t i = 0; i != n_bandits; i++) {
    Bandit& b = bandits.get_bandit(i);
    fmt::print("\t{0} :: {1:.3f} ({2:.3f})\n", i, b.mean_reward(), b.variance());
}

Running K-Bandits tests 
Best bandit: 8
	0 :: -1.714 (1.000)
	1 :: 0.178 (1.000)
	2 :: 0.057 (1.000)
	3 :: -1.410 (1.000)
	4 :: 0.756 (1.000)
	5 :: -0.582 (1.000)
	6 :: -1.602 (1.000)
	7 :: -0.304 (1.000)
	8 :: 1.017 (1.000)
	9 :: -0.584 (1.000)


In [14]:
// Greedy run: BasicGreedyAgent built with 0.0 as its second argument
// (presumably the exploration rate epsilon - TODO confirm against k-bandit.h).
// `bandits` is handed to do_test by non-const reference, so any internal state
// it changes while producing rewards carries over to cells executed afterwards.
BasicGreedyAgent greedy_agent(n_bandits, 0.0, initial_agent_estimate);
do_test("Greedy", &greedy_agent, tests, bandits);

Greedy agent
	Total reward: 10003.276689743276
	Average reward: 1.0003276689743277
	Best bandit to agent: 8


In [15]:
// e-0.1 run: same agent type, with 0.1 as its second argument
// (presumably epsilon = 0.1 for epsilon-greedy exploration - TODO confirm).
BasicGreedyAgent e01_agent(n_bandits, 0.1, initial_agent_estimate);
do_test("e-0.1", &e01_agent, tests, bandits);

e-0.1 agent
	Total reward: 8294.818010948004
	Average reward: 0.8294818010948004
	Best bandit to agent: 8


In [16]:
// e-0.01 run: same agent type, with 0.01 as its second argument
// (presumably epsilon = 0.01 for epsilon-greedy exploration - TODO confirm).
BasicGreedyAgent e001_agent(n_bandits, 0.01, initial_agent_estimate);
do_test("e-0.01", &e001_agent, tests, bandits);

e-0.01 agent
	Total reward: 9724.747756454897
	Average reward: 0.9724747756454897
	Best bandit to agent: 8
