In [1]:
// Kernel setup for the k-bandit library: register its header directory and
// library directory with cling, then load the shared library into the session.
// NOTE(review): the /rl/... paths are absolute and machine-specific — confirm
// they match the environment this notebook is run in.
#pragma cling add_include_path("/rl/include/")
#pragma cling add_library_path("/rl/lib/")
#pragma cling load("libkbandit.so")

In [2]:
#include <kbandit/k-bandit.h>
#include <iostream>
#include <string>
#include <numeric>
#include <algorithm>

In [3]:
void do_test(const std::string& agent_name, KBanditsAgent* agent, unsigned int tests, KBandits& bandits) {
    std::cout << agent_name << " agent\n";

    std::vector<double> results(tests, 0.0);
    std::transform(results.begin(), results.end(), results.begin(), 
        [agent, &bandits](auto val) {
            std::size_t selection = agent->get_selection();
            double reward = bandits.get_reward(selection);
            agent->add_reward(selection, reward);

            return reward;
        });

    double total_reward = std::reduce(results.begin(), results.end(), 0.0);
    std::cout << "\tTotal reward: " << total_reward << '\n';
    std::cout << "\tAverage reward: " << total_reward / tests << '\n';
    std::cout << "\tBest bandit to agent: " << agent->get_best_bandit() << '\n';
}

In [4]:
std::cout << "Running K-Bandits tests" << std::endl;

// Experiment configuration shared by the agent cells further down.
const unsigned int tests = 1000;            // interaction steps per agent
const unsigned int n_bandits = 10;          // number of bandits in the set
const double initial_agent_estimate = 0.0;  // passed to each agent's constructor

// NOTE(review): the meaning of the KBandits constructor arguments is not
// visible in this notebook; the trailing 42 looks like an RNG seed — confirm
// against kbandit/k-bandit.h.
auto bandits = KBandits(0.0, 1.0, 1.0, n_bandits, 42);
std::cout <<  "Best bandit: " << bandits.best_bandit() << std::endl;

// Dump every bandit as "<index> :: <mean_reward> <variance>", one per line.
// Each entry is terminated with a newline so the entries do not run together
// on a single output line.
for (size_t i = 0; i != n_bandits; i++) {
    Bandit& b = bandits.get_bandit(i);
    std::cout << "\t" << i << " :: " << b.mean_reward() << " " << b.variance() << '\n';
}
// Flush once after the loop so the listing is shown with the cell output.
std::cout << std::flush;

input_line_10:8:1: error: use of undeclared identifier 'fmt'
fmt::print("Best bandit: {}\n", bandits.best_bandit());
^

Interpreter Error: 

In [None]:
// Greedy
// Builds a BasicGreedyAgent with second constructor argument 0.0 and runs it
// for `tests` steps against the shared `bandits` object via do_test().
// NOTE(review): the second argument is presumably the exploration rate
// (epsilon), with 0.0 meaning purely greedy — confirm against BasicGreedyAgent.
BasicGreedyAgent greedy_agent(n_bandits, 0.0, initial_agent_estimate);
do_test("Greedy", &greedy_agent, tests, bandits);

In [None]:
// e-0.1
// Same experiment as the greedy cell, but the agent's second constructor
// argument is 0.1.
// NOTE(review): presumably an epsilon-greedy agent with epsilon = 0.1 —
// confirm against BasicGreedyAgent.
BasicGreedyAgent e01_agent(n_bandits, 0.1, initial_agent_estimate);
do_test("e-0.1", &e01_agent, tests, bandits);

In [None]:
// e-0.01
// Same experiment as the greedy cell, but the agent's second constructor
// argument is 0.01.
// NOTE(review): presumably an epsilon-greedy agent with epsilon = 0.01 —
// confirm against BasicGreedyAgent.
BasicGreedyAgent e001_agent(n_bandits, 0.01, initial_agent_estimate);
do_test("e-0.01", &e001_agent, tests, bandits);