In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import texttable
import sys
sys.path.append("../src")
from data_proc import *
from policy import *
from simulate import *
from util import Progbar

pd.options.mode.chained_assignment = None

In [2]:
# Default size for every figure created in this notebook (matplotlib reads it as width, height in inches).
plt.rcParams.update({'figure.figsize': [10, 7.5]})

In [3]:
# Location of the raw Warfarin CSV that the simulator and the oracle policy are built from.
# The path is relative, so it is resolved against the kernel's current working directory.
DATA_PATH = "../data/warfarin.csv"  # path to raw Warfarin data file

In [4]:
def evaluate_risk(simulator, policies, num_runs=100, base_seed=None):
    """
    Simulates a list of policies, each for 'num_runs' times (using the same sets of random permutations);
    and compares the policies' regret & "risk".

    We judge a policy's risk based on how often it chooses low (high) when the optimal decision is high (low):
    the confusion-matrix entries [0,2] and [2,0] of a run, as a percentage of the online learning steps.

    simulator:
        WarfarinSimulator instance
    policies:
        List of (Policy, string) tuples, where the string is a description of the policy
        The Policy instances should implement reset(), choose_arm(), and update_policy()
    num_runs:
        Number of simulations per policy. Run i is simulated with random seed (base_seed + i),
        so every policy sees the same sequence of seeds.
    base_seed:
        Seed used for the first run. If None (default), it is drawn uniformly from [0, 1000),
        which makes the statistics differ from call to call; pass an int for a reproducible comparison.

    Returns:
        (table_regret, table_risk): the two texttable.Texttable objects that are also printed,
        each holding one (Policy, Mean, Std, Max, Min) row per policy.
    """
    train_size = simulator.train_size  # number of online learning steps

    # record simulation results
    header = ['Policy', 'Mean', 'Std', 'Max', 'Min']
    table_regret = texttable.Texttable()
    table_regret.header(header)
    table_risk = texttable.Texttable()
    table_risk.header(header)

    if base_seed is None:
        base_seed = np.random.randint(1000)  # randomizing dataset permutations
    for policy, policy_name in policies:
        print("Start simulating {}".format(policy_name))

        regrets = []  # total regrets
        risk_percent = []  # percentage of severe mistakes (low->high or high->low)
        progbar = Progbar(num_runs)  # progress bar
        for i in range(num_runs):
            policy.reset()  # Reset the policy to its initial states
            simulator.simulate(policy, eval_every=10000, random_seed=(base_seed + i))
            regrets.append(simulator.get_total_regret())
            confusion_matrix = simulator.get_confusion_matrix()
            # NOTE(review): assumes index 0 is the "low" arm and index 2 the "high" arm - confirm against the simulator.
            severe_mistakes = confusion_matrix[0, 2] + confusion_matrix[2, 0]
            risk_percent.append(severe_mistakes / train_size * 100)
            progbar.update(i + 1)

        table_regret.add_row((policy_name, np.mean(regrets), np.std(regrets),
                              np.max(regrets), np.min(regrets)))
        table_risk.add_row((policy_name, np.mean(risk_percent), np.std(risk_percent),
                            np.max(risk_percent), np.min(risk_percent)))

    print("Regret Statistics:")
    print(table_regret.draw())
    print("Risk Statistics (Percentage of high->low & low->high decisions):")
    print(table_risk.draw())

    # Hand the tables back so callers can keep or post-process them (e.g. across a parameter sweep).
    return table_regret, table_risk


# One simulator is shared by every policy so that they are compared on the same data.
simulator = WarfarinSimulator(
    DATA_PATH,
    discretize_label_3,
    test_size=500,
    add_bias=True,
)
num_features, num_arms = simulator.num_features, simulator.num_arms

# Compare the oracle, the LinUCB variants, three epsilon-greedy schedules and a fixed-dose policy,
# 500 simulations each. The trailing semicolon keeps the notebook from echoing the call's return value.
evaluate_risk(
    simulator,
    [
        (WarfarinLinearOraclePolicy(DATA_PATH, discretize_label_3), "LinearOracle"),
        (LinUCBPolicy(num_features, num_arms, alpha=1.0), "LinUCB"),
        (LinUCBSafePolicy(num_features, num_arms, alpha=1.0), "LinUCBSafe"),
        # eps_schedule maps a step count to an exploration probability
        (EpsilonGreedyPolicy(num_features, num_arms, eps_schedule=lambda step: 0), "Greedy"),
        (EpsilonGreedyPolicy(num_features, num_arms, eps_schedule=lambda step: 0.2 - step / 20000), "EpsDecay"),
        (EpsilonGreedyPolicy(num_features, num_arms, eps_schedule=lambda step: 0.2), "EpsGreedy(0.2)"),
        (WarfarinFixedDosePolicy(1), "Fixed-dose"),
    ],
    num_runs=500,
);

Instantiated a Warfarin Bandit simulator!
Number of arms: 3
Number of features: 24
Size of training set for online learning: 5028
Size of holdout validation set: 500
Start simulating LinearOracle
Start simulating LinUCB
Start simulating LinUCBSafe
Start simulating Greedy
Start simulating EpsDecay
Start simulating EpsGreedy(0.2)
Start simulating Fixed-dose
Regret Statistics:
+----------------+----------+---------+------+------+
|     Policy     |   Mean   |   Std   | Max  | Min  |
| LinearOracle   | 1653.960 | 10.611  | 1688 | 1622 |
+----------------+----------+---------+------+------+
| LinUCB         | 1720.928 | 24.260  | 1804 | 1648 |
+----------------+----------+---------+------+------+
| LinUCBSafe     | 1718.906 | 23.881  | 1796 | 1653 |
+----------------+----------+---------+------+------+
| Greedy         | 1761.762 | 169.699 | 2977 | 1672 |
+----------------+----------+---------+------+------+
| EpsDecay       | 1839.540 | 26.757  | 1930 | 1757 |
+----------------+----------+

In [21]:
# Fresh simulator for the LinUCB exploration-parameter (alpha) sweep.
simulator = WarfarinSimulator(
    DATA_PATH,
    discretize_label_3,
    test_size=500,
    add_bias=True,
)
num_features, num_arms = simulator.num_features, simulator.num_arms
alpha = np.logspace(-1, 1, num=5)  # 5 log-spaced values from 0.1 to 10 (endpoint is included by default)

for a in alpha:
    # Expects evaluate_risk to hand back the (regret, risk) tables it prints.
    # LinUCBSafe, Greedy, EpsDecay and EpsGreedy(0.2) are intentionally left out of this sweep.
    table_regret, table_risk = evaluate_risk(
        simulator,
        [
            (WarfarinLinearOraclePolicy(DATA_PATH, discretize_label_3), "LinearOracle"),
            (LinUCBPolicy(num_features, num_arms, alpha=a), "LinUCB"),
            (WarfarinFixedDosePolicy(1), "Fixed-dose"),
        ],
        num_runs=10,
    )
    break  # only the first alpha value is evaluated for now

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2910, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-21-9c0bac654412>", line 1, in <module>
    simulator = WarfarinSimulator(DATA_PATH, discretize_label_3, test_size=500, add_bias=True)
  File "../src/simulate.py", line 26, in __init__
  File "/anaconda3/lib/python3.6/site-packages/pandas/io/parsers.py", line 709, in parser_f
    return _read(filepath_or_buffer, kwds)
  File "/anaconda3/lib/python3.6/site-packages/pandas/io/parsers.py", line 449, in _read
    parser = TextFileReader(filepath_or_buffer, **kwds)
  File "/anaconda3/lib/python3.6/site-packages/pandas/io/parsers.py", line 818, in __init__
    self._make_engine(self.engine)
  File "/anaconda3/lib/python3.6/site-packages/pandas/io/parsers.py", line 1049, in _make_engine
    self._engine = CParserWrapper(self.f, **self.options)
  File "/anaconda3/lib/python3.6/site

FileNotFoundError: File b'../data/warfarin.csv' does not exist

In [None]:
# alpha -> 400~500
# linUCB, oracle, greedy
# thompson sampling
# plot risk vs alpha, regret vs alpha
# alpha [0.1,10]
# algo: LinUCBSafe
# reward

# Printing a Texttable object directly only shows its default object repr;
# draw() renders the actual ASCII table, as evaluate_risk does.
print(table_regret.draw())
print(table_risk.draw())

# def plot_helper():
    