# Policy Evaluation Examples

In [1]:
import sys
sys.path.append("../")

from blbf.STBT import STBT
from blbf.PolicyEvaluation import PolicyEvaluation 
import blbf.DataReader as DataReader
import blbf.utils as utils
import pandas as pd
import numpy as np
import sklearn.linear_model 
import seaborn as sns
import matplotlib.pyplot as plt



## Compare the following methods

* IPS: Inverse Propensity Score
* DM: Direct Method (Reward Prediction)
* DR: Doubly Robust
* SWITCH: Switch Estimator

In [2]:
class ComparePolicyEvaluation:
    """Compare off-policy value estimators over repeated simulated batches.

    For every dataset, ``B`` batches are generated with
    ``STBT().generate_batch`` and each estimator in ``METHODS`` is evaluated
    on the batch. The estimates are compared against the batch's
    ``true_target_value_test``.

    Attributes created by the methods:
        res: one row per (dataset, replication) holding the true value, the
            estimates and the derived error columns (set by ``fit_policies``).
        res_summary: per-dataset mean/std of the error columns (set by
            ``get_summary_stats``).
    """

    # Estimators, in the order they are evaluated and reported.
    METHODS = ('ips', 'dm', 'dr', 'switch')

    # Datasets used when none are supplied ('letter-recognition' is left out,
    # as in the original default list).
    DEFAULT_DATASETS = ('ecoli', 'glass', 'lymphography', 'yeast',
                        'digits', 'breast-cancer', 'wine')

    def __init__(self, B: int = 100, datasets: list = None):
        """Store the number of replications ``B`` and the dataset names.

        ``datasets=None`` means "use ``DEFAULT_DATASETS``"; the default is
        resolved lazily in ``fit_policies``.
        """
        self.B = B
        self.datasets = datasets

    def __repr__(self):
        """Return ``<ClassName: {attr = value, ...}>`` for all instance attributes."""
        items = ("%s = %r" % (k, v) for k, v in self.__dict__.items())
        return "<%s: {%s}>" % (self.__class__.__name__, ', '.join(items))

    def _require_results(self) -> pd.DataFrame:
        """Return ``self.res`` or raise a descriptive error if it is missing."""
        try:
            return self.res
        except AttributeError:
            # Same exception type as before, but with an actionable message.
            raise AttributeError(
                "No results available: call fit_policies() first."
            ) from None

    @classmethod
    def _add_error_columns(cls, res: pd.DataFrame) -> pd.DataFrame:
        """Add ``<method>_bias`` and ``<method>_rr`` columns to ``res`` in place.

        bias = true_value - estimate (positive when the estimate is too low).
        rr   = |(true_value - estimate) / true_value|; not finite where
               ``true_value`` is 0.
        """
        true_value = res['true_value'].values

        # Bias
        for method in cls.METHODS:
            res[method + '_bias'] = true_value - res[method].values

        # Relative risk
        for method in cls.METHODS:
            res[method + '_rr'] = np.abs((true_value - res[method].values) / true_value)

        return res

    def fit_policies(self, **kwargs) -> "ComparePolicyEvaluation":
        """Run all estimators ``B`` times per dataset and store the results.

        Args:
            **kwargs: forwarded to ``evaluate_policy`` for the 'dm', 'dr' and
                'switch' estimators. The 'ips' call receives no extra
                arguments.

        Returns:
            ``self`` (so calls can be chained), with ``self.res`` populated.
        """
        if self.datasets is None:
            self.datasets = list(self.DEFAULT_DATASETS)

        columns = {name: [] for name in ('dataset', 'true_value') + self.METHODS}

        for s in self.datasets:
            for b in range(self.B):
                if (b % 10) == 0:
                    print("Sample: %d - Dataset: %s" % (b, s))
                # NOTE(review): the dataset is re-read on every replication.
                # If DataReader.get_data is deterministic and generate_batch
                # does not mutate X/y, this could be hoisted out of the inner
                # loop -- confirm before changing.
                X, y = DataReader.get_data(dataset=s)
                d = STBT().generate_batch(X, y, max_iter=1000)

                columns['dataset'].append(s)
                columns['true_value'].append(d.true_target_value_test)
                for method in self.METHODS:
                    # IPS is evaluated without the extra keyword arguments.
                    extra = {} if method == 'ips' else kwargs
                    estimate = PolicyEvaluation(method=method).evaluate_policy(data=d, **extra)
                    columns[method].append(estimate)

        self.res = self._add_error_columns(pd.DataFrame.from_dict(columns))

        return self

    def get_summary_stats(self) -> "ComparePolicyEvaluation":
        """Aggregate mean and std of every error column per dataset.

        Returns:
            ``self``, with the aggregated table stored in ``self.res_summary``.

        Raises:
            AttributeError: if ``fit_policies`` has not been called yet.
        """
        res = self._require_results()

        # Bias columns first, then relative-risk columns, each in METHODS order.
        agg_spec = {'%s_%s' % (method, metric): ['mean', 'std']
                    for metric in ('bias', 'rr')
                    for method in self.METHODS}

        self.res_summary = res.groupby(['dataset'], as_index=False).agg(agg_spec)
        return self

    def plot_bias(self):
        """Show one box plot of the bias columns per dataset.

        A dashed horizontal line at 0 is drawn on each panel.

        Raises:
            AttributeError: if ``fit_policies`` has not been called yet.
        """
        res = self._require_results()

        res_long = pd.melt(res, id_vars=['dataset'], var_name='method', value_name="bias",
                           value_vars=[method + '_bias' for method in self.METHODS])

        grid = sns.catplot(x="method", y="bias", col="dataset", kind="box",
                           col_wrap=3, data=res_long)
        # Iterate the panels directly instead of indexing by position.
        for panel in grid.axes.flat:
            panel.axhline(0, ls="--")

        plt.show()

In [None]:
# Run the comparison with B=100 replications per dataset. No `datasets`
# argument is given, so the built-in default list is used. max_iter=1000 is
# forwarded to the DM, DR and SWITCH evaluators (the IPS call takes no kwargs).
cpe = ComparePolicyEvaluation(B=100).fit_policies(max_iter=1000)

Sample: 0 - Dataset: ecoli


In [None]:
# Aggregate the per-dataset mean and std of the bias and relative-risk
# columns, then display the resulting table as the cell output.
cpe.get_summary_stats()
cpe.res_summary   

In [None]:
# Box plots of each estimator's bias, one panel per dataset, with a dashed
# reference line at zero.
cpe.plot_bias()