# Evaluation of Targeting Policies

In [None]:
import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score, mean_squared_error, mean_absolute_error
from tqdm import tqdm
import sys
import warnings

# Silence all warnings unless the interpreter was started with explicit -W options.
if not sys.warnoptions:
    warnings.simplefilter("ignore")

In [None]:
import os
import sys

# Location of the local `treatment-learn` checkout that provides the `treatlearn` package.
# Set the TREATLEARN_PATH environment variable to override the machine-specific default,
# which is kept so the notebook still runs unchanged on the original machine.
TREATLEARN_PATH = os.environ.get(
    "TREATLEARN_PATH",
    'C:/Users/julia/OneDrive - Humboldt-Universitaet zu Berlin, CMS/Desktop_alt/thesis/code/treatment-learn')
# Re-running the cell must not keep appending the same entry to sys.path.
if TREATLEARN_PATH not in sys.path:
    sys.path.append(TREATLEARN_PATH)
from treatlearn.policy import bayesian_targeting_policy
from treatlearn.evaluation import transformed_outcome_loss, expected_policy_profit

## Load Data

Confirm the path to the actual data and the path where the model results are saved.

In [None]:
import matplotlib
import matplotlib.pyplot as plt
# from matplotlib import rc
# rc('font',**{'family':'serif','serif':['cm']})
# ## for Palatino and other serif fonts use:
# #rc('font',**{'family':'serif','serif':['Palatino']})
# rc('text', usetex=False)
# #matplotlib.rcParams['mathtext.fontset'] = 'cm'
# matplotlib.pyplot.title(r'ABC123 vs $\mathrm{ABC123}^{123}$')
#
# plt.rc('text.latex', preamble=r'\usepackage{underscore}')

Results of different runs, some after scaling

In [None]:
# 5.2 All models, without scaling
DATA_PATH = "data/fashionB_clean_nonlinear.csv"
RESULT_PATH = "prediction_test_results_5.2"
#RESULT_PATH = "oracle_prediction_test_targeting" # for scaled predictions after selection
# The saved array holds one entry per fold; unpack it into a plain list.
predictions = np.load(f"results/{RESULT_PATH}.npy", allow_pickle=True)
predictions_test = list(predictions)

In [None]:
#5.3. Regularization: 50 folds with CATE Scaling
# Rebinds predictions_test / predictions_train, replacing whatever an earlier loading cell set.
RESULT_PATH = "prediction_test_scaled_cv_CATE"
predictions_test = list(np.load(f"results/{RESULT_PATH}.npy", allow_pickle=True))
RESULT_PATH = "prediction_train_scaled_cv_CATE"
predictions_train = list(np.load(f"results/{RESULT_PATH}.npy", allow_pickle=True))

In [None]:
#5.3. Regularization: 50 folds without CATE Scaling
# Rebinds predictions_test / predictions_train, replacing whatever an earlier loading cell set.
RESULT_PATH = "prediction_test_cv"
predictions_test = list(np.load(f"results/{RESULT_PATH}.npy", allow_pickle=True))
RESULT_PATH = "prediction_train_cv"
predictions_train = list(np.load(f"results/{RESULT_PATH}.npy", allow_pickle=True))

In [None]:
#5.3. Regularization: 50 folds with Oracle Scaling, XBCF shifted beforehand
# Rebinds predictions_test / predictions_train, replacing whatever an earlier loading cell set.
RESULT_PATH = "prediction_test_oracle_cv"
predictions_test = list(np.load(f"results/{RESULT_PATH}.npy", allow_pickle=True))
RESULT_PATH = "prediction_train_oracle_cv"
predictions_train = list(np.load(f"results/{RESULT_PATH}.npy", allow_pickle=True))

In [None]:
DATA_PATH = "data/fashionB_clean_nonlinear.csv"
# Read the data set; every column popped below is removed from X.
X = pd.read_csv(DATA_PATH)

# Outcome / assignment columns used for evaluation, as NumPy arrays.
c, g, y = (X.pop(column).to_numpy() for column in ("converted", "TREATMENT", "checkoutAmount"))
# The first two treatment-effect columns stay pandas Series; only the response effect is converted.
tau_conversion = X.pop('TREATMENT_EFFECT_CONVERSION')
tau_basket = X.pop('TREATMENT_EFFECT_BASKET')
tau_response = X.pop('TREATMENT_EFFECT_RESPONSE').to_numpy()  #added



In [None]:
from datetime import date

# Date stamp that is interpolated into the result and figure file names further down.
today = date.today()
today

In [None]:
# Passed as `margin=` and `offer_cost=` to every policy and profit helper call below.
MARGIN_RATIO = 0.3
OFFER_COST = 10
# NOTE(review): the star import hides which helper names this notebook relies on;
# consider importing them explicitly.
from helper import *



## 1 Regularization Policy

### For Oracle Evaluation: Simulation with Grid of Lambdas

In [None]:
# Grid of candidate gamma values: from 0 (inclusive) to 0.5 (exclusive) in steps of 0.01.
start, stop, step = 0, 0.5, 0.01

float_range_array = np.arange(start, stop, step)  #.round(2)

In [None]:
# Oracle setting: score the regularised policy for every gamma on the grid, fold by fold.
eval_profit_reg_left = []
for outcome_dict in tqdm(predictions_test):
    fold_rows = outcome_dict["idx"]  # index into y, c, g and tau_response for this fold

    for gamma in float_range_array:
        # Policy decision plus estimation errors for this gamma
        policy_dict, errors_reg = calc_bayesian_uncertainty_policy(
            treatment_dict=outcome_dict["treatment_spending"],
            pi_dict=outcome_dict['prediction_intervals'],
            conversion_dict=outcome_dict["conversion"],
            margin=MARGIN_RATIO,
            contact_cost=0,
            offer_cost=OFFER_COST,
            gamma=gamma,
            tail='left',
            calc_error=True,
            y=y[fold_rows],
            g=g[fold_rows],
            tau_true=tau_response[fold_rows],
        )

        # Realised profit of that decision; the error metrics are merged into the same record.
        profit = calc_policy_profit(
            policy_dict=policy_dict,
            y_true=y[fold_rows],
            c_true=c[fold_rows],
            g=g[fold_rows],
            margin=MARGIN_RATIO,
            contact_cost=0,
            offer_cost=OFFER_COST,
        )
        profit.update(errors_reg)
        eval_profit_reg_left.append(profit)

In [None]:
# Persist the per-fold, per-gamma records produced by the oracle evaluation loop.
np.save("results/eval_profit_oracle_error.npy", eval_profit_reg_left, allow_pickle=True)

### Empirical tuning of lambda (here: gamma), returns RMSE:

In [None]:
# Per fold: baseline Bayesian policy, gamma tuned on the training predictions, and the
# regularised policy evaluated on the test predictions with the tuned gamma.
eval_profit_reg_left = []
threshold_tuned = []

for fold in tqdm(range(len(predictions_test))):
    #print(fold)
    outcome_dict_train = predictions_train[fold]
    outcome_dict = predictions_test[fold]

    # Calculate policy decision
    policy_dict, errors = calc_bayesian_policy(
        treatment_dict=outcome_dict["treatment_spending"],
        conversion_dict=outcome_dict["conversion"],
        margin=MARGIN_RATIO, contact_cost=0, offer_cost=OFFER_COST,
        calc_error=True, y=y[outcome_dict["idx"]],
        g=g[outcome_dict["idx"]], tau_true=tau_response[outcome_dict["idx"]])

    # # Tune threshold for regularization
    threshold_dict = tune_gamma(outcome_dict_train["treatment_spending"], outcome_dict_train['prediction_intervals'],
                                conversion_dict=outcome_dict_train["conversion"], margin=MARGIN_RATIO,
                                contact_cost=0, offer_cost=OFFER_COST, y_true=y[outcome_dict_train["idx"]],
                                c_true=c[outcome_dict_train["idx"]], g=g[outcome_dict_train["idx"]],
                                prob_treatment=None)
    # Record the tuned thresholds of this fold. The list was previously left empty,
    # although the "Analyze thresholds" cells iterate over it.
    threshold_tuned.append(threshold_dict)

    reg_policy, errors_reg = calc_bayesian_uncertainty_policy(
        treatment_dict=outcome_dict["treatment_spending"],
        pi_dict=outcome_dict['prediction_intervals'],
        conversion_dict=outcome_dict["conversion"],
        margin=MARGIN_RATIO, contact_cost=0, offer_cost=OFFER_COST, gamma_dict=threshold_dict,
        tail='left', calc_error=True, y=y[outcome_dict["idx"]],
        g=g[outcome_dict["idx"]], tau_true=tau_response[outcome_dict["idx"]])
    # Merge the regularised policies into the baseline policy dict so both are scored together.
    policy_dict.update(reg_policy)

    # Merge the error metrics of the regularised policies into the baseline error dicts.
    errors['TOL'].update(errors_reg['TOL'])
    errors['RSME'].update(errors_reg['RSME'])
    errors['Ratio_test'].update(errors_reg['Ratio_test'])

    profit = calc_policy_profit(
        policy_dict=policy_dict,
        y_true=y[outcome_dict["idx"]], c_true=c[outcome_dict["idx"]], g=g[outcome_dict["idx"]],
        margin=MARGIN_RATIO, contact_cost=0, offer_cost=OFFER_COST)

    # One record per fold: profit results plus the error metrics.
    profit.update(errors)

    eval_profit_reg_left.append(profit)



Create Profit Dataframe

In [None]:
# Stack the per-run records into one frame indexed by (run number, model key).
run_frames = [pd.DataFrame(record) for record in eval_profit_reg_left]
eval_profit = pd.concat(run_frames, axis=0, keys=range(len(eval_profit_reg_left))).rename_axis(
    index=["fold", "model"])

In [None]:
# Inspect the stacked results.
eval_profit

In [None]:
# Average every metric over the runs, one row per model key.
eval_profit = eval_profit.groupby(level="model").mean()

In [None]:
# Split each "+"-joined model key into its parts, one index level per part.
model_key_parts = eval_profit.index.str.split("[+]", expand=True)
eval_profit.index = pd.MultiIndex.from_tuples(model_key_parts.tolist())


In [None]:
# Names for the index levels obtained by splitting the model key. The trailing entries
# ("profit", "ratio_treated", "TOL", "RSME", "Ratio_test") appear to be column names rather
# than index levels; assigning more names than levels raises a ValueError, so only as many
# names as there are levels are applied. With a ten-level index the behaviour is unchanged.
index_level_names = ["Policy", "CATE_Estimator", "Conversion_Estimator", "PI_Estimator", "gamma", "profit",
                     "ratio_treated", "TOL", "RSME", "Ratio_test"]
eval_profit.index.names = index_level_names[:eval_profit.index.nlevels]

In [None]:
# Turn the index levels into ordinary columns.
eval_profit = eval_profit.reset_index(drop=False)

In [None]:
# Keep only the identifying columns and the two profit metrics; everything else is dropped.
report_columns = ["Policy", "CATE_Estimator", "Conversion_Estimator", "PI_Estimator", "profit", "ratio_treated"]
eval_profit = eval_profit.reindex(columns=report_columns)

In [None]:
# Export the summary table; the file name carries today's date.
eval_profit.to_excel(f"results/{today}_eval_profit_reg.xlsx")

Analyze thresholds

In [None]:
# change ordering of dicts: collect, per fold, the tuned threshold of two selected
# model combinations (nested keys as used below).
tau_by_fold_xbcf = []
tau_by_fold_CP = []

for folds in threshold_tuned:
    # The temporary used here was previously called `list`, which replaced the builtin
    # for the rest of the session.
    hurdle_thresholds = folds['single-model_hurdle_gbt']
    tau_by_fold_CP.append(hurdle_thresholds['CP_two-model_NN']['single-model_hurdle_gbt'])
    tau_by_fold_xbcf.append(hurdle_thresholds['xbcf_outcome_xbcf']['single-model_hurdle_gbt'])

In [None]:
# Mean tuned threshold across folds for the two selected model combinations.
print(np.mean(tau_by_fold_xbcf))
print(np.mean(tau_by_fold_CP))

## Uncertainty Evaluation for Regularization Policy

In [None]:
# Rebuild the un-aggregated frame: one row per (run number, model key).
run_frames = [pd.DataFrame(record) for record in eval_profit_reg_left]
eval_profit = pd.concat(run_frames, axis=0, keys=range(len(eval_profit_reg_left))).rename_axis(
    index=["fold", "model"])

In [None]:
# NOTE(review): the filter predicate is always True, so every group (and row) is kept.
eval_profit = eval_profit.groupby("model").filter(lambda x: True)

In [None]:
# NOTE(review): at this point the index appears to still be the two-level (fold, model)
# MultiIndex built above, and pandas' `.str` accessor is not available on a MultiIndex —
# confirm whether this cell is still meant to run.
eval_profit.index = pd.MultiIndex.from_tuples(eval_profit.index.str.split("[+]", expand=True).tolist())

In [None]:
# NOTE(review): assigns five level names, but the next cell immediately renames the index
# to two levels ("Fold", "Model") — only one of the two can match the index; confirm.
eval_profit.index.names = ["Policy", "CATE_Estimator", "Conversion_Estimator", "PI_Estimator", "Lambda"]

In [None]:
# Name the two index levels; the "Model" level is compared against full model keys below.
eval_profit.index.names = ["Fold", "Model", ]

In [None]:
# Move "Fold" and "Model" into columns and keep only them plus the two profit metrics.
per_fold_columns = ["Fold", "Model", "profit", "ratio_treated"]
eval_profit = eval_profit.reset_index(drop=False).reindex(columns=per_fold_columns)

In [None]:
# Inspect the per-fold table.
eval_profit  #.index

In [None]:
# NOTE(review): the reindex in the cell above keeps only "Fold", "Model", "profit" and
# "ratio_treated", so a "PI_Estimator" column is not present here — confirm this lookup.
# PI_model_names is assigned a fixed list again further down.
PI_model_names = eval_profit['PI_Estimator'].unique()

In [None]:
# Model keys for the XBCF setup, in the "+"-joined form used in the "Model" column.
CATE_model = 'xbcf_outcome_xbcf'
PI_model = 'xbcf_outcome_xbcf'
conversion_model = 'single-model_outcome_gbt'

gamma = '0.0'
model = '+'.join(['Bayesian', CATE_model, conversion_model])
model2 = '+'.join(['Regularization', CATE_model, conversion_model, PI_model])

In [None]:
# Model keys for the single-model hurdle setup, in the "+"-joined form used in the "Model" column.
CATE_model = 'single-model_hurdle_gbt'
conversion_model = 'single-model_hurdle_gbt'
PI_model = 'xbcf_outcome_xbcf'

gamma = '0.0'
model3 = '+'.join(['Bayesian', CATE_model, conversion_model])
model4 = '+'.join(['Regularization', CATE_model, conversion_model, PI_model])

In [None]:
# Model keys for the two-model hurdle setup, in the "+"-joined form used in the "Model" column.
CATE_model = 'two-model_hurdle_gbt'
conversion_model = 'two-model_hurdle_gbt'
PI_model = 'xbcf_outcome_xbcf'

gamma = '0.0'
model5 = '+'.join(['Bayesian', CATE_model, conversion_model])
model6 = '+'.join(['Regularization', CATE_model, conversion_model, PI_model])


In [None]:
# Rows of the two XBCF policies (Bayesian and regularised).
df_plot = eval_profit[eval_profit['Model'].isin([model, model2])]

In [None]:
# One profit series per policy, in the order of the box-plot labels used below.
treatments = [
    eval_profit.loc[eval_profit['Model'] == model_key, 'profit']
    for model_key in (model, model2, model3, model4, model5, model6)
]

In [None]:
# Inspect the per-fold profit of the last policy (model6).
eval_profit[(eval_profit['Model'] == model6)].profit
#model

In [None]:
#medians = [None, None, med1, med2]
#conf_intervals = [None, None, ci1, ci2]
# Box plot of the per-fold profit of each policy; written to disk rather than shown.
labels = ['XBCF', 'XBCF_*', '1_Hurdle', '1_Hurdle_*', '2_Hurdle', '2_Hurdle_*']
fig, ax = plt.subplots()
pos = np.arange(1, len(treatments) + 1)
bp = ax.boxplot(treatments, sym='k+', positions=pos, showmeans=True)
ax.set_xticklabels(labels)
ax.set(xlabel='Policies', ylabel='Profit')
plt.setp(bp['whiskers'], color='k', linestyle='-')
plt.setp(bp['fliers'], markersize=3.0)
#plt.show()
fig.savefig(f"figures/regularization/uncertainty/{today}_profit_catescaled.pdf", bbox_inches='tight')
plt.close(fig)

### 2 Sharpe Policy

In [None]:
# Grid of customer counts: 0, 500, 1000, ... up to (excluding) the size of the first test fold.
start, step = 0, 500
stop = len(predictions_test[0]['idx'])

float_range_array = np.arange(start, stop, step)  #.round(2)
float_range_array

In [None]:
# Sharpe policy: for every customer budget and every test fold, rank, select, and score.
eval_profit_sharpe = []
for num_customers in float_range_array:  #float_range_array
    for outcome_dict in predictions_test:
        fold_rows = outcome_dict["idx"]  # index into y, c, g and tau_response for this fold
        spending_preds = outcome_dict["treatment_spending"]
        interval_preds = outcome_dict['prediction_intervals']
        conversion_preds = outcome_dict["conversion"]

        # Ordering produced by the Sharpe policy
        policy_dict_order = calc_sharpe_policy(
            treatment_dict=spending_preds,
            pi_dict=interval_preds,
            conversion_dict=conversion_preds,
            margin=MARGIN_RATIO,
            contact_cost=0,
            offer_cost=OFFER_COST,
            ordering=True,
        )

        # Restrict the ordering to the current customer budget
        policy_dict = select_customers(policy_dict_order=policy_dict_order, num_customers=num_customers)

        # Error metrics for the selected customers
        policy, errors = calc_sharpe_policy_error(
            treatment_dict=spending_preds,
            pi_dict=interval_preds,
            conversion_dict=conversion_preds,
            margin=MARGIN_RATIO,
            contact_cost=0,
            offer_cost=OFFER_COST,
            ordering=True,
            calc_error=True,
            y=y[fold_rows],
            g=g[fold_rows],
            tau_true=tau_response[fold_rows],
            policy_dict=policy_dict,
            num_customers=num_customers,
        )

        # Realised profit; the error metrics are merged into the same record.
        profit_sharpe = calc_policy_profit(
            policy_dict=policy_dict,
            y_true=y[fold_rows],
            c_true=c[fold_rows],
            g=g[fold_rows],
            margin=MARGIN_RATIO,
            contact_cost=0,
            offer_cost=OFFER_COST,
        )
        profit_sharpe.update(errors)
        eval_profit_sharpe.append(profit_sharpe)


In [None]:
#np.save(f"results/sharpe_customers_{today}_errors.npy", eval_profit_sharpe, allow_pickle=True)

In [None]:
# Keep a second reference to the raw record list before the name is rebound to a DataFrame below.
eval_profit_sharpe_save = eval_profit_sharpe

In [None]:
# Stack the per-run records into one frame indexed by (run number, model key).
sharpe_frames = [pd.DataFrame(record) for record in eval_profit_sharpe]
eval_profit_sharpe = pd.concat(sharpe_frames, axis=0, keys=range(len(eval_profit_sharpe))).rename_axis(
    index=["fold", "model"])


In [None]:
#eval_profit = eval_profit.groupby("model").agg([np.mean, 'sem'])
# Average every metric over the runs, one row per model key.
eval_profit_sharpe = eval_profit_sharpe.groupby(level="model").mean()

In [None]:
# Split each "+"-joined model key into its parts, one index level per part.
sharpe_key_parts = eval_profit_sharpe.index.str.split("[+]", expand=True)
eval_profit_sharpe.index = pd.MultiIndex.from_tuples(sharpe_key_parts.tolist())
eval_profit_sharpe

In [None]:
#eval_profit.index.names = ["CATE_Estimator","Conversion_Estimator","PI_Estimator","Sharpe"]
# Name the five index levels obtained from splitting the model key.
eval_profit_sharpe.index.names = ["CATE_Estimator", "Conversion_Estimator", "PI_Estimator", "Sharpe", "Customers"]

In [None]:
# Turn the index levels into ordinary columns.
eval_profit_sharpe = eval_profit_sharpe.reset_index(drop=False)

In [None]:
# Fix the column order. The error columns are kept as well: the next cell assigns ten column
# names positionally (ending in "TOL", "root_mse", "Ratio_test"), which fails with a length
# mismatch if only seven columns remain, and the error plot later reads `root_mse`.
# Assumes the error metrics are stored under "TOL", "RSME" and "Ratio_test", as in the
# regularization section — TODO confirm for calc_sharpe_policy_error.
eval_profit_sharpe = eval_profit_sharpe.reindex(
    columns=["CATE_Estimator", "Conversion_Estimator", "PI_Estimator", "Sharpe",
             "Customers", "profit", "ratio_treated", "TOL", "RSME", "Ratio_test"])

In [None]:

# Positional rename of all columns (the error column is exposed as "root_mse").
# NOTE(review): positional assignment requires the frame to have exactly ten columns here.
sharpe_column_names = ["CATE_Estimator", "Conversion_Estimator", "PI_Estimator", "Sharpe", "Customers",
                       "profit", "ratio_treated", "TOL", "root_mse", "Ratio_test"]
eval_profit_sharpe.columns = sharpe_column_names
eval_profit_sharpe

In [None]:
# Export the Sharpe summary table; the file name carries today's date.
eval_profit_sharpe.to_excel(f"results/{today}_eval_profit_sharpe_customers_errors.xlsx")

In [None]:
# Reload a previously saved result set, replacing the frame computed above.
# NOTE(review): the DataFrame is built without column names, while later cells access
# columns such as 'Customers' and 'PI_Estimator' by name — confirm the names are restored.
eval_profit_sharpe = np.load(f"results/sharpe_customers_2022-05-20_errors.npy", allow_pickle=True)
eval_profit_sharpe = pd.DataFrame(eval_profit_sharpe)

Get minimum and maximum profit values

In [None]:
# Load the saved per-run Sharpe records and stack them, indexed by (run number, model key).
saved_sharpe_runs = np.load("results/sharpe_customers_2022-05-18.npy", allow_pickle=True)
eval_profit_sharpe_vals = pd.concat(
    [pd.DataFrame(record) for record in saved_sharpe_runs],
    axis=0,
    keys=range(len(saved_sharpe_runs)),
).rename_axis(index=["fold", "model"])

In [None]:
#eval_profit = eval_profit.groupby("model").agg([np.mean, 'sem'])
# Despite the _min/_max names these are the 5% and 95% quantiles per model key.
sharpe_vals_by_model = eval_profit_sharpe_vals.groupby("model")
eval_profit_sharpe_vals_min = sharpe_vals_by_model.quantile(q=0.05)
eval_profit_sharpe_vals_max = sharpe_vals_by_model.quantile(q=0.95)

In [None]:
# Split each "+"-joined model key into its parts, for both quantile frames.
min_key_parts = eval_profit_sharpe_vals_min.index.str.split("[+]", expand=True).tolist()
eval_profit_sharpe_vals_min.index = pd.MultiIndex.from_tuples(min_key_parts)
max_key_parts = eval_profit_sharpe_vals_max.index.str.split("[+]", expand=True).tolist()
eval_profit_sharpe_vals_max.index = pd.MultiIndex.from_tuples(max_key_parts)


In [None]:
#eval_profit.index.names = ["CATE_Estimator","Conversion_Estimator","PI_Estimator","Sharpe"]
# Same five level names for both quantile frames.
sharpe_level_names = ["CATE_Estimator", "Conversion_Estimator", "PI_Estimator", "Sharpe", "Customers"]
eval_profit_sharpe_vals_min.index.names = sharpe_level_names
eval_profit_sharpe_vals_max.index.names = sharpe_level_names

In [None]:
# Turn the index levels into ordinary columns.
eval_profit_sharpe_vals_min = eval_profit_sharpe_vals_min.reset_index(drop=False)
eval_profit_sharpe_vals_max = eval_profit_sharpe_vals_max.reset_index(drop=False)

In [None]:
# Keep the identifying columns and the two profit metrics, in a fixed order.
quantile_columns = ["CATE_Estimator", "Conversion_Estimator", "PI_Estimator", "Sharpe",
                    "Customers", "profit", "ratio_treated"]
eval_profit_sharpe_vals_min = eval_profit_sharpe_vals_min.reindex(columns=quantile_columns)
eval_profit_sharpe_vals_max = eval_profit_sharpe_vals_max.reindex(columns=quantile_columns)

Compare with Analytical Policy for fewer customers

In [None]:
# Analytical benchmark: the fixed-gamma policy with gamma=0, scored for every customer
# budget and every test fold.
eval_profit_analytical = []

for num_customers in float_range_array:
    for outcome_dict in predictions_test:
        fold_rows = outcome_dict["idx"]  # index into y, c, g and tau_response for this fold
        spending_preds = outcome_dict["treatment_spending"]
        interval_preds = outcome_dict['prediction_intervals']
        conversion_preds = outcome_dict["conversion"]

        # Calculate policy decision: gamma 0 for analytical policy
        policy_dict_order = calc_bayesian_uncertainty_policy_fixed_gamma(
            treatment_dict=spending_preds,
            pi_dict=interval_preds,
            conversion_dict=conversion_preds,
            margin=MARGIN_RATIO,
            contact_cost=0,
            offer_cost=OFFER_COST,
            gamma=0,
            tail='left',
            ordering=True,
        )

        # Restrict the ordering to the current customer budget
        policy_dict_analytical = select_customers(policy_dict_order=policy_dict_order, num_customers=num_customers)

        # Error metrics for the selected customers
        policy, errors = calc_bayesian_uncertainty_policy_fixed_gamma_error(
            treatment_dict=spending_preds,
            pi_dict=interval_preds,
            conversion_dict=conversion_preds,
            margin=MARGIN_RATIO,
            contact_cost=0,
            offer_cost=OFFER_COST,
            gamma=0,
            ordering=True,
            calc_error=True,
            y=y[fold_rows],
            g=g[fold_rows],
            tau_true=tau_response[fold_rows],
            policy_dict=policy_dict_analytical,
            num_customers=num_customers,
        )

        # Realised profit; the error metrics are merged into the same record.
        profit = calc_policy_profit(
            policy_dict=policy_dict_analytical,
            y_true=y[fold_rows],
            c_true=c[fold_rows],
            g=g[fold_rows],
            margin=MARGIN_RATIO,
            contact_cost=0,
            offer_cost=OFFER_COST,
        )
        profit.update(errors)
        eval_profit_analytical.append(profit)

In [None]:
# Stack the per-run records into one frame indexed by (run number, model key).
analytical_frames = [pd.DataFrame(record) for record in eval_profit_analytical]
eval_profit_analytical = pd.concat(
    analytical_frames, axis=0, keys=range(len(eval_profit_analytical))
).rename_axis(index=["fold", "model"])

In [None]:
# Inspect the stacked analytical results.
eval_profit_analytical

In [None]:
#eval_profit = eval_profit.groupby("model").agg([np.mean, 'sem'])
# Average every metric over the runs, one row per model key.
eval_profit_analytical = eval_profit_analytical.groupby(level="model").mean()

In [None]:
# Split each "+"-joined model key into its parts, one index level per part.
analytical_key_parts = eval_profit_analytical.index.str.split("[+]", expand=True)
eval_profit_analytical.index = pd.MultiIndex.from_tuples(analytical_key_parts.tolist())
eval_profit_analytical

In [None]:
#eval_profit.index.names = ["CATE_Estimator","Conversion_Estimator","PI_Estimator","Sharpe"]
# Names for the index levels obtained by splitting the model key. The trailing entries
# ("profit", "ratio_treated", "TOL", "RSME", "Ratio_test") appear to be column names rather
# than index levels; assigning more names than levels raises a ValueError, so only as many
# names as there are levels are applied. With an eleven-level index the behaviour is unchanged.
analytical_level_names = ["Policy", "CATE_Estimator", "Conversion_Estimator", "PI_Estimator", "Sharpe",
                          "Customers", "profit", "ratio_treated", "TOL", "RSME", "Ratio_test"]
eval_profit_analytical.index.names = analytical_level_names[:eval_profit_analytical.index.nlevels]

In [None]:
# Turn the index levels into ordinary columns.
eval_profit_analytical = eval_profit_analytical.reset_index(drop=False)

In [None]:
# Fix the column order: identifiers first, then profit metrics and error metrics.
analytical_columns = ["Policy", "CATE_Estimator", "Conversion_Estimator", "PI_Estimator", "Sharpe",
                      "Customers", "profit", "ratio_treated", "TOL", "RSME", "Ratio_test"]
eval_profit_analytical = eval_profit_analytical.reindex(columns=analytical_columns)



In [None]:
# Export the analytical summary table; the file name carries today's date.
eval_profit_analytical.to_excel(f"results/{today}_eval_profit_analytical_customers_errors.xlsx")

In [None]:
# to load up results again
# Replaces the frame computed above; the resulting DataFrame has no column names yet
# (they are assigned in the next cell).
eval_profit_analytical = np.load(f"results/analytical_customers_2022-05-20.npy", allow_pickle=True)
eval_profit_analytical = pd.DataFrame(eval_profit_analytical)

In [None]:
# Positional rename of all eleven columns (the error column is exposed as "root_mse").
analytical_column_names = ["Policy", "CATE_Estimator", "Conversion_Estimator", "PI_Estimator", "Sharpe",
                           "Customers", "profit", "ratio_treated", "TOL", "root_mse", "Ratio_test"]
eval_profit_analytical.columns = analytical_column_names


In [None]:
#np.save(f"results/analytical_customers_{today}.npy", eval_profit_analytical, allow_pickle=True)

In [None]:
import matplotlib.pyplot as plt

In [None]:
#PI_model_names = eval_profit_sharpe['PI_Estimator'].unique()
# PI estimators compared in the Sharpe plots below (fixed list instead of reading them from the data).
PI_model_names = ['Agnostic_QR_two-model', 'xbcf_outcome_xbcf']

## Plot Sharpe Policy: Errors

In [None]:
# Model combination shown in the error / profit plots of this section;
# the commented-out lines are the alternatives that were tried.
#CATE_model ='xbcf_outcome_xbcf'
#CATE_model = 'two-model_hurdle_gbt'
CATE_model = "single-model_hurdle_gbt"
#conversion_model = "single-model_outcome_gbt"
conversion_model = "Conversion-Rate__"
#conversion_model =  'two-model_hurdle_gbt'
#conversion_model = 'single-model_hurdle_gbt'
#PI_model = 'Agnostic_QR_two-model_hurdle'

In [None]:
# for RSME
# The customer counts come out of the split model key; make them integers so sorting is numeric.
eval_profit_analytical['Customers'] = eval_profit_analytical['Customers'].astype('int')
eval_profit_sharpe['Customers'] = eval_profit_sharpe['Customers'].astype('int')

# PI estimator whose rows represent the analytical benchmark. It is pinned explicitly here:
# the filter used to read `PI_model` before this cell assigned it, so the curve depended on
# whichever cell had last set that variable. The last entry of PI_model_names equals the
# value `PI_model` holds when the notebook is run top to bottom.
analytical_PI_model = PI_model_names[-1]

#df_plot = eval_profit_analytical[(eval_profit_analytical['CATE_Estimator']==CATE_model) & (eval_profit_analytical['Conversion_Estimator']==conversion_model)&(eval_profit_analytical['PI_Estimator']=='Agnostic_QR_two-model')].sort_values(by=['Customers'])
df_plot = eval_profit_analytical[(eval_profit_analytical['CATE_Estimator'] == CATE_model) & (
            eval_profit_analytical['Conversion_Estimator'] == conversion_model) &
                                 (eval_profit_analytical['PI_Estimator'] == analytical_PI_model)].sort_values(
    by=['Customers'])

# One Sharpe curve per PI estimator, for the same CATE / conversion models.
data_list = {}
for PI_model in PI_model_names:
    #df_plot_sharpe = {}
    df_plot_sharpe = eval_profit_sharpe[(eval_profit_sharpe['CATE_Estimator'] == CATE_model) & (
                eval_profit_sharpe['Conversion_Estimator'] == conversion_model) &
                                        (eval_profit_sharpe['PI_Estimator'] == PI_model)].sort_values(by=['Customers'])
    name = str(PI_model)
    data_list[name] = df_plot_sharpe

# The first row of the analytical curve is left out of the plot.
plt.plot(df_plot.Customers[1:], df_plot.root_mse[1:], label='Analytical')

for PI_model in PI_model_names:
    plt.plot(data_list[PI_model].Customers, data_list[PI_model].root_mse, alpha=0.5, label=f"Sharpe+{PI_model[0:22]}")

plt.title(f"Error for {CATE_model} with {conversion_model}.")
plt.legend(loc="upper right")
plt.xlabel("Maximum Number of Customers")
plt.ylabel("RSME")
plt.savefig(f"figures/{today}_sharpe_evaluate_RSME_{CATE_model}_{conversion_model}.pdf", bbox_inches='tight')
plt.close()

In [None]:
# for profit only:
# Integer customer counts so that sorting is numeric.
for frame in (eval_profit_analytical, eval_profit_sharpe):
    frame['Customers'] = frame['Customers'].astype('int')

# Analytical benchmark rows for the selected CATE / conversion models.
analytical_rows = ((eval_profit_analytical['CATE_Estimator'] == CATE_model)
                   & (eval_profit_analytical['Conversion_Estimator'] == conversion_model))
df_plot = eval_profit_analytical[analytical_rows].sort_values(by=['Customers'])

# One Sharpe curve per PI estimator.
data_list = {}
for PI_model in PI_model_names:
    sharpe_rows = ((eval_profit_sharpe['CATE_Estimator'] == CATE_model)
                   & (eval_profit_sharpe['Conversion_Estimator'] == conversion_model)
                   & (eval_profit_sharpe['PI_Estimator'] == PI_model))
    df_plot_sharpe = eval_profit_sharpe[sharpe_rows].sort_values(by=['Customers'])
    name = str(PI_model)
    data_list[name] = df_plot_sharpe

plt.plot(df_plot.Customers, df_plot.profit, label='Analytical')

for PI_model in PI_model_names:
    sharpe_curve = data_list[PI_model]
    plt.plot(sharpe_curve.Customers, sharpe_curve.profit, label=f"Sharpe+{PI_model[0:22]}", alpha=0.5)

plt.title(f"Profit for {CATE_model} with {conversion_model}.")
plt.legend(loc="lower right")
plt.xlabel("Maximum Number of Customers")
plt.ylabel("Profit")
plt.savefig(f"figures/{today}_sharpe_evaluate_{CATE_model}_{conversion_model}.pdf", bbox_inches='tight')
plt.close()

# Plots for Sharpe Policy: Profit


In [None]:
# Model combination shown in the profit plots with uncertainty bands;
# the commented-out lines are the alternatives that were tried.
#CATE_model ='xbcf_outcome_xbcf'

#conversion_model = 'Conversion-Rate__'
#conversion_model = 'single-model_outcome_gbt'
#PI_model = 'Agnostic_QR_two-model_hurdle'
#CATE_model ='xbcf_outcome_xbcf'
#CATE_model = 'two-model_hurdle_gbt'
CATE_model = "single-model_hurdle_gbt"
conversion_model = "single-model_hurdle_gbt"
#conversion_model = "single-model_outcome_gbt"
#conversion_model = "Conversion-Rate__"
#conversion_model =  'two-model_hurdle_gbt'
#conversion_model = 'single-model_outcome_gbt'
#PI_model = 'Agnostic_QR_two-model_hurdle'

In [None]:
# Integer customer counts in all four frames so that sorting is numeric.
for frame in (eval_profit_analytical, eval_profit_sharpe,
              eval_profit_sharpe_vals_min, eval_profit_sharpe_vals_max):
    frame['Customers'] = frame['Customers'].astype('int')

# Analytical benchmark rows for the selected CATE / conversion models.
analytical_rows = ((eval_profit_analytical['CATE_Estimator'] == CATE_model)
                   & (eval_profit_analytical['Conversion_Estimator'] == conversion_model))
df_plot = eval_profit_analytical[analytical_rows].sort_values(by=['Customers'])

In [None]:
def _rows_for(frame, pi_name):
    """Rows of `frame` for the selected CATE/conversion models and `pi_name`, sorted by Customers."""
    keep = ((frame['CATE_Estimator'] == CATE_model)
            & (frame['Conversion_Estimator'] == conversion_model)
            & (frame['PI_Estimator'] == pi_name))
    return frame[keep].sort_values(by=['Customers'])


# Mean curve plus lower / upper quantile curve per PI estimator.
data_list, data_list_min, data_list_max = {}, {}, {}

for PI_model in PI_model_names:
    name = str(PI_model)

    df_plot_sharpe = _rows_for(eval_profit_sharpe, PI_model)
    data_list[name] = df_plot_sharpe

    df_plot_sharpe_min = _rows_for(eval_profit_sharpe_vals_min, PI_model)
    data_list_min[name] = df_plot_sharpe_min

    df_plot_sharpe_max = _rows_for(eval_profit_sharpe_vals_max, PI_model)
    data_list_max[name] = df_plot_sharpe_max

In [None]:
plt.plot(df_plot.Customers, df_plot.profit, label='Analytical')
# NOTE(review): this band is drawn at the analytical curve's x-values but uses
# df_plot_sharpe_min / df_plot_sharpe_max, i.e. the quantile frames left over from the last
# PI estimator processed before this cell — confirm that this is intended.
plt.fill_between(df_plot.Customers, df_plot_sharpe_min.profit, df_plot_sharpe_max.profit,
                 edgecolor='darkblue', facecolor='lightblue', alpha=0.5)  #

# Mean curve and quantile band for the first two PI estimators, each with its own colours.
band_colours = [('red', 'orange'), ("green", 'lightgreen')]
for position, (edge_colour, face_colour) in enumerate(band_colours):
    pi_name = PI_model_names[position]
    plt.plot(data_list[pi_name].Customers, data_list[pi_name].profit,
             label=f"Sharpe+{pi_name[0:22]}", alpha=0.5)
    plt.fill_between(data_list[pi_name].Customers, data_list_min[pi_name].profit,
                     data_list_max[pi_name].profit,
                     edgecolor=edge_colour, facecolor=face_colour, alpha=0.3)  # alpha=0.1

plt.title(f"Profit for {CATE_model} with {conversion_model}.")
plt.legend(loc="lower right")
plt.xlabel("Maximum Number of Customers")
plt.ylabel("Profit")
plt.savefig(f"figures/{today}_sharpe_evaluate_uncertainty_{CATE_model}_{conversion_model}.pdf",
            bbox_inches='tight')
plt.close()


# Scaling

## Scale PIs for Oracle Predictions

In [None]:
# Shift the XBCF lower bounds down by |max(pred_low)| + 1, for test and train predictions.
# The cell modifies the prediction dicts in place, so running it twice shifts twice.
for i in range(len(predictions_test)):
    for alpha in [0.05]:
        test_xbcf = predictions_test[i]['prediction_intervals'][alpha]['xbcf_outcome_xbcf']
        for model in test_xbcf:
            print(model)
            #print(abs(max(predictions_test[i]['prediction_intervals'][alpha]['xbcf_outcome_xbcf'][model]['pred_low'])))
            test_bounds = test_xbcf[model]
            test_bounds['pred_low'] = test_bounds['pred_low'] - abs(max(test_bounds['pred_low'])) - 1

            train_bounds = predictions_train[i]['prediction_intervals'][alpha]['xbcf_outcome_xbcf'][model]
            train_bounds['pred_low'] = train_bounds['pred_low'] - abs(max(train_bounds['pred_low'])) - 1


In [None]:
# Oracle scaling factors per test fold, computed against the true response effect.
scale_test_oracle = [
    scale_PIs(fold["prediction_intervals"], tau_response[fold["idx"]])
    for fold in predictions_test
]

In [None]:
# Oracle scaling factors per training fold, computed against the true response effect.
scale_train_oracle = [
    scale_PIs(fold["prediction_intervals"], tau_response[fold["idx"]])
    for fold in predictions_train
]

Now that we have scaled (i.e. oracle) PIs for every model,
we can use them with our targeting policy.


In [None]:
# Multiply the quantile-model bounds of every test fold by the oracle scaling factors
# (first factor for the lower bound, second for the upper bound). Modifies in place.
for i in range(len(predictions_test)):
    for alpha in [0.05]:
        fold_intervals = predictions_test[i]['prediction_intervals'][alpha]
        for model in fold_intervals:
            print(model)
            if fold_intervals[model] == 'posterior':
                continue
            best_scaling = scale_test_oracle[i][alpha][model]['best_scaling']
            bounds = fold_intervals[model]['quantile_model']
            bounds['pred_low'] = best_scaling[0] * bounds['pred_low']
            bounds['pred_high'] = best_scaling[1] * bounds['pred_high']

In [None]:
# Multiply the quantile-model bounds of every training fold by the oracle scaling factors
# (first factor for the lower bound, second for the upper bound). Modifies in place.
for i in range(len(predictions_train)):
    for alpha in [0.05]:
        fold_intervals = predictions_train[i]['prediction_intervals'][alpha]
        for model in fold_intervals:
            print(model)
            if fold_intervals[model] == 'posterior':
                continue
            best_scaling = scale_train_oracle[i][alpha][model]['best_scaling']
            bounds = fold_intervals[model]['quantile_model']
            bounds['pred_low'] = best_scaling[0] * bounds['pred_low']
            bounds['pred_high'] = best_scaling[1] * bounds['pred_high']

In [None]:
# Persist the oracle-scaled predictions (these are the files the oracle loading cell reads).
np.save("results/prediction_test_oracle_cv.npy", predictions_test, allow_pickle=True)

np.save("results/prediction_train_oracle_cv.npy", predictions_train, allow_pickle=True)

## CATE Scaling:

In [None]:
# CATE-based scaling factors per training fold, computed from the fold's own treatment predictions.
scale_train = [
    scale_PIs_CATE(fold["prediction_intervals"], treatment_dict=fold["treatment_spending"])
    for fold in predictions_train
]

In [None]:
# CATE-based scaling factors per test fold, computed from the fold's own treatment predictions.
scale_test = [
    scale_PIs_CATE(fold["prediction_intervals"], treatment_dict=fold["treatment_spending"])
    for fold in predictions_test
]

To use it on the PI estimates:


In [None]:
# Apply the CATE-based scaling factors to the quantile-model bounds of every test fold
# (first factor for the lower bound, second for the upper bound). Modifies in place.
# This cell previously multiplied by `scale_test_oracle`, so the factors computed with
# scale_PIs_CATE were never used and the "CATE"-scaled output was in fact oracle-scaled.
# Assumes scale_PIs_CATE returns the same [alpha][model]['best_scaling'] layout as
# scale_PIs — TODO confirm.
for i in range(len(predictions_test)):
    for alpha in [0.05]:
        for model in predictions_test[i]['prediction_intervals'][alpha]:
            print(model)
            if predictions_test[i]['prediction_intervals'][alpha][model] == 'posterior':
                pass
            else:
                best_scaling = scale_test[i][alpha][model]['best_scaling']
                bounds = predictions_test[i]['prediction_intervals'][alpha][model]['quantile_model']
                bounds['pred_low'] = best_scaling[0] * bounds['pred_low']
                bounds['pred_high'] = best_scaling[1] * bounds['pred_high']

In [None]:
# Apply the CATE-based scaling factors to the quantile-model bounds of every training fold
# (first factor for the lower bound, second for the upper bound). Modifies in place.
# This cell previously multiplied by `scale_train_oracle`, so the factors computed with
# scale_PIs_CATE were never used and the "CATE"-scaled output was in fact oracle-scaled.
# Assumes scale_PIs_CATE returns the same [alpha][model]['best_scaling'] layout as
# scale_PIs — TODO confirm.
for i in range(len(predictions_train)):
    for alpha in [0.05]:
        for model in predictions_train[i]['prediction_intervals'][alpha]:
            print(model)
            if predictions_train[i]['prediction_intervals'][alpha][model] == 'posterior':
                pass
            else:
                best_scaling = scale_train[i][alpha][model]['best_scaling']
                bounds = predictions_train[i]['prediction_intervals'][alpha][model]['quantile_model']
                bounds['pred_low'] = best_scaling[0] * bounds['pred_low']
                bounds['pred_high'] = best_scaling[1] * bounds['pred_high']


In [None]:
# save scaled results:
# These are the files read by the "50 folds with CATE Scaling" loading cell.
np.save(f"results/prediction_test_scaled_cv_CATE.npy", predictions_test, allow_pickle=True)
np.save(f"results/prediction_train_scaled_cv_CATE.npy", predictions_train, allow_pickle=True)



