In [None]:
# Namespace of the run this report describes. If `namespace` was already
# defined before this cell runs (e.g. injected by whatever executes the
# notebook) that value is kept; otherwise it defaults to "" (non-namespaced
# execution) so the cell no longer fails with NameError on a fresh kernel.
namespace = globals().get("namespace", "")
# catalog entry for tag dictionary
cat_td = "td"
# Prefix catalog entries with "<namespace>." only when a namespace is set, so an
# empty namespace yields "bulk_output" instead of ".bulk_output".
_cat_prefix = f"{namespace}." if namespace else ""
# catalog entry for uplift report bulk output
cat_output_df = f"{_cat_prefix}bulk_output"
# catalog entry for uplift report bulk ctrl
cat_ctrl_df = f"{_cat_prefix}bulk_ctrl"

In [None]:
%matplotlib inline

In [None]:
from datetime import datetime
from itertools import product
import logging

import kedro

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib
from matplotlib import pyplot as plt

In [None]:
# select seaborn's "notebook" plotting context
sns.set_context(context="notebook")

In [None]:
# Use the fully-qualified option names: the bare patterns "max_columns" and
# "max_rows" also match the `styler.render.*` options in pandas >= 1.4, which
# makes `set_option` raise OptionError ("Pattern matched multiple keys").
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 1000)

In [None]:
# silence warnings
import warnings
warnings.filterwarnings('ignore')

In [None]:
# load kedro context
from project_clisham.optimus_core.reporting_html.utils import load_context, mprint

# raise the "kedro" logger threshold to WARNING before the context is loaded
logging.getLogger("kedro").setLevel(logging.WARNING)

context = load_context(max_depth=6)
# short handles on the context's parameters and data catalog I/O
parameters, io = context.params, context.io

In [None]:
# silence logging: raise the threshold of the kedro sub-loggers to WARNING
for _logger_name in ("kedro.io", "kedro.pipeline", "kedro.config"):
    logging.getLogger(_logger_name).setLevel(logging.WARNING)

In [None]:
# load data
td = io.load(cat_td)
output_df = io.load(cat_output_df)
ctrl_df = io.load(cat_ctrl_df)
# For namespaced runs the parameters are read from the namespace's sub-dict.
# With an empty namespace, `parameters[""]` would look up an empty key, so read
# from the top level instead (assumes non-namespaced runs keep `opt_target` at
# the top level of the parameters — TODO confirm).
opt_params = parameters[namespace] if namespace else parameters
opt_target = opt_params['opt_target']

# Uplift Report

In [None]:
# `datetime.now()` on its own is naive, and `%z` renders as an empty string for
# naive datetimes; `.astimezone()` attaches the local timezone so the UTC
# offset actually appears in the report header.
generated_at = datetime.now().astimezone().strftime('%b-%d-%Y %H:%M:%S %z')
report_timestamp = (
    f"#### generated {generated_at} "
    f"with environment `{context.env}`"
)
mprint(report_timestamp)

## Results

In [None]:
# the first entry returned by the tag dictionary selection is used as the target
target = td.select("target", opt_target)[0]
mprint("The goal was to *maximize* `{}`.".format(td.name(target)))

# uplift versus predicted: mean of the absolute column, median of the pct column
predicted_uplift = output_df[(target, "optimized_vs_predicted")].mean()
predicted_uplift_pct = output_df[(target, "optimized_vs_predicted_pct")].median()
mprint(
    "Average uplift against predicted was **{:.3f}**, or **{:.2f}% (median)**.".format(
        predicted_uplift, predicted_uplift_pct
    )
)

# uplift versus actual: same two statistics on the "_vs_actual" columns
actual_uplift = output_df[(target, "optimized_vs_actual")].mean()
actual_uplift_pct = output_df[(target, "optimized_vs_actual_pct")].median()
mprint(
    "Average uplift against actual was **{:.3f}**, or **{:.2f}% (median)**.".format(
        actual_uplift, actual_uplift_pct
    )
)

### Details

In [None]:
# keep only the columns whose label contains "_vs_", then summarize them
uplift_columns = output_df.filter(like='_vs_')
uplift_columns.describe()

In [None]:
# (target, series) column keys for the three series drawn on the timeline
timeline_columns = [
    (target, series)
    for series in ('actual', 'target_pred_current', 'target_pred_optimized')
]

fig, ax = plt.subplots(figsize=(12, 8))
output_df.filter(items=timeline_columns).plot(ax=ax, color=['grey', 'blue', 'red'])
ax.set_title('Uplift simulation timeline')
ax.set_ylabel(td.name(target));

## Controls

In [None]:
# summary statistics of the controls frame loaded from the catalog
ctrl_df.describe()

In [None]:
def plot_me(sub_df, ctrl_name=None):
    """Plot current versus suggested values of one control.

    Every row of ``sub_df`` is drawn as a two-point line joining its
    ``"current"`` value (x = 0) to its ``"suggested"`` value (x = 1).

    Args:
        sub_df: frame providing the ``"current"`` and ``"suggested"`` columns.
        ctrl_name: identifier of the control; used as the y-axis label and,
            through ``td.name``, in the title. When omitted, the notebook-level
            variable ``ctrl`` is used, which keeps existing ``plot_me(sub_df)``
            calls working unchanged.
    """
    if ctrl_name is None:
        # backward-compatible fallback to the loop variable of the calling cell
        ctrl_name = ctrl

    # Size the x grid from the frame actually being plotted rather than from
    # the global ``ctrl_df``, so the function works for any frame passed in.
    x = np.zeros((len(sub_df), 2))
    x[:, 1] = 1
    y = sub_df[["current", "suggested"]].values

    fig, ax = plt.subplots(figsize=(8, 6))
    # transposed so that each column (one per row of sub_df) becomes its own line
    ax.plot(x.T, y.T, color='black', alpha=0.1, marker="o")
    ax.set_xticks([0, 1])
    ax.set_xticklabels(["current", "suggested"])
    ax.set_ylabel(ctrl_name)
    ax.set_title(f"{td.name(ctrl_name)} suggestions")

    # enlarge title, axis labels and tick labels
    for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] + ax.get_xticklabels() + ax.get_yticklabels()):
        item.set_fontsize(16)

In [None]:
# distinct first-level column labels of the controls frame, in sorted order
all_controls = sorted(set(ctrl_df.columns.get_level_values(0)))

In [None]:
# One figure per control. NOTE: the loop variable must stay named `ctrl` —
# `plot_me` reads the notebook-level `ctrl` for its axis label and title.
for ctrl in all_controls:
    plot_me(ctrl_df[ctrl])