# Impact Analyzer

In [None]:
import polars as pl
import pathlib
import pdstools


# Data

Export the VBD Actuals dataset from Dev Studio (in production) or VBD Scenario Planner Actuals (in BOE).

In [None]:
# Path to the exported VBD Actuals zip archive; "~" is expanded to the
# current user's home directory.
vbd_export_path = pathlib.Path(
    "~/Downloads/Data-pxStrategyResult_ActualsExport_20240221T204009_GMT.zip"
).expanduser()

# TODO check no action
# Alternative export kept for reference:
# vbd_export_path = pathlib.Path("~/Library/CloudStorage/OneDrive-PegasystemsInc/AI Chapter/projects/Impact Analyzer/Lmms-22-02-2023/Data-pxStrategyResult_Actuals_20230222T154840_GMT.zip").expanduser()

# Read the export with pdstools. NOTE(review): later cells call `.collect()`
# on frames derived from this, so it is presumably a polars LazyFrame - confirm.
vbd_export = pdstools.readDSExport(vbd_export_path)


In [None]:
# Column names of the export in sorted order, shown as the cell output to
# make it easy to see which fields are available.
cols = sorted(vbd_export.columns)
cols

# Control Groups in Impact Analyzer

Impact Analyzer (IA) uses **pyReason**, **MktType**, **MktValue** and **ModelControlGroup** to define the various experiments. For the standard NBA decisions (no experiment), values are left empty (null).

Prior to Impact Analyzer, or when it is turned off, Predictions from Prediction Studio manage two groups through the **ModelControlGroup** property: a value of **Test** is used for model-driven arbitration, and **Control** for the random control group (which defaulted to 2% prior to Impact Analyzer).

When IA is on, the distinct values from just **MktValue** are sufficient to identify the different experiments.


In [None]:
# Reference table of the default Impact Analyzer experiments, written one
# experiment per row as (Experiment, MktValue, Description). The empty
# MktValue entry stands for the regular, fully configured NBA.
_ia_experiment_columns = ("Experiment", "MktValue", "Description")
_ia_experiment_rows = [
    (
        "Random Eligible Action",
        "NBAHealth_NBAPrioritization",
        "Random eligible action (all engagement policies but randomly prioritized)",
    ),
    (
        "Propensity Only",
        "NBAHealth_PropensityPriority",
        "Prioritized with propensity only (no V, C or L)",
    ),
    (
        "No Levers",
        "NBAHealth_LeverPriority",
        "Prioritized with no levers (only p, V and C)",
    ),
    (
        "Only Eligibility Rules",
        "NBAHealth_EngagementPolicy",
        "Only Eligibility policies applied (no Applicability or Suitability, and prioritized with pVCL)",
    ),
    (
        "Random p",
        "NBAHealth_ModelControl_1",
        "Prioritized with Random (p) only",
    ),
    (
        "ADM only",
        "NBAHealth_ModelControl_2",
        "Prioritized with Adaptive model (p) only",
    ),
    (
        "Full NBA",
        "",
        "Arbitrated with your full NBA as configured",
    ),
]

# Transpose the rows into the column-name -> list-of-values mapping that the
# DataFrame constructor receives.
default_ia_experiments = pl.DataFrame(
    {
        column: [row[position] for row in _ia_experiment_rows]
        for position, column in enumerate(_ia_experiment_columns)
    }
)

# Show the table as the cell output without the pandas index column.
default_ia_experiments.to_pandas().style.hide(axis="index")

# Lift

Engagement Lift is calculated as (SuccessRate(test) - SuccessRate(control))/SuccessRate(control)

Value Lift is calculated as (ValueCapture(test) - ValueCapture(control))/ValueCapture(control)


In [None]:
# Expressions replacing missing experiment markers with "". They are applied
# after the aggregation and before the join, so that rows without an
# experiment line up with the "" MktValue entry of default_ia_experiments.
fill_nulls = [pl.col("MktType").fill_null(""), pl.col("MktValue").fill_null("")]

# Keys identifying one experiment / control group; used for both grouping and
# sorting so the two cannot drift apart.
experiment_keys = [
    "pyChannel",
    "pyDirection",
    "MktType",
    "MktValue",
    "pyReason",
    "ModelControlGroup",
]
# Percentages are computed relative to the totals of each channel/direction.
channel_keys = ["pyChannel", "pyDirection"]

if "FinalPropensity" not in vbd_export.columns:
    # Add the missing column as a *typed* null. An untyped `pl.lit(None)`
    # yields a Null-dtype column, which is then summed, divided and formatted
    # as a percentage below; Float64 keeps that arithmetic and the "{:.2%}"
    # formatter working on a numeric column.
    # NOTE(review): how an all-null sum is reported (0 vs null) depends on the
    # polars version - confirm the displayed average is acceptable.
    vbd_export = vbd_export.with_columns(
        pl.lit(None, dtype=pl.Float64).alias("FinalPropensity")
    )

# Summary per experiment group, rendered as a styled pandas table (the value
# of this expression is the cell output).
(
    vbd_export
    # Parse the Pega timestamp and keep only its date part.
    .with_columns(
        pdstools.cdh_utils.parsePegaDateTimeFormats("pxOutcomeTime").dt.date()
    )
    .group_by(experiment_keys)
    .agg(
        pl.count().alias("VBD Records"),
        pl.col("AggregateCount").cast(pl.Int64).sum().alias("Actions"),
        # Raw sums; turned into averages and dropped further down.
        pl.sum("FinalPropensity"),
        pl.sum("pyPropensity"),
        pl.sum("pxPriority"),
        pl.col("pxOutcomeTime").max().alias("Most Recent Update"),
    )
    .with_columns(
        # Share of each group within its channel/direction.
        (pl.col("VBD Records") / pl.sum("VBD Records"))
        .over(channel_keys)
        .alias("VBD Records Percentage"),
        (pl.col("Actions") / pl.sum("Actions"))
        .over(channel_keys)
        .alias("Actions Percentage"),
        # Averages are the summed values divided by the number of actions.
        (pl.col("FinalPropensity") / pl.col("Actions")).alias("Avg FinalPropensity"),
        (pl.col("pyPropensity") / pl.col("Actions")).alias("Avg pyPropensity"),
        (pl.col("pxPriority") / pl.col("Actions")).alias("Avg pxPriority"),
    )
    # Must happen before the join: null join keys would not match "".
    .with_columns(fill_nulls)
    .drop(["FinalPropensity", "pyPropensity", "pxPriority"])
    # Attach the experiment name and description for known MktValue values.
    .join(
        default_ia_experiments.lazy(),
        how="left",
        on=["MktValue"],
    )
    .sort(experiment_keys)
    # Only the Web channel is shown.
    .filter(pl.col("pyChannel") == "Web")
    .collect()
    .to_pandas()
    .style.format(
        {
            "Avg FinalPropensity": "{:.2%}",
            "Avg pyPropensity": "{:.2%}",
            "Avg pxPriority": "{:.3f}",
            "VBD Records Percentage": "{:.2%}",
            "Actions Percentage": "{:.2%}",
            "Most Recent Update": "{:%b '%y}",
        }
    )
    .hide(axis="index")
    .hide(["Description", "VBD Records Percentage"], axis="columns")
)