# Predictions View

This notebook currently uses PDC data to reproduce the Predictions view from a data dump: Lift, CTR, and Performance trend charts.

It will be generalized to support a direct export from the Prediction Studio datamart table.

In [None]:
from pathlib import Path
import polars as pl
import json

def read_parquet(filename):
    """Read the Parquet file at ``filename`` with polars and return the result."""
    frame = pl.read_parquet(filename)
    return frame

data_export = "<YOUR FILEPATH HERE>"

# Expand a leading "~" once so both branches work on the same resolved path.
export_path = Path(data_export).expanduser()

if data_export.endswith(".parquet"):
    predictions_raw_data = read_parquet(export_path)
elif data_export.endswith(".json"):
    # Open explicitly as UTF-8 rather than relying on the platform default
    # encoding, which differs between operating systems.
    with open(export_path, encoding="utf-8") as f:
        # The records are read from the top-level "pxResults" key and the
        # numeric columns are cast to Float64.
        predictions_raw_data = pl.from_dicts(json.load(f)["pxResults"]).cast(
            {
                "Negatives": pl.Float64,
                "Positives": pl.Float64,
                "ResponseCount": pl.Float64,
                "TotalPositives": pl.Float64,
                "TotalResponses": pl.Float64,
                "Performance": pl.Float64,
            }
        )
else:
    # Without this branch predictions_raw_data would stay undefined and the
    # next statement would fail with an unhelpful NameError.
    raise ValueError(
        f"Unsupported data export {data_export!r}: "
        "expected a path ending in '.parquet' or '.json'"
    )

# Keep only rows whose ModelType starts with "Prediction", add a CTR column
# (Positives as a fraction of Positives + Negatives) and order by SnapshotTime.
predictions_raw_data = predictions_raw_data.filter(
    pl.col("ModelType").str.starts_with("Prediction")
).with_columns(
    (pl.col("Positives") / (pl.col("Positives") + pl.col("Negatives"))).alias("CTR")
).sort("SnapshotTime")

predictions_raw_data.head()

In [None]:
# Rows whose SnapshotTime equals the last value in the column; the frame was
# sorted by SnapshotTime when it was prepared earlier in the notebook.
latest_snapshot = predictions_raw_data.filter(
    pl.col("SnapshotTime") == pl.col("SnapshotTime").last()
)
latest_metrics = latest_snapshot.sort(["SnapshotTime", "ModelName", "ModelType"]).select(
    ["ModelName", "ModelType", "Negatives", "Positives", "Performance", "CTR"]
)
# Render through a pandas Styler with the index hidden.
latest_metrics.to_pandas().style.hide()

In [None]:
import plotly.express as px

# Performance over time, one line per ModelName.
px.line(
    data_frame=predictions_raw_data,
    x="SnapshotTime",
    y="Performance",
    color="ModelName",
)

In [None]:
# CTR over time, one line per ModelName.
px.line(
    data_frame=predictions_raw_data,
    x="SnapshotTime",
    y="CTR",
    color="ModelName",
)

In [None]:
from pdstools import Prediction


# Wrap the prepared data (as a LazyFrame) in a pdstools Prediction; `p` is
# reused by the cells that follow.
p = Prediction(predictions_raw_data.lazy())
channel_summary = p.summary_by_channel().collect()
channel_summary.to_pandas().style


In [None]:
# Performance trend per channel, leaving out multi-channel predictions and
# rows whose Channel is "Unknown".
performance_trend = p.summary_by_channel(keep_trend_data=True).collect()
performance_trend = performance_trend.filter(~pl.col("isMultiChannelPrediction"))
performance_trend = performance_trend.filter(pl.col("Channel") != "Unknown")
performance_trend = performance_trend.sort(["SnapshotTime"])
px.line(performance_trend, x="SnapshotTime", y="Performance", color="Channel")

In [None]:
# Lift trend per channel, leaving out multi-channel predictions and rows
# whose Channel is "Unknown".
lift_trend = p.summary_by_channel(keep_trend_data=True).collect()
lift_trend = lift_trend.filter(~pl.col("isMultiChannelPrediction"))
lift_trend = lift_trend.filter(pl.col("Channel") != "Unknown")
lift_trend = lift_trend.sort(["SnapshotTime"])
px.line(lift_trend, x="SnapshotTime", y="Lift", color="Channel")