PHASE 6
Decision Optimization: Budget- & Risk-Aware Customer Targeting

In [27]:
# STEP 6.1 — Load Phase 5 Artifact (Immutable)

In [28]:
import pandas as pd
import numpy as np

clv_df = pd.read_parquet("phase5_expected_clv.parquet")
print(clv_df.shape)
clv_df.head()


(5881, 2)


Unnamed: 0,Customer ID,expected_clv
0,12346.0,28353.198765
1,12347.0,5959.262516
2,12348.0,5518.680179
3,12349.0,3631.677336
4,12350.0,2648.932017


In [29]:
# STEP 6.2 — Define the Decision Problem

### Decision Problem Formulation

At each decision cycle, the business must select a subset of customers
to target with a retention or engagement action.

Each action incurs a cost and is subject to a fixed budget constraint.
The objective is to maximize the total expected incremental customer
lifetime value (CLV) generated by the chosen actions.

This phase formulates CLV as a decision optimization problem,
not merely a predictive metric.


In [30]:
# STEP 6.3 — Introduce an Action Space

a = 1 → retention offer

a = 0 → do nothing

In [31]:
# STEP 6.4 — Define Action Cost & Budget

In [32]:
# Simple, explainable numbers:
ACTION_COST = 100        # cost per customer
TOTAL_BUDGET = 50000     # total marketing budget


In [33]:
# Maximum customers you can target:
MAX_CUSTOMERS = TOTAL_BUDGET // ACTION_COST
MAX_CUSTOMERS


500

In [34]:
# STEP 6.5 — Define Incremental CLV (Key Assumption)

In [35]:
UPLIFT_FACTOR = 0.15   # 15% incremental CLV if targeted


In [36]:
clv_df["incremental_clv"] = (
    UPLIFT_FACTOR * clv_df["expected_clv"]
)


In [37]:
# STEP 6.6 — Risk Control

In [38]:
CAP = clv_df["incremental_clv"].quantile(0.95)

clv_df["incremental_clv_capped"] = (
    clv_df["incremental_clv"].clip(upper=CAP)
)


In [39]:
# STEP 6.7 — Optimization Objective

In [40]:
decision_df = (
    clv_df
    .sort_values("incremental_clv_capped", ascending=False)
    .head(MAX_CUSTOMERS)
    .copy()
)

decision_df["action"] = 1
decision_df.head()


Unnamed: 0,Customer ID,expected_clv,incremental_clv,incremental_clv_capped,action
3688,16067.0,33157.36088,4973.604132,2690.666824,1
3650,16029.0,118061.39629,17709.209443,2690.666824,1
3640,16019.0,29720.003412,4458.000512,2690.666824,1
3634,16013.0,34200.923207,5130.138481,2690.666824,1
400,12748.0,19355.381809,2903.307271,2690.666824,1


In [41]:
# STEP 6.8 — Compute Expected Portfolio Gain

In [42]:
total_incremental_value = decision_df["incremental_clv_capped"].sum()
total_cost = len(decision_df) * ACTION_COST

total_incremental_value, total_cost


(np.float64(1262575.0536835992), 50000)

In [43]:
# STEP 6.9 — Compare Against Heuristic Baselines

In [44]:
# Baseline 1: Random targeting
random_df = clv_df.sample(MAX_CUSTOMERS, random_state=42)
random_gain = (
    UPLIFT_FACTOR * random_df["expected_clv"]
).sum()


In [45]:
# Baseline 2: Frequency-only targeting (naive)
# Merge frequency from Phase 2
freq_df = (
    pd.read_parquet("phase2_customer_state.parquet")
    [["Customer ID", "frequency"]]
    .drop_duplicates()
)

baseline_df = clv_df.merge(freq_df, on="Customer ID")

freq_top_df = (
    baseline_df
    .sort_values("frequency", ascending=False)
    .head(MAX_CUSTOMERS)
)

freq_gain = (
    UPLIFT_FACTOR * freq_top_df["expected_clv"]
).sum()


In [46]:
# STEP 6.10 — Decision Comparison Table


In [47]:
comparison_df = pd.DataFrame({
    "Strategy": ["CLV-Optimized", "Frequency-Only", "Random"],
    "Expected Incremental Value": [
        total_incremental_value,
        freq_gain,
        random_gain
    ]
})

comparison_df


Unnamed: 0,Strategy,Expected Incremental Value
0,CLV-Optimized,1262575.0
1,Frequency-Only,4272724.0
2,Random,405134.6
