# Layer 4 — Action Simulation

Top-K selection, incremental ROI estimation, uplift curve, treatment sensitivity chart.

**Note:** This notebook is read-only with respect to reports: it does NOT write to the report MD files.

In [None]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from pathlib import Path
import sys

# Project root is the parent of the current working directory; putting it at the
# front of sys.path makes modules under the root importable from this notebook.
ROOT = Path('.').resolve().parents[0]
sys.path.insert(0, f"{ROOT}")

# Load data
# The first candidate file that exists under DATA_DIR wins; only the first
# MAX_ROWS rows are read.
DATA_DIR = ROOT / 'data'
MAX_ROWS = 100_000
possible_files = ['cfm_pltv_train.csv', 'cfm_pltv_train_1.csv', 'cfm_pltv_train_imoney.csv', 'clm_pltv_iamount.csv']

df = None
for fname in possible_files:
    fpath = DATA_DIR / fname
    if fpath.exists():
        df = pd.read_csv(fpath, nrows=MAX_ROWS, low_memory=False)
        print(f'✅ Loaded {len(df):,} rows from {fname}')
        break

if df is None:
    # Say where we looked and for what, so the failure is actionable.
    raise FileNotFoundError(
        f"No training data found in {DATA_DIR}; looked for: {', '.join(possible_files)}"
    )

# Currency settings
CURRENCY = "VND"
VND_TO_USD = 24_000


def convert_currency(value, to_currency="VND"):
    """Convert a VND-denominated amount into ``to_currency``.

    Args:
        value: Amount to convert; anything that supports division by a number.
        to_currency: Target currency code. Only "USD" triggers a conversion.

    Returns:
        ``value / VND_TO_USD`` when ``to_currency`` is "USD"; otherwise
        ``value`` is returned unchanged.
    """
    wants_usd = to_currency == "USD"
    return value / VND_TO_USD if wants_usd else value

def format_currency(value, currency="VND"):
    """Render a numeric amount as a display string with a currency symbol.

    Args:
        value: Numeric amount, already expressed in ``currency``.
        currency: Currency code. "USD" is formatted with a dollar sign and two
            decimals; any other code is formatted with the dong sign and no
            decimals.

    Returns:
        The formatted string, with thousands separators.
    """
    if currency == "USD":
        return f"${value:,.2f}"
    # U+20AB DONG SIGN (previously stored as a mis-decoded byte sequence).
    return f"₫{value:,.0f}"

# Symbol used in chart axis titles; the dong sign and the emoji below were
# previously stored as mis-decoded (mojibake) byte sequences.
currency_symbol = "₫" if CURRENCY == "VND" else "$"
print(f"💱 Currency: {CURRENCY} ({currency_symbol})")

In [None]:
# Simulate predictions (noisy version of actual for demo)
rng = np.random.default_rng(42)
n_rows = len(df)
# Draw order is kept (multiplicative factor first, additive noise second) so the
# seeded stream yields the same simulated predictions.
scale = rng.uniform(0.6, 1.4, n_rows)
noise = rng.normal(0, 0.5, n_rows)
# Negative simulated predictions are floored at zero.
df['pred_ltv30'] = (df['ltv30'] * scale + noise).clip(lower=0)
# Rank rows from highest to lowest predicted value with a fresh 0..n-1 index.
df_sorted = (
    df.sort_values(by='pred_ltv30', ascending=False)
      .reset_index(drop=True)
)

# CPI in selected currency
if CURRENCY == "VND":
    CPI = 10000
else:
    CPI = 0.42
print(f"CPI: {format_currency(CPI, CURRENCY)}")

In [None]:
# Top-K analysis
# For each k in 1..50, take the top k% of rows ranked by predicted LTV and
# compare their realised 'ltv30' total with a random selection of the same size.
#
# Units: 'revenue' and 'baseline' stay in the raw units of 'ltv30' (later cells
# pass them through convert_currency themselves); 'cost' is in CURRENCY because
# CPI is defined in CURRENCY.
k_range = list(range(1, 51))
results = []
for k in k_range:
    n = max(1, int(len(df_sorted) * k / 100))  # always select at least one row
    rev = df_sorted.head(n)['ltv30'].sum()
    baseline_rev = df_sorted.sample(n=n, random_state=42)['ltv30'].sum()
    cost = n * CPI
    # ROI must compare like with like: express revenue in the same currency as
    # cost before subtracting. convert_currency treats its input as VND, so this
    # is a no-op when CURRENCY is "VND".
    rev_in_cost_currency = convert_currency(rev, CURRENCY)
    roi = (rev_in_cost_currency - cost) / cost * 100 if cost > 0 else 0
    results.append({'k': k, 'n': n, 'revenue': rev, 'baseline': baseline_rev, 'cost': cost, 'roi': roi})
res_df = pd.DataFrame(results)
res_df.head(10)

In [None]:
# Uplift curve with currency conversion
# Both series are converted to the display currency before plotting.
revenue_display = convert_currency(res_df['revenue'], CURRENCY)
baseline_display = convert_currency(res_df['baseline'], CURRENCY)

top_k_pct = res_df['k']
uplift_traces = (
    go.Scatter(x=top_k_pct, y=revenue_display, name='Model (Top-K)'),
    go.Scatter(x=top_k_pct, y=baseline_display, name='Random', line={'dash': 'dash'}),
)

fig = go.Figure()
for trace in uplift_traces:
    fig.add_trace(trace)

uplift_layout = {
    'title': f'Cumulative Revenue: Model vs Random - {CURRENCY}',
    'xaxis_title': 'Top-K (%)',
    'yaxis_title': f'Revenue ({currency_symbol})',
}
fig.update_layout(**uplift_layout)
fig.show()

In [None]:
# Treatment sensitivity with currency conversion
# Marginal revenue is the step-to-step change in cumulative revenue; the first
# step has no predecessor, so it is filled with the first cumulative value.
first_revenue = res_df['revenue'].iloc[0]
marginal = res_df['revenue'].diff().fillna(first_revenue)
marginal_display = convert_currency(marginal, CURRENCY)

top_k_pct = res_df['k']
marginal_bars = go.Bar(
    x=top_k_pct,
    y=marginal_display,
    name='Marginal Rev',
    marker_color='lightblue',
)
# ROI is drawn against the secondary (right-hand) axis 'y2'.
roi_line = go.Scatter(
    x=top_k_pct,
    y=res_df['roi'],
    name='ROI %',
    yaxis='y2',
    line={'color': 'red', 'width': 2},
)

fig = go.Figure()
for trace in (marginal_bars, roi_line):
    fig.add_trace(trace)

sensitivity_layout = {
    'title': f'Treatment Sensitivity - {CURRENCY}',
    'xaxis_title': 'Top-K (%)',
    'yaxis': {'title': f'Marginal Revenue ({currency_symbol})'},
    'yaxis2': {'title': 'ROI %', 'side': 'right', 'overlaying': 'y'},
}
fig.update_layout(**sensitivity_layout)
fig.show()

In [None]:
## Summary

This notebook provides action simulation analysis including:
- Top-K user selection strategy
- ROI estimation at different selection thresholds
- Uplift curve comparing model vs random selection
- Treatment sensitivity analysis with marginal revenue

**Note:** This is an exploratory notebook. It does NOT write to report MD files.