In [None]:

# # Deep Causal Models with EconML: DRLearner and DeepIV (Colab Version)

# 📦 Install dependencies
# IPython shell magic: installs the listed packages into the notebook runtime.
# NOTE(review): `tensorflow` is imported further down in this notebook but is
# not part of this install list — presumably the Colab image already ships it;
# confirm before running elsewhere.
!pip install econml xgboost scikit-learn pandas matplotlib seaborn torch

# 📁 Upload your data: peacock_user_data_with_renewed_and_propensity.csv
# The load step later in this notebook reads the CSV by this exact file name,
# so the uploaded file must keep it. The return value `uploaded` is not
# referenced again in the visible code.
from google.colab import files
uploaded = files.upload()

# 📊 Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

from econml.dr import DRLearner
# NOTE(review): EconML's API reference documents the neural-net IV estimator in
# `econml.iv.nnet` under the name `DeepIV`. The original import is kept, and the
# documented name is bound to `DeepIVEstimator` when the original one is not
# exported, so the rest of the notebook can keep using `DeepIVEstimator`.
# Confirm against the installed EconML version (releases that dropped DeepIV
# will still raise ImportError here).
try:
    from econml.iv.nnet import DeepIVEstimator
except ImportError:
    from econml.iv.nnet import DeepIV as DeepIVEstimator
from econml.utilities import hstack

import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

# 📥 Load data
# Reads the CSV by name from the current working directory.
df = pd.read_csv("peacock_user_data_with_renewed_and_propensity.csv")

# Feature setup
# X keeps every column except the identifier, the treatment, the outcome and
# the stored propensity score; T is the treatment column, Y the outcome column.
X = df.drop(columns=["user_id", "assigned_promo", "renewed", "propensity_score"])
T = df["assigned_promo"]
Y = df["renewed"]

# Known CATE for evaluation
# Per-row treatment effect computed from a fixed formula over three columns;
# it is only used as the reference when scoring the estimators (PEHE).
tau_x = (
    0.4
    - 0.7 * df["prior_engagement_score"]
    + 0.1 * (df["device_type"] == "roku").astype(int)
    + 0.05 * (df["has_kids_profile"] == 1).astype(int)
)

# Train/test split
# All four objects are split in a single call so their rows stay aligned.
X_train, X_test, T_train, T_test, Y_train, Y_test, tau_train, tau_test = train_test_split(
    X, T, Y, tau_x, test_size=0.3, random_state=42
)

# Column Transformer
numeric_features = ["tenure_months", "prior_engagement_score", "weekly_watch_hours", "num_devices"]
categorical_features = ["device_type", "payment_method", "account_type", "region", "has_kids_profile", "promo_eligible"]

# Standardize numeric columns and one-hot encode categorical ones.
# sparse_threshold=0 forces a dense ndarray: with the default threshold the
# transformer may return a SciPy sparse matrix when the one-hot block is sparse
# enough, and the Keras / EconML steps that consume this output expect dense
# arrays.
preprocessor = ColumnTransformer(
    [
        ("num", StandardScaler(), numeric_features),
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features),
    ],
    sparse_threshold=0,
)

# Fit the preprocessing on the training rows only, then reuse it for the test
# rows so no test-set statistics leak into the scaler or encoder.
X_train_proc = preprocessor.fit_transform(X_train)
X_test_proc = preprocessor.transform(X_test)

# 🔧 DRLearner with neural net models
from sklearn.ensemble import GradientBoostingRegressor
from econml.metalearners import TLearner
# NOTE(review): `econml.models` does not appear in EconML's documented API, so
# this import is expected to fail on a stock install — confirm. The import is
# kept for environments that do provide it; otherwise a minimal stand-in with
# the same constructor keywords is defined so later code keeps working.
try:
    from econml.models import KerasModel
except ImportError:
    class KerasModel:
        """Minimal scikit-learn-style regressor around a Keras model factory.

        Args:
            model_builder: Zero-argument callable returning a fresh, compiled
                model object that exposes ``fit`` and ``predict``.
            fit_kwargs: Optional dict of keyword arguments forwarded to the
                underlying model's ``fit`` call (e.g. ``epochs``, ``verbose``).
        """

        def __init__(self, model_builder, fit_kwargs=None):
            # Parameters are stored untouched: scikit-learn's clone() rebuilds
            # the object from get_params() and checks identity of each value.
            self.model_builder = model_builder
            self.fit_kwargs = fit_kwargs

        def get_params(self, deep=True):
            """Return the constructor parameters as a dict (clone protocol)."""
            return {"model_builder": self.model_builder, "fit_kwargs": self.fit_kwargs}

        def set_params(self, **params):
            """Set constructor parameters by name and return ``self``.

            Raises:
                ValueError: If a name is not a constructor parameter.
            """
            for key, value in params.items():
                if key not in ("model_builder", "fit_kwargs"):
                    raise ValueError(f"Invalid parameter {key!r} for KerasModel")
                setattr(self, key, value)
            return self

        def fit(self, X, y, sample_weight=None):
            """Build a fresh model and train it on ``(X, y)``; return ``self``."""
            extra = dict(self.fit_kwargs or {})
            if sample_weight is not None:
                extra["sample_weight"] = np.asarray(sample_weight)
            # A new network per fit keeps repeated fits independent.
            self.model_ = self.model_builder()
            self.model_.fit(np.asarray(X), np.asarray(y), **extra)
            return self

        def predict(self, X):
            """Predict for ``X``; a single output column is flattened to 1-D."""
            preds = np.asarray(self.model_.predict(np.asarray(X), verbose=0))
            if preds.ndim == 2 and preds.shape[1] == 1:
                return preds[:, 0]
            return preds

import tensorflow as tf
from tensorflow.keras import layers, models

def build_keras_model(input_shape):
    """Build and compile a small fully connected Keras network.

    Args:
        input_shape: Number of input features; used as the single dimension
            of the input layer.

    Returns:
        A compiled ``Sequential`` model with two ReLU hidden layers (64 and 32
        units) and a one-unit output layer with no activation specified,
        compiled with the Adam optimizer and mean-squared-error loss.
    """
    network = models.Sequential()
    network.add(layers.Input(shape=(input_shape,)))
    # Hidden stack: widths listed in order from input side to output side.
    for width in (64, 32):
        network.add(layers.Dense(width, activation="relu"))
    network.add(layers.Dense(1))
    network.compile(optimizer="adam", loss="mse")
    return network

# DRLearner
# Doubly robust CATE estimation: a propensity model for the treatment, an
# outcome regression, and a final neural-net model for the effect itself.
from sklearn.ensemble import GradientBoostingClassifier

dr_learner = DRLearner(
    # The propensity model estimates P(T | X) and, per EconML's DRLearner
    # documentation, must expose `predict_proba` — so it has to be a
    # classifier, not a regressor.
    model_propensity=GradientBoostingClassifier(),
    model_regression=GradientBoostingRegressor(),
    # The builder is a zero-argument callable so a fresh network is created
    # with the width of the processed feature matrix.
    model_final=KerasModel(
        model_builder=lambda: build_keras_model(X_train_proc.shape[1]),
        fit_kwargs={'epochs': 30, 'verbose': 0},
    ),
)

dr_learner.fit(Y_train, T_train, X=X_train_proc)
# Flatten so the estimates are a plain 1-D vector for scoring and plotting.
cate_dr = np.ravel(dr_learner.effect(X_test_proc))

# PEHE evaluation
# Root-mean-squared error between the known effect and the estimated effect
# on the held-out rows.
from sklearn.metrics import mean_squared_error
pehe_dr = np.sqrt(mean_squared_error(tau_test, cate_dr))

# Plot
# Histogram (with KDE overlay) of the estimated effects; the score is shown
# in the title.
plt.figure(figsize=(6, 4))
sns.histplot(cate_dr, kde=True, bins=30)
plt.title(f"DRLearner (PEHE: {pehe_dr:.3f})")
plt.xlabel("Estimated CATE")
plt.grid(True)
plt.tight_layout()
plt.show()

# 🚀 Optional: DeepIV if IV is available (instrument z ≠ T)
# Simulate an instrument for demo
# NOTE(review): DeepIV relies on the Keras backend supported by the installed
# EconML release — confirm it is available in this runtime before running.
Z = df["promo_eligible"]

# Reuse the row split made earlier by selecting on the split's index labels;
# this keeps Z aligned with X/T/Y without depending on a second split call
# reproducing the same shuffle.
Z_train = Z.loc[X_train.index]
Z_test = Z.loc[X_test.index]

# The instrument must not also be a covariate: if `promo_eligible` stays in X,
# the response network sees the instrument directly. Build a covariate matrix
# that excludes it (dense output, same scaling/encoding as the main pipeline).
iv_categorical = [col for col in categorical_features if col != "promo_eligible"]
iv_preprocessor = ColumnTransformer(
    [
        ("num", StandardScaler(), numeric_features),
        ("cat", OneHotEncoder(handle_unknown="ignore"), iv_categorical),
    ],
    sparse_threshold=0,
)
X_train_iv = iv_preprocessor.fit_transform(X_train)
X_test_iv = iv_preprocessor.transform(X_test)


def _as_column(values):
    """Return `values` as a float array of shape (n, 1)."""
    # assumes the column is numeric/boolean — TODO confirm for promo_eligible
    return np.asarray(values, dtype=float).reshape(-1, 1)


# Two-dimensional columns so each can be concatenated with the covariates
# inside the networks.
Y_train_col = _as_column(Y_train)
T_train_col = _as_column(T_train)
Z_train_col = _as_column(Z_train)

# One network per stage, sized for "extra column(s) + covariates".
n_covariates = X_train_iv.shape[1]
treatment_net = build_keras_model(Z_train_col.shape[1] + n_covariates)
response_net = build_keras_model(T_train_col.shape[1] + n_covariates)

# DeepIV requires outcome, treatment, instrument, and covariates
# `m` and `h` receive Keras tensors and must return the network applied to
# them (instrument/treatment concatenated with the covariates), not a new
# model object. No `loss` argument is passed: the documented constructor does
# not list one.
deepiv = DeepIVEstimator(
    n_components=10,
    m=lambda z, x: treatment_net(layers.concatenate([z, x])),
    h=lambda t, x: response_net(layers.concatenate([t, x])),
    n_samples=1,
    optimizer="adam",
    first_stage_options={'epochs': 30, 'verbose': 0},
    second_stage_options={'epochs': 30, 'verbose': 0},
)

# DeepIV fit
# X and Z are passed by keyword, as in the documented `fit` signature.
deepiv.fit(Y_train_col, T_train_col, X=X_train_iv, Z=Z_train_col)

# Estimate CATE at T=1 vs T=0
# Explicit per-row treatment columns match the (n, 1) treatment input used in
# training; the result is flattened for scoring and plotting.
n_test = X_test_iv.shape[0]
cate_deepiv = np.ravel(
    deepiv.effect(X_test_iv, T0=np.zeros((n_test, 1)), T1=np.ones((n_test, 1)))
)
pehe_deepiv = np.sqrt(mean_squared_error(tau_test, cate_deepiv))

# Plot DeepIV
plt.figure(figsize=(6, 4))
sns.histplot(cate_deepiv, kde=True, bins=30)
plt.title(f"DeepIV (PEHE: {pehe_deepiv:.3f})")
plt.xlabel("Estimated CATE")
plt.grid(True)
plt.tight_layout()
plt.show()
