In [None]:

# EconML with PyTorch Neural Network Base Learners for Uplift Modeling
# S-Learner, T-Learner, X-Learner using MLP from PyTorch

# 📦 Install dependencies
# NOTE(review): the leading "!" is IPython/Jupyter shell-escape syntax, so this
# line presumably only runs inside a notebook (e.g. Colab) — confirm before
# executing the file as a plain Python script.
!pip install econml xgboost scikit-learn pandas matplotlib seaborn torch

# 📁 Upload the updated dataset: peacock_user_data_with_renewed_and_propensity.csv
# The CSV is read back by file name with pd.read_csv further down; the
# `uploaded` value itself is not referenced again in the visible code.
# NOTE(review): google.colab is only importable inside Colab — confirm the
# target environment.
from google.colab import files
uploaded = files.upload()

# 📊 Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

from econml.metalearners import SLearner, TLearner, XLearner
from sklearn.metrics import mean_squared_error

import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.base import BaseEstimator, RegressorMixin

# 🧠 Define a simple feedforward neural network regressor
class MLPRegressor(BaseEstimator, RegressorMixin):
    """Scikit-learn style regressor backed by a one-hidden-layer PyTorch MLP.

    The network is ``Linear(input_dim, hidden_dim) -> ReLU -> Linear(hidden_dim, 1)``
    and is trained with Adam on a mean-squared-error loss using shuffled
    mini-batches.

    Parameters
    ----------
    input_dim : int or None, default=None
        Number of input features. ``fit`` always overwrites this with the
        width of the training matrix, so the constructor value is only a hint.
    hidden_dim : int, default=64
        Width of the single hidden layer.
    lr : float, default=0.001
        Learning rate passed to ``torch.optim.Adam``.
    epochs : int, default=20
        Number of full passes over the training data.
    batch_size : int, default=64
        Mini-batch size used by the training ``DataLoader``.

    Attributes
    ----------
    model : torch.nn.Sequential or None
        The trained network; ``None`` until ``fit`` has been called.
    scaler : None
        Never used by this class; kept so existing attribute reads keep working.
    """

    def __init__(self, input_dim=None, hidden_dim=64, lr=0.001, epochs=20, batch_size=64):
        self.hidden_dim = hidden_dim
        self.lr = lr
        self.epochs = epochs
        self.batch_size = batch_size
        self.input_dim = input_dim
        self.model = None  # populated by fit()
        self.scaler = None  # unused placeholder, retained for compatibility

    @staticmethod
    def _as_float32(data):
        """Return ``data`` as a dense float32 NumPy array."""
        # scipy sparse matrices expose ``toarray``; without densifying first,
        # NumPy would wrap the matrix in a 0-d object array instead.
        if hasattr(data, "toarray"):
            data = data.toarray()
        return np.asarray(data).astype(np.float32)

    def _build_model(self):
        """Create a fresh, untrained network sized from the current hyperparameters."""
        return nn.Sequential(
            nn.Linear(self.input_dim, self.hidden_dim),
            nn.ReLU(),
            nn.Linear(self.hidden_dim, 1)
        )

    def fit(self, X, y):
        """Train a new network on ``X`` and ``y`` and return ``self``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features. Sparse matrices are densified.
        y : array-like with n_samples elements
            Regression targets; flattened into a single column.

        Raises
        ------
        ValueError
            If ``X`` is not 2-dimensional or ``X`` and ``y`` disagree on the
            number of samples.
        """
        X = self._as_float32(X)
        y = self._as_float32(y).reshape(-1, 1)
        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-dimensional (n_samples, n_features); got shape {X.shape}."
            )
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X and y have inconsistent numbers of samples: {X.shape[0]} != {y.shape[0]}."
            )

        # The network is always sized from the data actually seen, so callers
        # that widen X before fitting still get a matching first layer.
        self.input_dim = X.shape[1]
        self.model = self._build_model()
        optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr)
        loss_fn = nn.MSELoss()

        dataset = TensorDataset(torch.tensor(X), torch.tensor(y))
        dataloader = DataLoader(dataset, batch_size=self.batch_size, shuffle=True)

        self.model.train()
        for _ in range(self.epochs):
            for xb, yb in dataloader:
                optimizer.zero_grad()
                preds = self.model(xb)
                loss = loss_fn(preds, yb)
                loss.backward()
                optimizer.step()
        return self

    def predict(self, X):
        """Return a 1-D array of predictions for ``X``.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If ``fit`` has not been called. This is a subclass of both
            ``AttributeError`` and ``ValueError``, so code that caught the
            previous ``AttributeError`` keeps working.
        """
        if self.model is None:
            # Imported here so the module-level import list stays unchanged.
            from sklearn.exceptions import NotFittedError

            raise NotFittedError(
                "This MLPRegressor instance is not fitted yet. "
                "Call 'fit' before using 'predict'."
            )
        X = self._as_float32(X)
        self.model.eval()
        with torch.no_grad():
            preds = self.model(torch.tensor(X)).numpy()
        return preds.ravel()

# 📥 Load data
df = pd.read_csv("peacock_user_data_with_renewed_and_propensity.csv")

# Identifier, treatment, outcome and propensity columns are not model features.
non_feature_columns = ["user_id", "assigned_promo", "renewed", "propensity_score"]
X = df.drop(columns=non_feature_columns)
T = df["assigned_promo"]  # treatment indicator
Y = df["renewed"]  # outcome

# Known ground-truth uplift function
is_roku = (df["device_type"] == "roku").astype(int)
has_kids = (df["has_kids_profile"] == 1).astype(int)
tau_x = 0.4 - 0.7 * df["prior_engagement_score"] + 0.1 * is_roku + 0.05 * has_kids

# Split: 70% train / 30% test, with the true uplift split alongside the data
# so it stays row-aligned with the test features.
(
    X_train, X_test,
    T_train, T_test,
    Y_train, Y_test,
    tau_train, tau_test,
) = train_test_split(X, T, Y, tau_x, test_size=0.3, random_state=42)

# ⚙️ Preprocess
numeric_features = ["tenure_months", "prior_engagement_score", "weekly_watch_hours", "num_devices"]
categorical_features = ["device_type", "payment_method", "account_type", "region", "has_kids_profile", "promo_eligible"]

# Numeric columns are standardised; categorical columns are one-hot encoded,
# with categories unseen during fitting encoded as all zeros.
# sparse_threshold=0 forces a dense ndarray: with the default (0.3) the
# transformer may return a scipy sparse matrix depending on how many one-hot
# columns are produced, and the NumPy/PyTorch-based code downstream expects a
# dense array.
preprocessor = ColumnTransformer(
    [
        ("num", StandardScaler(), numeric_features),
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features),
    ],
    sparse_threshold=0,
)

# Fit on the training split only, then apply the same transform to the test split.
X_train_proc = preprocessor.fit_transform(X_train)
X_test_proc = preprocessor.transform(X_test)

# Neural Net base model
# NOTE(review): the same instance is handed to all three learners; presumably
# econml clones it internally so no weights are shared — confirm.
base_nn = MLPRegressor(input_dim=X_train_proc.shape[1], hidden_dim=64, lr=0.001, epochs=30)

# Learners
s_learner = SLearner(model=base_nn)
t_learner = TLearner(models=base_nn)
x_learner = XLearner(models=base_nn)

# Seed PyTorch's global RNG so weight initialisation and mini-batch shuffling
# are repeatable, matching the fixed random_state used for the data split.
torch.manual_seed(42)

# Fit (S, then T, then X — the order matters for reproducibility because all
# three draw from the same seeded RNG stream)
s_learner.fit(Y_train, T_train, X=X_train_proc)
t_learner.fit(Y_train, T_train, X=X_train_proc)
x_learner.fit(Y_train, T_train, X=X_train_proc)

# Predict CATE
cate_s = s_learner.effect(X_test_proc)
cate_t = t_learner.effect(X_test_proc)
cate_x = x_learner.effect(X_test_proc)

# Evaluate PEHE: root-mean-squared error between the known uplift and the
# estimated CATE on the test split.
pehe_s = np.sqrt(mean_squared_error(tau_test, cate_s))
pehe_t = np.sqrt(mean_squared_error(tau_test, cate_t))
pehe_x = np.sqrt(mean_squared_error(tau_test, cate_x))

# 📊 Plot
# One histogram panel per learner, titled with that learner's PEHE.
panels = [
    ("S-Learner", cate_s, pehe_s),
    ("T-Learner", cate_t, pehe_t),
    ("X-Learner", cate_x, pehe_x),
]

plt.figure(figsize=(12, 4))
for position, (label, estimates, score) in enumerate(panels, start=1):
    plt.subplot(1, 3, position)
    sns.histplot(estimates, kde=True, bins=30)
    plt.title(f"{label} (PEHE: {score:.3f})")
    plt.xlabel("Estimated CATE")
plt.tight_layout()
plt.show()
