# Causal ML, Uplift Modeling Part 1

## Compare winning model from Two-Model to Single-Model Approaches

In [None]:
# 5. Enhanced Qini Evaluation
def evaluate_qini(uplift, y_test, t_test, model_name):
    """Print the Qini AUC for a set of uplift scores and plot uplift@k by targeting depth.

    Parameters:
    uplift: predicted uplift scores, used by sklift to rank the rows
    y_test: observed outcomes for the same rows
    t_test: treatment indicators for the same rows
    model_name: label used in the printed line, the legend and the plot title

    Returns:
    None. The function prints one line and shows a matplotlib figure.
    """
    qini = qini_auc_score(y_test, uplift, t_test)

    # Targeting depths 0.01, 0.02, ..., 0.99. The sweep stops short of 1.0
    # because sklift's uplift_at_k expects a float k strictly inside (0, 1).
    fractions = [pct / 100 for pct in range(1, 100)]
    uplift_per_fraction = [
        uplift_at_k(y_test, uplift, t_test, strategy='overall', k=fraction)
        for fraction in fractions
    ]

    print(f"{model_name} Qini Score: {qini:.4f}")

    # NOTE(review): the plotted values are uplift@k at each depth, while the
    # axis label and title call this a cumulative Qini curve - confirm intent.
    plt.plot(fractions, uplift_per_fraction, label=f'{model_name} Model')
    # Reference line from the origin up to the largest uplift@k observed.
    plt.plot([0, 1], [0, max(uplift_per_fraction)], '--', label='Random')
    plt.xlabel('Proportion of Population Targeted')
    plt.ylabel('Cumulative Uplift')
    plt.title(f'Qini Curve - {model_name}')
    plt.legend()
    plt.show()

# Compare Two-Model and S-Learner
# NOTE(review): `uplift_two_model` and `uplift_s_learner` are not assigned in
# any cell of this notebook, so this cell raises NameError as written. The
# uplift arrays returned by create_uplift_scores are named uplift_two_model_rf,
# uplift_two_model_xg, uplift_single_model_xg and uplift_single_model_rf -
# decide which pair is the intended "winning" comparison and use those names.
print("### Two-Model Approach Evaluation ###")
evaluate_qini(uplift_two_model, y_test, t_test, "Two-Model")

print("### S-Learner Evaluation ###")
evaluate_qini(uplift_s_learner, y_test, t_test, "S-Learner")


### Two-Model Approach Evaluation ###


NameError: name 'uplift_two_model' is not defined

In [None]:
# 6. Tabular Comparison of Qini Scores
# NOTE(review): `uplift_two_model` and `uplift_s_learner` are not assigned in
# any cell of this notebook; the arrays produced by create_uplift_scores are
# uplift_two_model_rf / uplift_two_model_xg / uplift_single_model_xg /
# uplift_single_model_rf. This cell raises NameError until the names are fixed.
qini_two_model = qini_auc_score(y_test, uplift_two_model, t_test)
qini_s_learner = qini_auc_score(y_test, uplift_s_learner, t_test)

# One row per approach, with the Qini AUC reported by sklift.
comparison_df = pd.DataFrame({
    'Model': ['Two-Model', 'S-Learner'],
    'Qini Score': [qini_two_model, qini_s_learner]
})
print("\n### Qini Score Comparison ###")
print(comparison_df)

## Import Libraries & Data

In [24]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt
from sklift.metrics import uplift_at_k,qini_auc_score
from sklift.datasets import fetch_hillstrom # our dataset!
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split, cross_val_score
import optuna
from sklearn.metrics import roc_auc_score, log_loss
from causalml.metrics import auuc_score

In [25]:
# Load Hillstrom dataset
def load_hillstrom():
    """Fetch the Hillstrom dataset and return it as a single DataFrame.

    The feature frame is extended with two columns taken from the fetched
    dataset object: 'segment' (the treatment assignment) and 'visit' (the
    target). A schema summary and per-column missing-value counts are printed.

    Returns:
    DataFrame with the dataset features plus 'segment' and 'visit'.
    """
    dataset = fetch_hillstrom()
    # Work on a copy so the extra columns are not written into the frame held
    # by the fetched dataset object.
    df = dataset.data.copy()
    # Segment has 3 options (womens email, mens email, no email); a binary
    # 'treatment' is derived later by keeping one email arm plus the control.
    df['segment'] = dataset.treatment
    # There are 3 outcomes (visit, conversion, spend); the fetched target is
    # stored as 'visit' here.
    df['visit'] = dataset.target
    # DataFrame.info() writes its report itself and returns None, so wrapping it
    # in print() emitted a stray "None" line after the summary.
    df.info()
    print(df.isna().sum())
    return df

df = load_hillstrom()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 64000 entries, 0 to 63999
Data columns (total 10 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   recency          64000 non-null  int64  
 1   history_segment  64000 non-null  object 
 2   history          64000 non-null  float64
 3   mens             64000 non-null  int64  
 4   womens           64000 non-null  int64  
 5   zip_code         64000 non-null  object 
 6   newbie           64000 non-null  int64  
 7   channel          64000 non-null  object 
 8   segment          64000 non-null  object 
 9   visit            64000 non-null  int64  
dtypes: float64(1), int64(5), object(4)
memory usage: 4.9+ MB
None
recency            0
history_segment    0
history            0
mens               0
womens             0
zip_code           0
newbie             0
channel            0
segment            0
visit              0
dtype: int64


## Explore Data

In [26]:
# The treatment
df['segment'].value_counts()

segment
Womens E-Mail    21387
Mens E-Mail      21307
No E-Mail        21306
Name: count, dtype: int64

In [27]:
# Let's just use men's email first and create a treatment variable to 1/0 the treatment/control
# Take an explicit copy of the filtered rows: assigning a new column to the
# result of a .loc selection otherwise triggers pandas' chained-assignment
# (SettingWithCopy) warning and leaves it ambiguous which object is modified.
df = df.loc[df['segment'].isin(['Mens E-Mail','No E-Mail'])].copy()
df['treatment'] = df['segment'].map({'Mens E-Mail':1,'No E-Mail':0})
df['treatment'].value_counts(normalize=True)

treatment
1    0.500012
0    0.499988
Name: proportion, dtype: float64

In [28]:
# This dataset has visit, conversion, and revenue as the target variables, we are going to use visit as the target variable
df['target'] = df['visit'].copy()
df['target'].value_counts()

target
0    36457
1     6156
Name: count, dtype: int64

In [29]:
# Customer purchased mens merchandise in the past year
df['mens'].value_counts()

mens
1    23526
0    19087
Name: count, dtype: int64

In [30]:
# Customer purchased womens merchandise in the past year
df['womens'].value_counts()

womens
1    23417
0    19196
Name: count, dtype: int64

In [31]:
# Only a few customers purchased from both mens and womens merch in the past year
df.groupby(['mens','womens']).size()

mens  womens
0     1         19087
1     0         19196
      1          4330
dtype: int64

In [32]:
# segments for TTM spend
df['history_segment'].value_counts()

history_segment
1) $0 - $100        15336
2) $100 - $200       9527
3) $200 - $350       8134
4) $350 - $500       4221
5) $500 - $750       3249
6) $750 - $1,000     1266
7) $1,000 +           880
Name: count, dtype: int64

In [33]:
# Actual TTM spend
df['history'].describe()

count    42613.000000
mean       241.859315
std        256.574723
min         29.990000
25%         64.500000
50%        157.000000
75%        325.210000
max       3345.930000
Name: history, dtype: float64

In [34]:
# Months since last purchase
df['recency'].value_counts()

recency
1     5934
2     5074
10    5022
9     4330
3     3899
4     3406
6     3048
5     2985
7     2720
8     2337
11    2316
12    1542
Name: count, dtype: int64

In [35]:
# New customer in the past year
df['newbie'].value_counts()

newbie
1    21381
0    21232
Name: count, dtype: int64

In [36]:
# TTM purchase channels - must be an old dataset given the phone count
df['channel'].value_counts()

channel
Web             18863
Phone           18567
Multichannel     5183
Name: count, dtype: int64

In [37]:
# Classifies urban rural and suburban
df['zip_code'].value_counts()

zip_code
Surburban    19126
Urban        17105
Rural         6382
Name: count, dtype: int64

In [38]:
# One-hot encode zip_code and channel (first level of each dropped, integer
# 0/1 columns), then remove the columns that are no longer needed as features.
df = pd.get_dummies(df, columns=['zip_code', 'channel'], drop_first=True, dtype=int)
df = df.drop(['history_segment', 'segment', 'visit'], axis=1)

In [39]:
# One grand view of our final dataset! Looks ready for modeling.
df.head()

Unnamed: 0,recency,history,mens,womens,newbie,treatment,target,zip_code_Surburban,zip_code_Urban,channel_Phone,channel_Web
1,6,329.08,1,1,1,0,0,0,0,0,1
3,9,675.83,1,0,1,1,0,0,0,0,1
8,9,675.07,1,1,1,1,0,0,0,1,0
13,2,101.64,0,1,0,1,1,0,1,0,1
14,4,241.42,0,1,1,0,0,0,0,0,0


In [40]:
# It appears everything is just about equal in terms of the treatment group and the control group for feature means, except the target which is ok
df.groupby('treatment').mean()

# Even if it wasn't, we could still run the model using the covariates as features, but would need to adjust for the imbalance in the treatment groups

Unnamed: 0_level_0,recency,history,mens,womens,newbie,target,zip_code_Surburban,zip_code_Urban,channel_Phone,channel_Web
treatment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,5.749695,240.882653,0.553224,0.547639,0.501971,0.106167,0.451751,0.40092,0.437764,0.439923
1,5.773642,242.835931,0.550946,0.551415,0.501525,0.182757,0.44591,0.401887,0.43366,0.445394


## Split Train Test Treatment

In [41]:
# Split data into train/test
def split_data(df):
    """Split features, target and treatment flag into train/test partitions.

    Parameters:
    df: DataFrame containing 'treatment' and 'target' plus the feature columns

    Returns:
    The train_test_split result for (features, target, treatment), in the order
    X_train, X_test, y_train, y_test, t_train, t_test. 30% of rows go to the
    test side and the split is seeded with random_state=42.
    """
    features = df.drop(['treatment', 'target'], axis=1)
    outcome, assignment = df['target'], df['treatment']
    return train_test_split(
        features, outcome, assignment,
        test_size=0.3,
        random_state=42,
    )

X_train, X_test, y_train, y_test, t_train, t_test = split_data(df)

## Run Two-Model and Single-Model Approaches

In [42]:
# Model Optimization with Optuna
def optimize_model(trial, X, y, model_type):
    """Optuna objective: mean 3-fold cross-validated ROC AUC for one trial.

    Parameters:
    trial: Optuna trial used to sample the hyperparameters
    X, y: training features and labels passed to cross_val_score
    model_type: 'xgboost' or 'random_forest'

    Returns:
    Mean of the three 'roc_auc' fold scores.

    Raises:
    ValueError: if model_type is neither 'xgboost' nor 'random_forest'.
    """
    if model_type not in ('xgboost', 'random_forest'):
        raise ValueError("Unsupported model type")

    if model_type == 'xgboost':
        # Search space for the gradient-boosted model.
        search_space = dict(
            n_estimators=trial.suggest_int('n_estimators', 50, 300),
            max_depth=trial.suggest_int('max_depth', 3, 10),
            learning_rate=trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
            subsample=trial.suggest_float('subsample', 0.6, 1.0),
            colsample_bytree=trial.suggest_float('colsample_bytree', 0.6, 1.0),
            gamma=trial.suggest_float('gamma', 0, 5),
        )
        # XGBoost's own eval metric is logloss, but trials are ranked on AUC below.
        estimator = XGBClassifier(**search_space, eval_metric='logloss')
    else:
        # Search space for the random forest.
        search_space = dict(
            n_estimators=trial.suggest_int('n_estimators', 50, 300),
            max_depth=trial.suggest_int('max_depth', 3, 20),
            min_samples_split=trial.suggest_int('min_samples_split', 2, 10),
            min_samples_leaf=trial.suggest_int('min_samples_leaf', 1, 10),
            max_features=trial.suggest_float('max_features', 0.6, 1.0),
        )
        estimator = RandomForestClassifier(**search_space, random_state=42)

    fold_aucs = cross_val_score(estimator, X, y, cv=3, scoring='roc_auc')
    return np.mean(fold_aucs)

In [43]:
# Run Optuna for Both Models
def run_optuna(X, y, model_type, model_seg, n_trials=50):
    """Tune one model type with Optuna and return the best hyperparameters.

    Parameters:
    X, y: training data forwarded to optimize_model
    model_type: model family forwarded to optimize_model
    model_seg: label for the data segment, used only in the printed message
    n_trials: number of Optuna trials to run (default 50)

    Returns:
    dict of the best parameters found by the study.
    """
    def objective(trial):
        # Bind the data and model type so Optuna only has to supply the trial.
        return optimize_model(trial, X, y, model_type)

    # The objective is an AUC, so the study maximizes it.
    study = optuna.create_study(direction='maximize')
    study.optimize(objective, n_trials=n_trials)

    best = study.best_params
    print(f"Best parameters for {model_type} for {model_seg}: {best}")
    return best

In [44]:
# Train and Evaluate Models
def train_and_evaluate(X_train, X_test, y_train, y_test, params, model_type, model_seg):
    """Fit the final model with tuned hyperparameters and score it on the test set.

    Parameters:
    X_train, y_train: data the model is fitted on
    X_test, y_test: data used for the predicted probabilities and the metrics
    params: hyperparameters (as returned by run_optuna) passed to the constructor
    model_type: 'xgboost' or 'random_forest'
    model_seg: label for the data segment, used only in the printed message

    Returns:
    (fitted model, predicted positive-class probabilities for X_test)

    Raises:
    ValueError: if model_type is neither 'xgboost' nor 'random_forest'.
    """
    if model_type not in ('xgboost', 'random_forest'):
        raise ValueError("Unsupported model type")

    # Build the final estimator from the hyperparameters Optuna selected.
    if model_type == 'xgboost':
        estimator = XGBClassifier(**params, eval_metric='logloss')
    else:
        estimator = RandomForestClassifier(**params, random_state=42)

    estimator.fit(X_train, y_train)
    # Column 1 of predict_proba is the probability of the positive class.
    positive_proba = estimator.predict_proba(X_test)[:, 1]

    test_auc = roc_auc_score(y_test, positive_proba)
    test_logloss = log_loss(y_test, positive_proba)
    print(f"{model_type} AUC for {model_seg}: {test_auc:.4f}\n{model_type} Log Loss for {model_seg}: {test_logloss:.4f}")
    return estimator, positive_proba

In [45]:
# Optimize and Train Separate Models for Two-Model Approach
def two_model_approach_with_optuna(X_train, X_test, y_train, y_test, t_train):
    """Tune and train every model needed for the two-model and single-model approaches.

    For both 'xgboost' and 'random_forest' this tunes (via run_optuna) and then
    trains (via train_and_evaluate):
      * a model on the treated training rows (t_train == 1),
      * a model on the control training rows (t_train == 0),
      * a single model on all training rows with 'treatment' added as a feature,
        scored twice on the test rows: once with treatment forced to 1 and once
        with treatment forced to 0.

    Parameters:
    X_train, y_train, t_train: training features, outcomes and treatment flags
    X_test, y_test: test features and outcomes

    Returns:
    A 16-tuple of (model, predictions) pairs, flattened, in the order:
    treatment xgboost, treatment random forest, control xgboost, control random
    forest, single_1 xgboost, single_1 random forest, single_0 xgboost,
    single_0 random forest.
    """
    learners = ('xgboost', 'random_forest')

    # Two-model approach: separate training subsets per arm.
    treated_rows = t_train == 1
    control_rows = t_train == 0
    X_treatment, y_treatment = X_train[treated_rows], y_train[treated_rows]
    X_control, y_control = X_train[control_rows], y_train[control_rows]

    # Single-model approach: treatment becomes a feature; the test set is scored
    # under both treatment values.
    X_single_train = X_train.assign(treatment=t_train)
    X_single_test_1 = X_test.assign(treatment=1)
    X_single_test_0 = X_test.assign(treatment=0)

    # Hyperparameter search, segment by segment (xgboost first, then random forest).
    tuning_plan = (
        ('treatment', X_treatment, y_treatment),
        ('control', X_control, y_control),
        ('single', X_single_train, y_train),
    )
    best_params = {}
    for segment, X_seg, y_seg in tuning_plan:
        for learner in learners:
            best_params[segment, learner] = run_optuna(X_seg, y_seg, learner, segment)

    # Final fits. Both 'single_*' entries reuse the 'single' hyperparameters and
    # differ only in which version of the test set is scored.
    training_plan = (
        ('treatment', 'treatment', X_treatment, y_treatment, X_test),
        ('control', 'control', X_control, y_control, X_test),
        ('single_1', 'single', X_single_train, y_train, X_single_test_1),
        ('single_0', 'single', X_single_train, y_train, X_single_test_0),
    )
    outputs = []
    for segment, params_key, X_fit, y_fit, X_eval in training_plan:
        for learner in learners:
            model, preds = train_and_evaluate(
                X_fit, X_eval, y_fit, y_test,
                best_params[params_key, learner], learner, segment,
            )
            outputs.extend((model, preds))

    return tuple(outputs)

In [46]:
# Execute the Two-Model Approach with Optuna using function "two_model_approach_with_optuna"
model_treatment_xgboost, preds_treatment_xgboost, model_treatment_randomforest, preds_treatment_randomforest, model_control_xgboost, preds_control_xgboost, model_control_randomforest, preds_control_randomforest, model_single_1_xgboost, preds_single_1_xgboost, model_single_1_randomforest, preds_single_1_randomforest, model_single_0_xgboost, preds_single_0_xgboost, model_single_0_randomforest, preds_single_0_randomforest = two_model_approach_with_optuna(X_train, X_test, y_train, y_test, t_train)

[I 2024-12-23 14:32:09,074] A new study created in memory with name: no-name-d84b42d9-ea7f-425f-b98a-6e3fe6b461b5
[I 2024-12-23 14:32:09,475] Trial 0 finished with value: 0.6118543375745146 and parameters: {'n_estimators': 227, 'max_depth': 4, 'learning_rate': 0.011102732427673404, 'subsample': 0.6359825674692446, 'colsample_bytree': 0.871518162874717, 'gamma': 3.7131452388142305}. Best is trial 0 with value: 0.6118543375745146.
[I 2024-12-23 14:32:09,818] Trial 1 finished with value: 0.6143241628081094 and parameters: {'n_estimators': 187, 'max_depth': 10, 'learning_rate': 0.026785674478213208, 'subsample': 0.7329636811500612, 'colsample_bytree': 0.6155214126819654, 'gamma': 4.657185058913145}. Best is trial 1 with value: 0.6143241628081094.
[I 2024-12-23 14:32:10,104] Trial 2 finished with value: 0.6124102102848042 and parameters: {'n_estimators': 135, 'max_depth': 8, 'learning_rate': 0.013050200513450376, 'subsample': 0.7391253840695207, 'colsample_bytree': 0.6434284711150586, 'gamm

KeyboardInterrupt: 

In [25]:
# Uplift scores
def create_uplift_scores(preds_treatment_xgboost,
                        preds_control_xgboost,
                        preds_treatment_randomforest,
                        preds_control_randomforest,
                        preds_single_1_xgboost,
                        preds_single_1_randomforest,
                        preds_single_0_xgboost,
                        preds_single_0_randomforest):
    """Turn paired predictions into uplift scores (treated prediction minus control prediction).

    Two-model uplift is the treatment-model prediction minus the control-model
    prediction; single-model uplift is the prediction with treatment=1 minus
    the prediction with treatment=0.

    Returns:
    (two-model random forest, two-model xgboost, single-model xgboost,
    single-model random forest) - note the order differs from the argument order.
    """
    return (
        preds_treatment_randomforest - preds_control_randomforest,
        preds_treatment_xgboost - preds_control_xgboost,
        preds_single_1_xgboost - preds_single_0_xgboost,
        preds_single_1_randomforest - preds_single_0_randomforest,
    )

# Pass predictions by keyword: the function's parameters interleave model
# families, so keywords keep each array bound to the right slot.
uplift_two_model_rf, uplift_two_model_xg, uplift_single_model_xg, uplift_single_model_rf = create_uplift_scores(
    preds_treatment_xgboost=preds_treatment_xgboost,
    preds_control_xgboost=preds_control_xgboost,
    preds_treatment_randomforest=preds_treatment_randomforest,
    preds_control_randomforest=preds_control_randomforest,
    preds_single_1_xgboost=preds_single_1_xgboost,
    preds_single_1_randomforest=preds_single_1_randomforest,
    preds_single_0_xgboost=preds_single_0_xgboost,
    preds_single_0_randomforest=preds_single_0_randomforest,
)

In [None]:
# Create a dataframe with test results
results_df = pd.DataFrame({
    'y_true': y_test,               # Actual outcomes
    'treatment': t_test,            # Treatment indicators
    'uplift_rf': uplift_two_model_rf,         # Uplift predictions from Random Forest
    'uplift_xgb': uplift_two_model_xg        # Uplift predictions from XGBoost
})
results_df['uplift_diff'] = results_df['uplift_rf'] - results_df['uplift_xgb']
results_df.head()

## Compare Two-Model Approaches and Single-Model Approaches

In [None]:
# Tabular Comparison of Qini Scores
qini_two_model_rf = qini_auc_score(y_test, uplift_two_model_rf, t_test)
qini_two_model_xg = qini_auc_score(y_test, uplift_two_model_xg, t_test)

comparison_df = pd.DataFrame({
    'Model': ['Two-Model RF', 'Two-Model XG'],
    'Qini Score': [qini_two_model_rf, qini_two_model_xg]
})
print("\n### Qini Score Comparison ###")
print(comparison_df)

In [None]:
# Enhanced Qini Evaluation
def evaluate_qini(uplift, y_test, t_test, model_name):
    """Print the Qini AUC for a set of uplift scores and plot uplift@k by targeting depth.

    Parameters:
    uplift: predicted uplift scores, used by sklift to rank the rows
    y_test: observed outcomes for the same rows
    t_test: treatment indicators for the same rows
    model_name: label used in the printed line, the legend and the plot title

    Returns:
    None. The function prints one line and shows a matplotlib figure.
    """
    qini = qini_auc_score(y_test, uplift, t_test)

    # Sweep targeting depths 0.01 ... 0.99. The previous version used `k`,
    # `k_values` and `uplift_cumulative` without ever defining them, so any call
    # failed with NameError. The sweep stops short of 1.0 because sklift's
    # uplift_at_k expects a float k strictly inside (0, 1).
    k_values = [i / 100 for i in range(1, 100)]
    uplift_cumulative = [
        uplift_at_k(y_test, uplift, t_test, strategy='overall', k=k)
        for k in k_values
    ]

    print(f"{model_name} Qini Score: {qini:.4f}")

    # Plot uplift@k against targeting depth, plus a straight reference line from
    # the origin to the largest uplift@k observed.
    plt.plot(k_values, uplift_cumulative, label=f'{model_name} Model')
    plt.plot([0, 1], [0, max(uplift_cumulative)], '--', label='Random')
    plt.xlabel('Proportion of Population Targeted')
    plt.ylabel('Cumulative Uplift')
    plt.title(f'Qini Curve - {model_name}')
    plt.legend()
    plt.show()

# Compare the two Two-Model variants: Random Forest vs XGBoost
print("### Two-Model Random Forest Approach Evaluation ###")
evaluate_qini(uplift_two_model_rf, y_test, t_test, "Two-Model Random Forest")

print("### Two-Model XGBoost Approach Evaluation ###")
evaluate_qini(uplift_two_model_xg, y_test, t_test, "Two-Model XGBoost")

In [80]:
def calculate_uplift_metrics(df, uplift, n_groups=5):
    """
    Summarise observed uplift across score-ranked groups.

    Rows are sorted by the `uplift` column (highest score first) and cut into
    `n_groups` consecutive groups of len(df) // n_groups rows; the last group
    also takes any remainder. For each group the observed uplift is the mean of
    'y_true' among treated rows minus the mean of 'y_true' among control rows.

    Parameters:
    df: DataFrame with columns 'y_true' (actual outcome), 'treatment' (0/1
        indicator) and the score column named by `uplift`
    uplift: name of the column holding the predicted uplift scores
    n_groups: int, number of score-ranked groups (must be >= 1)

    Returns:
    dict with uplift metrics:
        'AUUC': trapezoidal area under the per-group uplifts (unit spacing)
                divided by the number of groups
        'Qini': sum of the per-group uplifts
        'top_group_uplift': uplift of the highest-scored group
        'uplift_by_group': list of per-group uplifts, best-scored group first
    NOTE: 'AUUC' and 'Qini' are simple group-based summaries computed here, not
    the sklift/causalml metrics of the same name.

    A group with no treated rows or no control rows gets an uplift of NaN.

    Raises:
    ValueError: if n_groups is smaller than 1.
    """
    if n_groups < 1:
        raise ValueError("n_groups must be a positive integer")

    def _mean_or_nan(values):
        # Mean of a possibly empty slice; NaN (without a NumPy warning) when empty.
        return np.mean(values) if values.size else np.nan

    # Sort by uplift scores (sort_values returns a new frame; the input is untouched)
    ranked = df.sort_values(by=uplift, ascending=False)
    y_true = ranked['y_true'].values
    treatment = ranked['treatment'].values

    n_samples = len(y_true)
    group_size = n_samples // n_groups

    gains = []
    for i in range(n_groups):
        start_idx = i * group_size
        # The last group absorbs the rows left over by the integer division.
        end_idx = (i + 1) * group_size if i < n_groups - 1 else n_samples

        group_treat = treatment[start_idx:end_idx]
        group_outcome = y_true[start_idx:end_idx]

        # Treatment and control response rates within the group
        treat_rate = _mean_or_nan(group_outcome[group_treat == 1])
        ctrl_rate = _mean_or_nan(group_outcome[group_treat == 0])

        # Use a dedicated name so the `uplift` column argument is not overwritten.
        gains.append(treat_rate - ctrl_rate)

    # np.trapz was renamed to np.trapezoid in NumPy 2.0; use whichever exists.
    trapezoid = getattr(np, 'trapezoid', None) or np.trapz

    metrics = {
        'AUUC': trapezoid(gains) / len(gains),
        'Qini': np.sum(gains),
        'top_group_uplift': gains[0],  # Uplift in highest scored group
        'uplift_by_group': gains
    }

    return metrics

In [83]:
xgb_metrics = calculate_uplift_metrics(
   df=results_df,
   uplift='uplift_xgb'
   )

rf_metrics = calculate_uplift_metrics(
   df=results_df,
   uplift='uplift_rf'
   )

In [None]:
xgb_metrics

In [None]:
rf_metrics

In [88]:
xgb_metrics = calculate_uplift_metrics(
   df=results_df,
   uplift='uplift_xgb'
   )

rf_metrics = calculate_uplift_metrics(
   df=results_df,
   uplift='uplift_rf'
   )

In [None]:
# 5. Enhanced Qini Evaluation
def evaluate_qini(uplift, y_test, t_test, model_name):
    """Report the Qini AUC of an uplift ranking and chart uplift@k over targeting depth.

    Parameters:
    uplift: predicted uplift scores, used by sklift to rank the rows
    y_test: observed outcomes for the same rows
    t_test: treatment indicators for the same rows
    model_name: label used in the printed line, the legend and the plot title

    Returns:
    None. Output is a printed score line and a displayed matplotlib figure.
    """
    qini = qini_auc_score(y_test, uplift, t_test)

    # Depths 1% through 99% of the population. 100% is left out because
    # sklift's uplift_at_k expects a float k strictly inside (0, 1).
    depths = [step / 100 for step in range(1, 100)]
    uplift_at_depth = [
        uplift_at_k(y_test, uplift, t_test, strategy='overall', k=depth)
        for depth in depths
    ]

    print(f"{model_name} Qini Score: {qini:.4f}")

    # NOTE(review): the y-values are uplift@k per depth even though the labels
    # describe a cumulative Qini curve - confirm this is the intended chart.
    plt.plot(depths, uplift_at_depth, label=f'{model_name} Model')
    # Dashed reference line from the origin to the largest uplift@k observed.
    plt.plot([0, 1], [0, max(uplift_at_depth)], '--', label='Random')
    plt.xlabel('Proportion of Population Targeted')
    plt.ylabel('Cumulative Uplift')
    plt.title(f'Qini Curve - {model_name}')
    plt.legend()
    plt.show()

# Compare Two-Model and S-Learner
# NOTE(review): `uplift_two_model` and `uplift_s_learner` are not assigned in
# any cell of this notebook, so this cell raises NameError as written. The
# uplift arrays returned by create_uplift_scores are named uplift_two_model_rf,
# uplift_two_model_xg, uplift_single_model_xg and uplift_single_model_rf -
# decide which pair is the intended "winning" comparison and use those names.
print("### Two-Model Approach Evaluation ###")
evaluate_qini(uplift_two_model, y_test, t_test, "Two-Model")

print("### S-Learner Evaluation ###")
evaluate_qini(uplift_s_learner, y_test, t_test, "S-Learner")


### Two-Model Approach Evaluation ###


NameError: name 'uplift_two_model' is not defined

In [None]:
# 6. Tabular Comparison of Qini Scores
# NOTE(review): `uplift_two_model` and `uplift_s_learner` are not assigned in
# any cell of this notebook; the arrays produced by create_uplift_scores are
# uplift_two_model_rf / uplift_two_model_xg / uplift_single_model_xg /
# uplift_single_model_rf. This cell raises NameError until the names are fixed.
qini_two_model = qini_auc_score(y_test, uplift_two_model, t_test)
qini_s_learner = qini_auc_score(y_test, uplift_s_learner, t_test)

# One row per approach, with the Qini AUC reported by sklift.
comparison_df = pd.DataFrame({
    'Model': ['Two-Model', 'S-Learner'],
    'Qini Score': [qini_two_model, qini_s_learner]
})
print("\n### Qini Score Comparison ###")
print(comparison_df)