# Estimation of CATE conditional on host's race (white/black)

In [25]:
import numpy as np
import pandas as pd
import scipy as sp
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.model_selection import KFold, StratifiedKFold, train_test_split
from sklearn.metrics import mean_squared_error, log_loss
import sklearn
import os
from sklearn.linear_model import LogisticRegression

In [26]:
# Single seed for the notebook: it seeds NumPy's global RNG here and is also
# passed as random_state to the random forest and the K-fold splitters below.
RANDOM_SEED=42
np.random.seed(RANDOM_SEED)

## Load and Format the Data

In [27]:
main_data = pd.read_csv("../data/clean_data/merged_with_hosts.csv")

# Keep only rows that are complete in every column the analysis reads.
# The treatment column (guest_black) is part of the subset: a missing treatment
# would otherwise reach the estimator, where pandas' NaN-skipping mean() would
# silently drop the unit instead of failing.
main_data_cleaned = main_data.dropna(
    subset=[
        "yes",
        "guest_black",
        "host_race_black",
        "host_gender_M",
        "multiple_listings",
        "shared_property",
        "ten_reviews",
        "log_price",
    ]
)

In [28]:
# Preview the first rows of the NA-filtered frame
main_data_cleaned.head()

Unnamed: 0,host_response,response_date,number_of_messages,automated_coding,latitude,longitude,bed_type,property_type,cancellation_policy,number_guests,...,baltimore,dallas,los_angeles,sl,dc,total_guests,raw_black,prop_black,any_black,past_guest_merge
0,1,2015-07-19 08:26:17,2.0,1,34.0815,-118.27,Real Bed,House,Flexible,3.0,...,0,0,1,0,0,11.0,0.0,0.0,0.0,matched (3)
1,0,2015-07-14 14:13:39,,1,38.9107,-77.0198,,House,Moderate,2.0,...,0,0,0,0,1,167.0,0.0,0.0,0.0,matched (3)
2,2,2015-07-20 16:24:08,2.0,0,34.0047,-118.481,Pull-out Sofa,Apartment,Strict,1.0,...,0,0,1,0,0,19.0,0.0,0.0,0.0,matched (3)
3,10,2015-07-20 06:47:38,,0,34.0917,-118.282,,House,Strict,8.0,...,0,0,1,0,0,41.0,0.0,0.0,0.0,matched (3)
5,4,2015-07-18 18:07:19,,0,34.0809,-118.367,,Apartment,Strict,3.0,...,0,0,1,0,0,263.0,1.0,0.003802,1.0,matched (3)


In [29]:
# Covariates fed to the outcome model alongside the treatment
confounders = main_data_cleaned[["host_gender_M", "multiple_listings", "shared_property", "ten_reviews", "log_price"]]
# Outcome: the "yes" column (used as a binary target by the classifier below)
outcome = main_data_cleaned["yes"]
# Treatment indicator: the "guest_black" column
treatment = main_data_cleaned["guest_black"]
# Host-race indicator columns.
# NOTE(review): condition1/condition2 are not referenced by the cells visible here;
# the per-race split below re-reads main_data_cleaned['host_race_black'] directly.
condition1 = main_data_cleaned["host_race_white"]
condition2 = main_data_cleaned["host_race_black"]

## Specify Nuisance Function Models

The next step is to specify models for the conditional expected outcome and propensity score

In [30]:
# specify a model for the conditional expected outcome

# make a function that returns a sklearn model for later use in k-folding
def make_Q_model():
    """Return a new, unfitted random-forest classifier used as the outcome model Q.

    A fresh instance is built on every call so each cross-fitting fold can fit
    its own model; the forest is seeded with RANDOM_SEED.
    """
    forest_settings = dict(n_estimators=100, max_depth=5, random_state=RANDOM_SEED)
    return RandomForestClassifier(**forest_settings)
Q_model = make_Q_model()

# Sanity check that chosen model actually improves test error
# A real analysis should give substantial attention to model selection and validation

X_w_treatment = confounders.copy()
X_w_treatment["treatment"] = treatment

# Seed the split explicitly: without random_state the split draws from the global
# NumPy RNG, so re-running this cell would change the held-out set and the
# numbers printed below.
X_train, X_test, y_train, y_test = train_test_split(
    X_w_treatment, outcome, test_size=0.2, random_state=RANDOM_SEED
)
Q_model.fit(X_train, y_train)
# column 1 of predict_proba is the predicted probability of the second class
y_pred = Q_model.predict_proba(X_test)[:,1]

test_ce=log_loss(y_test, y_pred)
print(f"Test CE of fit model {test_ce}")
# Baseline: predict the training-set outcome rate for every held-out unit
baseline_ce=log_loss(y_test, y_train.mean()*np.ones_like(y_test))
print(f"Test CE of no-covariate model {baseline_ce}")

Test CE of fit model 0.6804198257820814
Test CE of no-covariate model 0.6917462955382059


Because this is a randomized experiment, treatment is randomly assigned and is not confounded by X. Therefore, the propensity score does not need to be estimated, and we can set g(x) = 0.5.

## Use cross-fitting to get predicted outcomes and propensity scores for each unit

In [31]:
# helper functions to implement the cross fitting

def outcome_k_fold_fit_and_predict(make_model, X:pd.DataFrame, y:np.array, A:np.array, n_splits:int, output_type:str):
    """
    Implements K fold cross-fitting for the model predicting the outcome Y.
    That is,
    1. Split data into K folds
    2. For each fold j, the model is fit on the other K-1 folds
    3. The fitted model is used to make predictions for each data point in fold j
    Returns two arrays containing the predictions for all units untreated, all units treated

    Args:
    make_model: function that returns a new sklearn model (that implements fit and
        either predict_proba or predict)
    X: dataframe of variables to adjust for
    y: array of outcomes (used positionally, row i of y belongs to row i of X)
    A: array of treatments (used positionally, row i of A belongs to row i of X)
    n_splits: number of splits to use
    output_type: type of outcome, "binary" or "continuous"

    Returns:
    (predictions0, predictions1): float arrays with one entry per row of X, the
        out-of-fold prediction with the treatment column forced to 0 and to 1.
        For "binary" this is column 1 of predict_proba.

    Raises:
    ValueError: if output_type is neither "binary" nor "continuous".
    """
    # Fail fast with a clear message; previously an unknown output_type left the
    # splitter undefined and surfaced later as an UnboundLocalError.
    if output_type not in ('binary', 'continuous'):
        raise ValueError(
            f"output_type must be 'binary' or 'continuous', got {output_type!r}"
        )

    predictions0 = np.full_like(A, np.nan, dtype=float)
    predictions1 = np.full_like(y, np.nan, dtype=float)

    if output_type == 'binary':
        # stratify on the outcome so every training fold contains both classes
        kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)
    else:
        kf = KFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)

    # Loop-invariant: make y positionally indexable via .iloc once, up front.
    if not isinstance(y, pd.Series):
        y = pd.Series(y, index=X.index)

    # include the treatment as input feature
    # np.asarray attaches A by position (like y and the fold indices), so a Series
    # whose index labels differ from X's cannot be silently realigned into NaNs.
    X_w_treatment = X.copy()
    X_w_treatment["A"] = np.asarray(A)

    # for predicting effect under treatment / control status for each data point
    X0 = X_w_treatment.copy()
    X0["A"] = 0
    X1 = X_w_treatment.copy()
    X1["A"] = 1

    for train_index, test_index in kf.split(X_w_treatment, y):
        # a fresh model per fold, fit only on the other folds
        q = make_model()
        q.fit(X_w_treatment.iloc[train_index], y.iloc[train_index])

        if output_type == 'binary':
            predictions0[test_index] = q.predict_proba(X0.iloc[test_index])[:, 1]
            predictions1[test_index] = q.predict_proba(X1.iloc[test_index])[:, 1]
        else:
            predictions0[test_index] = q.predict(X0.iloc[test_index])
            predictions1[test_index] = q.predict(X1.iloc[test_index])

    # every unit falls in exactly one test fold, so no placeholder NaN may remain
    assert np.isnan(predictions0).sum() == 0
    assert np.isnan(predictions1).sum() == 0
    return predictions0, predictions1

In [32]:
# Because this is a randomized experiment, treatment is randomly assigned and is not
# confounded by X, so the propensity score is fixed at g(x) = 0.5 rather than estimated.
g = 0.5

## Estimate CATEs

In [33]:
def cate_aiptw(Q0, Q1, g, A, Y):
    """
    AIPTW point estimate and standard error for a (sub)group treatment effect.

    The same formula as for the ATE; the caller applies it to one subgroup at a
    time to obtain a CATE.

    Args:
    Q0, Q1: predicted outcomes for each unit under control / under treatment
    g: propensity score (scalar or per-unit)
    A: treatment indicators
    Y: observed outcomes

    Returns:
    (tau_hat, std_hat): the mean of the per-unit AIPTW terms, and the standard
        deviation of the centered terms divided by sqrt(n), with n = Y.shape[0].
    """
    # residual corrections for the treated and the control arm
    treated_term = A*(Y-Q1)/g
    control_term = (1-A)*(Y-Q0)/(1-g)

    # per-unit AIPTW contribution: plug-in contrast plus the two corrections
    per_unit = Q1 - Q0 + treated_term - control_term

    tau_hat = per_unit.mean()
    centered = per_unit - tau_hat

    n_units = Y.shape[0]
    std_hat = np.std(centered) / np.sqrt(n_units)
    return tau_hat, std_hat

def estimate_cate_by_host_race(main_data_cleaned, confounders, outcome, treatment, n_splits=10, g=0.5):
    """
    Estimate CATE conditioned on host race

    For each distinct value of main_data_cleaned['host_race_black'] the data are
    restricted to that group, the outcome model is cross-fitted with
    outcome_k_fold_fit_and_predict(make_Q_model, ...), and the AIPTW estimate,
    standard error and 95% interval (estimate +/- 1.96 * SE) are computed.
    Progress and results are printed as a side effect.

    Args:
    main_data_cleaned: dataframe holding the 'host_race_black' column
    confounders: dataframe of adjustment variables, row-aligned with main_data_cleaned
    outcome: series of outcomes, row-aligned with main_data_cleaned
    treatment: series of treatment indicators, row-aligned with main_data_cleaned
    n_splits: number of cross-fitting folds (default 10)
    g: propensity score used by the AIPTW estimator (default 0.5, the value used
        for this randomized experiment; the group's empirical treatment rate is
        a possible alternative)

    Returns:
    dict keyed by host_race_black value. Each entry holds 'race_label', 'n',
    'treatment_rate', 'outcome_rate', 'cate_estimate', 'std_error', 'ci_lower'
    and 'ci_upper'. Groups without treatment variation, or whose estimation
    raised, are omitted.
    """
    results = {}

    # Get unique host races for stratification.
    # Sorted so that the iteration order (and hence the order of the returned dict,
    # which downstream comparisons rely on) does not depend on the row order of
    # the input; missing values are not a group.
    host_races = sorted(main_data_cleaned['host_race_black'].dropna().unique())
    race_labels = {0: 'White Host', 1: 'Black Host'}

    print("=== CATE Estimation by Host Race ===\n")

    for race in host_races:
        race_label = race_labels.get(race, f'Host Race {race}')
        print(f"--- {race_label} ---")

        # Filter data for this host race
        race_mask = main_data_cleaned['host_race_black'] == race
        race_confounders = confounders[race_mask].copy()
        race_outcome = outcome[race_mask].copy()
        race_treatment = treatment[race_mask].copy()

        # Reset indices so the three pieces share a 0..n-1 index and line up with
        # the positional arrays returned by the cross-fitting helper
        race_confounders = race_confounders.reset_index(drop=True)
        race_outcome = race_outcome.reset_index(drop=True)
        race_treatment = race_treatment.reset_index(drop=True)

        print(f"Sample size: {len(race_outcome)}")
        print(f"Treatment rate: {race_treatment.mean():.3f}")
        print(f"Outcome rate: {race_outcome.mean():.3f}")

        # Check if we have enough data (warning only; estimation still proceeds)
        if len(race_outcome) < 50:
            print(f"Warning: Small sample size for {race_label}")

        # Check treatment variation: with a single arm there is no contrast to estimate
        if race_treatment.var() == 0:
            print(f"No treatment variation for {race_label}")
            continue

        # Best effort per group: a failure in one group is reported and the loop
        # moves on to the next group instead of aborting the whole run.
        try:
            # Step 1: Estimate Q (outcome model) using cross-fitting
            print("Estimating outcome model (Q)...")
            Q0, Q1 = outcome_k_fold_fit_and_predict(
                make_Q_model,
                X=race_confounders,
                y=race_outcome,
                A=race_treatment,
                n_splits=n_splits,
                output_type='binary'
            )

            # Step 2: Propensity score g is supplied by the caller
            print(f"Using propensity score g = {g} (randomized experiment)")

            # Step 3: Estimate CATE using AIPTW
            print("Estimating CATE...")
            tau_hat, std_hat = cate_aiptw(Q0, Q1, g, race_treatment, race_outcome)

            # Calculate confidence interval
            ci_lower = tau_hat - 1.96 * std_hat
            ci_upper = tau_hat + 1.96 * std_hat

            # Store results
            results[race] = {
                'race_label': race_label,
                'n': len(race_outcome),
                'treatment_rate': race_treatment.mean(),
                'outcome_rate': race_outcome.mean(),
                'cate_estimate': tau_hat,
                'std_error': std_hat,
                'ci_lower': ci_lower,
                'ci_upper': ci_upper
            }

            print(f"CATE Estimate: {tau_hat:.4f}")
            print(f"Standard Error: {std_hat:.4f}")
            print(f"95% CI: [{ci_lower:.4f}, {ci_upper:.4f}]")

            # Interpretation: significant only when the interval excludes zero
            if ci_lower > 0:
                print("Significant positive effect of black-sounding names")
            elif ci_upper < 0:
                print("Significant negative effect of black-sounding names")
            else:
                print("No significant effect")

        except Exception as e:
            print(f"Error estimating CATE for {race_label}: {str(e)}")

        print("\n")

    return results

In [34]:
# Run the per-host-race estimation; the bare `results` on the last line displays the returned dict
results = estimate_cate_by_host_race(main_data_cleaned, confounders, outcome, treatment)
results

=== CATE Estimation by Host Race ===

--- White Host ---
Sample size: 5685
Treatment rate: 0.496
Outcome rate: 0.447
Estimating outcome model (Q)...
Using propensity score g = 0.5 (randomized experiment)
Estimating CATE...
CATE Estimate: -0.0885
Standard Error: 0.0129
95% CI: [-0.1139, -0.0631]
Significant negative effect of black-sounding names


--- Black Host ---
Sample size: 483
Treatment rate: 0.499
Outcome rate: 0.513
Estimating outcome model (Q)...
Using propensity score g = 0.5 (randomized experiment)
Estimating CATE...
CATE Estimate: -0.0776
Standard Error: 0.0451
95% CI: [-0.1659, 0.0108]
No significant effect




{np.int64(0): {'race_label': 'White Host',
  'n': 5685,
  'treatment_rate': np.float64(0.4962181178540018),
  'outcome_rate': np.float64(0.4474934036939314),
  'cate_estimate': np.float64(-0.08848817664764497),
  'std_error': np.float64(0.012943821377898445),
  'ci_lower': np.float64(-0.11385806654832592),
  'ci_upper': np.float64(-0.06311828674696401)},
 np.int64(1): {'race_label': 'Black Host',
  'n': 483,
  'treatment_rate': np.float64(0.4989648033126294),
  'outcome_rate': np.float64(0.5134575569358178),
  'cate_estimate': np.float64(-0.07755795416415273),
  'std_error': np.float64(0.04507968699429796),
  'ci_lower': np.float64(-0.16591414067297672),
  'ci_upper': np.float64(0.010798232344671269)}}

## Compare CATEs

In [35]:
def compare_cates(results):
    """
    Print the CATE estimate of every group and, for exactly two groups, their difference.

    Args:
    results: dict of per-group summaries, each with the keys 'race_label',
        'cate_estimate', 'std_error' and 'n'.

    Prints one line per group. When there are exactly two groups it also prints
    the difference (first entry minus second entry, in the dict's order), its
    standard error sqrt(se1^2 + se2^2), and the interval difference +/- 1.96 * SE.
    With fewer than two groups only a notice is printed. Returns None.
    """
    print("=== CATE Comparison ===")

    group_count = len(results)
    if group_count < 2:
        print("Need at least 2 groups to compare")
        return

    for summary in results.values():
        label = summary['race_label']
        estimate = summary['cate_estimate']
        std_error = summary['std_error']
        print(f"{label}: {estimate:.4f} (SE: {std_error:.4f}, N: {summary['n']})")

    # Test for difference between groups (approximate); only defined for a pair
    if group_count != 2:
        return

    first, second = results.values()
    diff = first['cate_estimate'] - second['cate_estimate']
    # the two standard errors are combined by summing their squares
    se_diff = np.sqrt(first['std_error']**2 + second['std_error']**2)
    margin = 1.96*se_diff

    print(f"\nDifference in CATEs: {diff:.4f} (SE: {se_diff:.4f})")
    print(f"95% CI for difference: [{diff - margin:.4f}, {diff + margin:.4f}]")

In [36]:
# Print the per-group estimates and the difference between the two host-race groups
compare_cates(results)

=== CATE Comparison ===
White Host: -0.0885 (SE: 0.0129, N: 5685)
Black Host: -0.0776 (SE: 0.0451, N: 483)

Difference in CATEs: -0.0109 (SE: 0.0469)
95% CI for difference: [-0.1029, 0.0810]
