# Estimation of CATE conditional on whether the host has had African American guests before

### (measured as proportion of African American guests before)

In [2]:
import numpy as np
import pandas as pd
import scipy as sp
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.model_selection import KFold, StratifiedKFold, train_test_split
from sklearn.metrics import mean_squared_error, log_loss
import sklearn
import os
from sklearn.linear_model import LogisticRegression

In [3]:
RANDOM_SEED=42
np.random.seed(RANDOM_SEED)

## Load and Format the Data

In [5]:
main_data = pd.read_csv("merged_with_hosts.csv")
main_data_cleaned = main_data.dropna(subset=["yes", "host_race_black", "host_gender_M", "multiple_listings", "shared_property", "ten_reviews", "log_price"])

In [6]:
main_data_cleaned.head()

Unnamed: 0,host_response,response_date,number_of_messages,automated_coding,latitude,longitude,bed_type,property_type,cancellation_policy,number_guests,...,baltimore,dallas,los_angeles,sl,dc,total_guests,raw_black,prop_black,any_black,past_guest_merge
0,1,2015-07-19 08:26:17,2.0,1,34.0815,-118.27,Real Bed,House,Flexible,3.0,...,0,0,1,0,0,11.0,0.0,0.0,0.0,matched (3)
1,0,2015-07-14 14:13:39,,1,38.9107,-77.0198,,House,Moderate,2.0,...,0,0,0,0,1,167.0,0.0,0.0,0.0,matched (3)
2,2,2015-07-20 16:24:08,2.0,0,34.0047,-118.481,Pull-out Sofa,Apartment,Strict,1.0,...,0,0,1,0,0,19.0,0.0,0.0,0.0,matched (3)
3,10,2015-07-20 06:47:38,,0,34.0917,-118.282,,House,Strict,8.0,...,0,0,1,0,0,41.0,0.0,0.0,0.0,matched (3)
5,4,2015-07-18 18:07:19,,0,34.0809,-118.367,,Apartment,Strict,3.0,...,0,0,1,0,0,263.0,1.0,0.003802,1.0,matched (3)


In [23]:
confounders = main_data_cleaned[["host_race_black", "host_gender_M", "multiple_listings", "shared_property", "ten_reviews", "log_price"]]
outcome = main_data_cleaned["yes"]
treatment = main_data_cleaned["guest_black"]
condition_prop = main_data_cleaned["prop_black"]
condition_bin = main_data_cleaned["any_black"]

## Specify Nuisance Function Models

In [13]:
# specify a model for the conditional expected outcome

# make a function that returns a sklearn model for later use in k-folding
def make_Q_model():
    """Return a fresh, unfitted outcome model for the conditional expected outcome.

    A new estimator is constructed on every call so that each cross-fitting
    fold can train its own independent model.
    """
    return RandomForestClassifier(n_estimators=100, max_depth=5, random_state=RANDOM_SEED)
Q_model = make_Q_model()

# Sanity check that chosen model actually improves test error
# A real analysis should give substantial attention to model selection and validation 

X_w_treatment = confounders.copy()
X_w_treatment["treatment"] = treatment

# Pin the split seed explicitly: without random_state the split is drawn from
# the global NumPy RNG, so it changes with the order in which cells were run.
X_train, X_test, y_train, y_test = train_test_split(
    X_w_treatment, outcome, test_size=0.2, random_state=RANDOM_SEED
)
Q_model.fit(X_train, y_train)
y_pred = Q_model.predict_proba(X_test)[:,1]

test_ce=log_loss(y_test, y_pred)
print(f"Test CE of fit model {test_ce}") 
# Baseline: predict the training-set outcome rate for every test unit.
# np.full always yields a float array, independent of the dtype of y_test.
baseline_ce=log_loss(y_test, np.full(len(y_test), y_train.mean()))
print(f"Test CE of no-covariate model {baseline_ce}")

Test CE of fit model 0.6706547612189971
Test CE of no-covariate model 0.6887227090920056


Because this is a randomized experiment, treatment is randomly assigned and is not confounded by X. Therefore, we do not need to fit a propensity model and can set the propensity score to g(x) = 0.5.

## Use cross fitting to get predicted outcomes and propensity scores for each unit

In [14]:
# helper functions to implement the cross fitting

def outcome_k_fold_fit_and_predict(make_model, X:pd.DataFrame, y:np.array, A:np.array, n_splits:int, output_type:str):
    """
    Implements K fold cross-fitting for the model predicting the outcome Y.
    That is,
    1. Split data into K folds
    2. For each fold j, a fresh model is fit on the other K-1 folds
    3. The fitted model is used to make predictions for each data point in fold j,
       once with the treatment column forced to 0 and once with it forced to 1
    Returns two arrays containing the predictions for all units untreated, all units treated

    Args:
    make_model: zero-argument function that returns an unfitted sklearn model
        (implementing fit and predict_proba for "binary", or fit and predict for "continuous")
    X: dataframe of variables to adjust for
    y: array of outcomes, one per row of X
    A: array of treatments, one per row of X (matched to X by position)
    n_splits: number of splits to use
    output_type: type of outcome, "binary" or "continuous"

    Returns:
    (predictions0, predictions1): float arrays of length len(X)

    Raises:
    ValueError: if output_type is neither "binary" nor "continuous"
    """
    # Validate up front; otherwise an unknown value would only surface later
    # as an unbound-variable error when the splitter is first used.
    if output_type == 'binary':
        kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)
    elif output_type == 'continuous':
        kf = KFold(n_splits=n_splits, shuffle=True, random_state=RANDOM_SEED)
    else:
        raise ValueError(
            f"output_type must be 'binary' or 'continuous', got {output_type!r}"
        )

    n_units = len(X)
    predictions0 = np.full(n_units, np.nan, dtype=float)
    predictions1 = np.full(n_units, np.nan, dtype=float)

    # Rows are selected positionally (.iloc) below, so make y a Series once,
    # sharing X's index, instead of re-checking inside the fold loop.
    if not isinstance(y, pd.Series):
        y = pd.Series(y, index=X.index)

    # include the treatment as input feature
    # np.asarray attaches A by position, consistent with the positional use of
    # y; a Series with a different index would otherwise be index-aligned and
    # silently introduce NaNs.
    X_w_treatment = X.copy()
    X_w_treatment["A"] = np.asarray(A)

    # for predicting effect under treatment / control status for each data point
    X0 = X_w_treatment.copy()
    X0["A"] = 0
    X1 = X_w_treatment.copy()
    X1["A"] = 1

    for train_index, test_index in kf.split(X_w_treatment, y):
        X_train = X_w_treatment.iloc[train_index]
        y_train = y.iloc[train_index]
        q = make_model()
        q.fit(X_train, y_train)

        if output_type == 'binary':
            # column 1 of predict_proba is taken as the probability of the positive class
            predictions0[test_index] = q.predict_proba(X0.iloc[test_index])[:, 1]
            predictions1[test_index] = q.predict_proba(X1.iloc[test_index])[:, 1]
        else:
            predictions0[test_index] = q.predict(X0.iloc[test_index])
            predictions1[test_index] = q.predict(X1.iloc[test_index])

    # every unit must have been in exactly one test fold
    assert np.isnan(predictions0).sum() == 0
    assert np.isnan(predictions1).sum() == 0
    return predictions0, predictions1

In [15]:
# Because this is a randomized experiment, treatment is randomly assigned and not confounded by X, so the propensity score is set to g(x) = 0.5.
g = 0.5

## Estimate CATEs

In [16]:
def cate_aiptw(Q0, Q1, g, A, Y):
    """
    AIPTW estimator for CATE (the ATE formula applied to one subgroup).

    Q0, Q1: predicted outcomes under control / treatment for each unit
    g: propensity score (probability of treatment)
    A: observed treatments
    Y: observed outcomes

    Returns (tau_hat, std_hat): the point estimate and its standard error,
    the latter being the standard deviation of the centered per-unit scores
    divided by sqrt(n).
    """
    # inverse-probability-weighted residual corrections for each arm
    treated_term = A*(Y-Q1)/g
    control_term = (1-A)*(Y-Q0)/(1-g)

    # per-unit uncentered score; its mean is the point estimate
    phi = Q1 - Q0 + treated_term - control_term
    tau_hat = phi.mean()

    centered = phi - tau_hat
    std_hat = np.std(centered) / np.sqrt(Y.shape[0])
    return tau_hat, std_hat

In [33]:
def estimate_cate_binary(main_data_cleaned, confounders, outcome, treatment, condition, 
                         condition_name="any black guests before", n_splits=10, g=0.5):
    """
    Estimate the CATE separately for the two groups defined by a binary condition.

    Units are split into condition <= 0 ("Never had black guests") and
    condition > 0 ("Had black guests before"). Within each group the outcome
    model is cross-fit and the CATE is estimated with AIPTW. Units whose
    condition is missing belong to neither group and are left out.

    Parameters:
    -----------
    main_data_cleaned : not used by this function; kept so existing calls still work
    confounders : pd.DataFrame
        Covariates passed to the outcome model
    outcome : pd.Series
        Observed outcomes
    treatment : pd.Series
        Observed treatments
    condition : pd.Series
        Variable defining the two groups; must share its index with the inputs above
    condition_name : str
        Name used in the printed header
    n_splits : int
        Number of cross-fitting folds (default 10)
    g : float
        Known propensity score (default 0.5, the randomized-experiment value)

    Returns:
    --------
    dict mapping group id (0 or 1) to a dict with keys 'group_label', 'n',
    'treatment_rate', 'outcome_rate', 'cate_estimate', 'std_error',
    'ci_lower', 'ci_upper'. A group that has no treatment variation, or whose
    estimation raises, has no entry.
    """
    results = {}
    
    print(f"=== CATE Estimation by {condition_name} ===\n")

    # binary groups of having African American guests or not
    condition_groups = pd.cut(condition, bins=[-np.inf, 0, np.inf], labels=False, include_lowest=True)
    group_labels = {0: "Never had black guests", 1: "Had black guests before"}

    # Estimate CATE for each group
    unique_groups = np.unique(condition_groups[~pd.isna(condition_groups)])
    
    for group in unique_groups:
        group_label = group_labels.get(group, f'Group {group}')
        print(f"--- {group_label} ---")
        
        # Filter data for this group; reset indices so the cross-fitting
        # helper can work positionally
        group_mask = condition_groups == group
        group_confounders = confounders[group_mask].reset_index(drop=True)
        group_outcome = outcome[group_mask].reset_index(drop=True)
        group_treatment = treatment[group_mask].reset_index(drop=True)
        
        print(f"Sample size: {len(group_outcome)}")
        print(f"Treatment rate: {group_treatment.mean():.3f}")
        print(f"Outcome rate: {group_outcome.mean():.3f}")
        
        # Check if we have enough data
        if len(group_outcome) < 50:
            print(f"Warning: Small sample size for {group_label}")
        
        # Check treatment variation. nunique() also catches single-unit groups,
        # whose variance is NaN and would slip past a `var() == 0` test.
        if group_treatment.nunique() < 2:
            print(f"No treatment variation for {group_label}")
            continue
            
        # Best effort per group: a failure in one group is reported and the
        # remaining groups are still estimated.
        try:
            # Step 1: Estimate Q (outcome model) using cross-fitting
            print("Estimating outcome model (Q)...")
            Q0, Q1 = outcome_k_fold_fit_and_predict(
                make_Q_model, 
                X=group_confounders, 
                y=group_outcome, 
                A=group_treatment,
                n_splits=n_splits,
                output_type='binary'
            )
            
            # Step 2: Propensity score is known by design in a randomized
            # experiment, so it is supplied by the caller rather than estimated
            print(f"Using propensity score g = {g} (randomized experiment)")
            
            # Step 3: Estimate CATE using AIPTW
            print("Estimating CATE...")
            tau_hat, std_hat = cate_aiptw(Q0, Q1, g, group_treatment, group_outcome)
            
            # Calculate 95% normal-approximation confidence interval
            ci_lower = tau_hat - 1.96 * std_hat
            ci_upper = tau_hat + 1.96 * std_hat
            
            # Store results
            results[group] = {
                'group_label': group_label,
                'n': len(group_outcome),
                'treatment_rate': group_treatment.mean(),
                'outcome_rate': group_outcome.mean(),
                'cate_estimate': tau_hat,
                'std_error': std_hat,
                'ci_lower': ci_lower,
                'ci_upper': ci_upper
            }
            
            print(f"CATE Estimate: {tau_hat:.4f}")
            print(f"Standard Error: {std_hat:.4f}")
            print(f"95% CI: [{ci_lower:.4f}, {ci_upper:.4f}]")
            
            # Interpretation: significant iff the interval excludes zero
            if ci_lower > 0:
                print("Significant positive effect of black-sounding names")
            elif ci_upper < 0:
                print("Significant negative effect of black-sounding names")
            else:
                print("No significant effect")
                
        except Exception as e:
            print(f"Error estimating CATE for {group_label}: {str(e)}")
            
        print("\n")
    
    return results

In [34]:
results_binary = estimate_cate_binary(main_data_cleaned, confounders, outcome, treatment, 
                                      condition_bin, condition_name="any black guests before")
results_binary

=== CATE Estimation by any black guests before ===

--- Never had black guests ---
Sample size: 4404
Treatment rate: 0.500
Outcome rate: 0.417
Estimating outcome model (Q)...
Using propensity score g = 0.5 (randomized experiment)
Estimating CATE...
CATE Estimate: -0.1032
Standard Error: 0.0146
95% CI: [-0.1318, -0.0746]
Significant negative effect of black-sounding names


--- Had black guests before ---
Sample size: 1764
Treatment rate: 0.488
Outcome rate: 0.541
Estimating outcome model (Q)...
Using propensity score g = 0.5 (randomized experiment)
Estimating CATE...
CATE Estimate: -0.0442
Standard Error: 0.0236
95% CI: [-0.0904, 0.0020]
No significant effect




{0: {'group_label': 'Never had black guests',
  'n': 4404,
  'treatment_rate': 0.49977293369663944,
  'outcome_rate': 0.4173478655767484,
  'cate_estimate': -0.10321419097664655,
  'std_error': 0.014602404141980777,
  'ci_lower': -0.13183490309492887,
  'ci_upper': -0.07459347885836423},
 1: {'group_label': 'Had black guests before',
  'n': 1764,
  'treatment_rate': 0.4880952380952381,
  'outcome_rate': 0.5408163265306123,
  'cate_estimate': -0.04420398038300179,
  'std_error': 0.023573358852434015,
  'ci_lower': -0.09040776373377246,
  'ci_upper': 0.001999802967768881}}

In [44]:
def estimate_cate_threshold(main_data_cleaned, confounders, outcome, treatment, condition, 
                            condition_name="prop_black", thresholds=[0, 0.1], n_splits=10, g=0.5):
    """
    Estimate the CATE separately within groups obtained by cutting a
    continuous condition at the given thresholds.

    The bins are (-inf, t_0], (t_0, t_1], ..., (t_k, inf). Within each
    non-empty bin the outcome model is cross-fit and the CATE is estimated
    with AIPTW. Units whose condition is missing fall in no bin and are left out.

    Parameters:
    -----------
    main_data_cleaned : not used by this function; kept so existing calls still work
    confounders : pd.DataFrame
        Covariates passed to the outcome model
    outcome : pd.Series
        Observed outcomes
    treatment : pd.Series
        Observed treatments
    condition : pd.Series
        Continuous variable to split on; must share its index with the inputs above
    condition_name : str
        Name used in printed output
    thresholds : sequence of numbers, increasing
        Cut points. With exactly two thresholds the groups are labelled
        never / medium / high; any other number gets generic interval labels.
    n_splits : int
        Number of cross-fitting folds (default 10)
    g : float
        Known propensity score (default 0.5, the randomized-experiment value)

    Returns:
    --------
    dict mapping group id to a dict with keys 'group_label', 'n',
    'treatment_rate', 'outcome_rate', 'mean_condition', 'condition_range',
    'cate_estimate', 'std_error', 'ci_lower', 'ci_upper'. A group that has no
    treatment variation, or whose estimation raises, has no entry.
    """
    results = {}
    
    print(f"=== CATE Estimation by {condition_name} ===\n")
    print(f"Continuous variable summary:")
    print(f"Mean: {condition.mean():.4f}")
    print(f"Std: {condition.std():.4f}")
    print(f"Min: {condition.min():.4f}, Max: {condition.max():.4f}")
    print(f"Quantiles: {condition.quantile([0.25, 0.5, 0.75]).values}\n")

    # Copy into a list: accepts tuples/arrays as well as lists, and never
    # touches the shared default argument.
    cut_points = list(thresholds)
    bin_edges = [-np.inf] + cut_points + [np.inf]

    # Default thresholds for black proportion:
    # Low: = 0, Medium: 0-0.1, High: > 0.1
    condition_groups = pd.cut(condition, bins=bin_edges, 
                                labels=False, include_lowest=True)
    if len(cut_points) == 2:
        group_labels = {
            0: f"Never had black guests before: = {cut_points[0]}",
            1: f"Medium proportion of black guests: {cut_points[0]}-{cut_points[1]}",
            2: f"High proportion of black guests: > {cut_points[1]}"
            }
    else:
        # The never/medium/high wording only fits three bins; otherwise label
        # each bin by its interval.
        group_labels = {
            i: f"{condition_name} in ({bin_edges[i]}, {bin_edges[i + 1]}]"
            for i in range(len(bin_edges) - 1)
        }


    # Estimate CATE for each group
    unique_groups = np.unique(condition_groups[~pd.isna(condition_groups)])
    
    for group in unique_groups:
        group_label = group_labels.get(group, f'Group {group}')
        print(f"--- {group_label} ---")
        
        # Filter data for this group; reset indices so the cross-fitting
        # helper can work positionally
        group_mask = condition_groups == group
        group_confounders = confounders[group_mask].reset_index(drop=True)
        group_outcome = outcome[group_mask].reset_index(drop=True)
        group_treatment = treatment[group_mask].reset_index(drop=True)
        group_condition_values = condition[group_mask].copy()
        
        print(f"Sample size: {len(group_outcome)}")
        print(f"Treatment rate: {group_treatment.mean():.3f}")
        print(f"Outcome rate: {group_outcome.mean():.3f}")
        print(f"Mean {condition_name}: {group_condition_values.mean():.3f}")
        print(f"{condition_name} range: [{group_condition_values.min():.3f}, {group_condition_values.max():.3f}]")
        
        # Check if we have enough data
        if len(group_outcome) < 50:
            print(f"Warning: Small sample size for {group_label}")
        
        # Check treatment variation. nunique() also catches single-unit groups,
        # whose variance is NaN and would slip past a `var() == 0` test.
        if group_treatment.nunique() < 2:
            print(f"No treatment variation for {group_label}")
            continue
            
        # Best effort per group: a failure in one group is reported and the
        # remaining groups are still estimated.
        try:
            # Step 1: Estimate Q (outcome model) using cross-fitting
            print("Estimating outcome model (Q)...")
            Q0, Q1 = outcome_k_fold_fit_and_predict(
                make_Q_model, 
                X=group_confounders, 
                y=group_outcome, 
                A=group_treatment,
                n_splits=n_splits,
                output_type='binary'
            )
            
            # Step 2: Propensity score is known by design in a randomized
            # experiment, so it is supplied by the caller rather than estimated
            print(f"Using propensity score g = {g} (randomized experiment)")
            
            # Step 3: Estimate CATE using AIPTW
            print("Estimating CATE...")
            tau_hat, std_hat = cate_aiptw(Q0, Q1, g, group_treatment, group_outcome)
            
            # Calculate 95% normal-approximation confidence interval
            ci_lower = tau_hat - 1.96 * std_hat
            ci_upper = tau_hat + 1.96 * std_hat
            
            # Store results
            results[group] = {
                'group_label': group_label,
                'n': len(group_outcome),
                'treatment_rate': group_treatment.mean(),
                'outcome_rate': group_outcome.mean(),
                'mean_condition': group_condition_values.mean(),
                'condition_range': (group_condition_values.min(), group_condition_values.max()),
                'cate_estimate': tau_hat,
                'std_error': std_hat,
                'ci_lower': ci_lower,
                'ci_upper': ci_upper
            }
            
            print(f"CATE Estimate: {tau_hat:.4f}")
            print(f"Standard Error: {std_hat:.4f}")
            print(f"95% CI: [{ci_lower:.4f}, {ci_upper:.4f}]")
            
            # Interpretation: significant iff the interval excludes zero
            if ci_lower > 0:
                print("Significant positive effect of black-sounding names")
            elif ci_upper < 0:
                print("Significant negative effect of black-sounding names")
            else:
                print("No significant effect")
                
        except Exception as e:
            print(f"Error estimating CATE for {group_label}: {str(e)}")
            
        print("\n")
    
    return results

In [None]:
# using the threshold:
# Never: = 0, Medium: 0-0.1, High: > 0.1
results_threshold = estimate_cate_threshold(main_data_cleaned, confounders, outcome, treatment, 
                                            condition_prop,
                                            condition_name="proportion of black guests",
                                            thresholds=[0, 0.1])
results_threshold

=== CATE Estimation by proportion of black guests ===

Continuous variable summary:
Mean: 0.0293
Std: 0.0898
Min: 0.0000, Max: 1.0000
Quantiles: [0.         0.         0.01169604]

--- Never had black guests before: = 0 ---
Sample size: 4404
Treatment rate: 0.500
Outcome rate: 0.417
Mean proportion of black guests: 0.000
proportion of black guests range: [0.000, 0.000]
Estimating outcome model (Q)...
Using propensity score g = 0.5 (randomized experiment)
Estimating CATE...
CATE Estimate: -0.1032
Standard Error: 0.0146
95% CI: [-0.1318, -0.0746]
Significant negative effect of black-sounding names


--- Medium proportion of black guests: 0-0.1 ---
Sample size: 1245
Treatment rate: 0.491
Outcome rate: 0.565
Mean proportion of black guests: 0.039
proportion of black guests range: [0.001, 0.100]
Estimating outcome model (Q)...
Using propensity score g = 0.5 (randomized experiment)
Estimating CATE...
CATE Estimate: -0.0073
Standard Error: 0.0279
95% CI: [-0.0621, 0.0474]
No significant effec

{0: {'group_label': 'Never had black guests before: = 0',
  'n': 4404,
  'treatment_rate': 0.49977293369663944,
  'outcome_rate': 0.4173478655767484,
  'mean_condition': 0.0,
  'condition_range': (0.0, 0.0),
  'cate_estimate': -0.10321419097664655,
  'std_error': 0.014602404141980777,
  'ci_lower': -0.13183490309492887,
  'ci_upper': -0.07459347885836423},
 1: {'group_label': 'Medium proportion of black guests: 0-0.1',
  'n': 1245,
  'treatment_rate': 0.4907630522088353,
  'outcome_rate': 0.5646586345381526,
  'mean_condition': 0.03931594533526104,
  'condition_range': (0.0010341262, 0.1),
  'cate_estimate': -0.007342352244012555,
  'std_error': 0.02791525153647178,
  'ci_lower': -0.062056245255497244,
  'ci_upper': 0.04737154076747213},
 2: {'group_label': 'High proportion of black guests: > 0.1',
  'n': 519,
  'treatment_rate': 0.4816955684007707,
  'outcome_rate': 0.4836223506743738,
  'mean_condition': 0.2541413757842004,
  'condition_range': (0.10526316, 1.0),
  'cate_estimate': -

In [52]:
# using the threshold:
# Never: = 0, Medium: 0-0.03, High: > 0.03
results_threshold_2 = estimate_cate_threshold(main_data_cleaned, confounders, outcome, treatment, 
                                            condition_prop,
                                            condition_name="proportion of black guests",
                                            thresholds=[0, 0.03])
results_threshold_2

=== CATE Estimation by proportion of black guests ===

Continuous variable summary:
Mean: 0.0293
Std: 0.0898
Min: 0.0000, Max: 1.0000
Quantiles: [0.         0.         0.01169604]

--- Never had black guests before: = 0 ---
Sample size: 4404
Treatment rate: 0.500
Outcome rate: 0.417
Mean proportion of black guests: 0.000
proportion of black guests range: [0.000, 0.000]
Estimating outcome model (Q)...
Using propensity score g = 0.5 (randomized experiment)
Estimating CATE...
CATE Estimate: -0.1032
Standard Error: 0.0146
95% CI: [-0.1318, -0.0746]
Significant negative effect of black-sounding names


--- Medium proportion of black guests: 0-0.03 ---
Sample size: 585
Treatment rate: 0.499
Outcome rate: 0.609
Mean proportion of black guests: 0.015
proportion of black guests range: [0.001, 0.030]
Estimating outcome model (Q)...
Using propensity score g = 0.5 (randomized experiment)
Estimating CATE...
CATE Estimate: 0.0025
Standard Error: 0.0398
95% CI: [-0.0755, 0.0805]
No significant effect

{0: {'group_label': 'Never had black guests before: = 0',
  'n': 4404,
  'treatment_rate': 0.49977293369663944,
  'outcome_rate': 0.4173478655767484,
  'mean_condition': 0.0,
  'condition_range': (0.0, 0.0),
  'cate_estimate': -0.10321419097664655,
  'std_error': 0.014602404141980777,
  'ci_lower': -0.13183490309492887,
  'ci_upper': -0.07459347885836423},
 1: {'group_label': 'Medium proportion of black guests: 0-0.03',
  'n': 585,
  'treatment_rate': 0.49914529914529915,
  'outcome_rate': 0.6085470085470085,
  'mean_condition': 0.01493581535111111,
  'condition_range': (0.0010341262, 0.029850746),
  'cate_estimate': 0.0025270461441415193,
  'std_error': 0.039806623118180326,
  'ci_lower': -0.07549393516749191,
  'ci_upper': 0.08054802745577495},
 2: {'group_label': 'High proportion of black guests: > 0.03',
  'n': 1179,
  'treatment_rate': 0.48261238337574214,
  'outcome_rate': 0.5072094995759118,
  'mean_condition': 0.14597987616115352,
  'condition_range': (0.030303031, 1.0),
  'cat

It is interesting that when conditioning on binary groups (never had black guests vs. had at least one black guest), the negative effect of black-sounding names is statistically significant only among hosts who have never had black guests before (95% CI [-0.1318, -0.0746]); among hosts who have had at least one black guest, the confidence interval includes zero (95% CI [-0.0904, 0.0020]). This suggests that the discrimination only occurs among a subset of hosts.  
If we instead divide hosts into three groups based on the proportion of black guests among all past guests and condition on those groups, the group with a medium proportion of black guests is the only one that shows no statistically significant effect. For both the group that never had black guests before and the group with a high proportion of black guests, black-sounding names have a significant negative effect. The result holds both when the threshold between medium and high proportion is set to 0.1 and when it is set to 0.03. This suggests that discrimination exists even among hosts with a high proportion of black guests in the past.