In [None]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Difference-in-differences (DiD) regression of sales on treatment x post,
# interacted with store size, store location and customer income to expose
# heterogeneity in the estimated effect.

# Load the two sales extracts.
sales_before = pd.read_csv('../data/sales_before.csv')
sales_after = pd.read_csv('../data/sales_after.csv')

# Tag every row with its source file *before* stacking, so the flag travels
# with the row instead of relying on positional alignment after the concat:
# 0 = row came from sales_before.csv, 1 = row came from sales_after.csv.
# ignore_index=True gives the stacked frame a clean, unique index.
# NOTE(review): as coded, `treatment` encodes the source file, not a treated
# group of stores/customers. If sales_after.csv only holds dates on or after
# the cutoff below, `treatment` and `post` coincide and the treatment:post
# term is not separately identified -- confirm the intended treatment
# definition.
data = pd.concat(
    [sales_before.assign(treatment=0), sales_after.assign(treatment=1)],
    ignore_index=True,
)

# Post-period indicator. read_csv leaves `date` as plain strings, so comparing
# it to '2020-01-01' directly would be a lexicographic comparison that is only
# correct for zero-padded ISO dates. Parse to real timestamps first; missing
# dates become NaT, compare as False, and are therefore coded post = 0.
POST_PERIOD_START = pd.Timestamp('2020-01-01')
data['post'] = (pd.to_datetime(data['date']) >= POST_PERIOD_START).astype(int)

# Attach store characteristics and customer demographics to each sales row.
# validate='many_to_one' makes pandas raise a MergeError if a lookup table has
# duplicate keys, instead of silently duplicating sales rows (which would
# inflate the sample used by the regression).
store_characteristics = pd.read_csv('../data/store_characteristics.csv')
customer_demographics = pd.read_csv('../data/customer_demographics.csv')
data = data.merge(
    store_characteristics,
    on='store_id',
    how='left',
    validate='many_to_one',
)
data = data.merge(
    customer_demographics,
    on='customer_id',
    how='left',
    validate='many_to_one',
)

# DiD with a full three-way interaction: `a * b * (x + y + z)` expands to all
# main effects plus every two- and three-way interaction, so the
# treatment:post:<covariate> terms show how the effect varies by covariate.
did_heterogeneity_model = ols(
    'sales ~ treatment * post * (store_size + store_location + income)',
    data=data,
).fit()
print(did_heterogeneity_model.summary())