In [11]:
import pandas as pd
import numpy as np

In [14]:
# Read the paid-search CSV (path is relative to the notebook's working directory)
# into a DataFrame, then preview the first five rows as the cell output.
df = pd.read_csv(filepath_or_buffer='paidsearch.csv')
df.head(n=5)

Unnamed: 0,date,dma,treatment_period,search_stays_on,revenue
0,1-Apr-12,500,0,0,76718.74
1,1-Apr-12,501,0,1,2096177.0
2,1-Apr-12,502,0,1,34993.85
3,1-Apr-12,503,0,1,34198.75
4,1-Apr-12,504,0,1,641014.2


In [15]:
# Add the natural log of revenue as a new column on df (later cells read it).
df['log_revenue'] = np.log(df['revenue'])

# Keep only the treated units, i.e. the rows with search_stays_on == 0.
treated_units = df[df['search_stays_on'] == 0]

# One row per dma, one column per treatment_period, each cell holding the mean
# log revenue of that dma in that period.
# The period columns are renamed BY LABEL (0 -> pre, 1 -> post) rather than by
# position: if the treatment_period values are ever not exactly {0, 1}, the
# subtraction below fails loudly with a KeyError instead of silently computing
# a difference between mislabelled columns.
# rename_axis(columns=None) drops the leftover 'treatment_period' axis name so
# the frame displays with plain column headers.
treated_pivot = (
    treated_units
    .pivot_table(index='dma', columns='treatment_period', values='log_revenue', aggfunc='mean')
    .rename(columns={0: 'log_revenue_pre', 1: 'log_revenue_post'})
    .rename_axis(columns=None)
)

# Per-dma change in mean log revenue from the pre period to the post period.
treated_pivot['log_revenue_diff'] = treated_pivot['log_revenue_post'] - treated_pivot['log_revenue_pre']

treated_pivot.head()

Unnamed: 0_level_0,log_revenue_pre,log_revenue_post,log_revenue_diff
dma,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
500,11.228004,11.176174,-0.05183
505,12.816405,12.75291,-0.063495
511,13.344097,13.305156,-0.038941
513,11.324759,11.223367,-0.101392
517,12.273978,12.238071,-0.035908


In [16]:
# Per-dma pre/post log-revenue differences for the treated units.
treated_diffs = treated_pivot['log_revenue_diff']

# Average log difference for treated units (pandas skips missing values here).
avg_log_diff_treated = treated_diffs.mean()

# Variance of the mean difference: sample variance divided by the number of
# observations that actually entered it. .var() ignores NaN differences (a dma
# missing one of the two periods), so the divisor must be the non-missing count
# rather than len(treated_pivot), which would also count those NaN rows and
# understate the variance. With no missing differences the result is unchanged.
var_log_diff_treated = treated_diffs.var() / treated_diffs.count()

In [17]:
# Keep only the untreated units, i.e. the rows with search_stays_on == 1.
untreated_units = df[df['search_stays_on'] == 1]

# One row per dma, one column per treatment_period, each cell holding the mean
# log revenue of that dma in that period.
# The period columns are renamed BY LABEL (0 -> pre, 1 -> post) rather than by
# position: if the treatment_period values are ever not exactly {0, 1}, the
# subtraction below fails loudly with a KeyError instead of silently computing
# a difference between mislabelled columns.
# rename_axis(columns=None) drops the leftover 'treatment_period' axis name so
# the frame displays with plain column headers.
untreated_pivot = (
    untreated_units
    .pivot_table(index='dma', columns='treatment_period', values='log_revenue', aggfunc='mean')
    .rename(columns={0: 'log_revenue_pre', 1: 'log_revenue_post'})
    .rename_axis(columns=None)
)

# Per-dma change in mean log revenue from the pre period to the post period.
untreated_pivot['log_revenue_diff'] = untreated_pivot['log_revenue_post'] - untreated_pivot['log_revenue_pre']

untreated_pivot.head()

Unnamed: 0_level_0,log_revenue_pre,log_revenue_post,log_revenue_diff
dma,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
501,14.579999,14.561343,-0.018656
502,10.385163,10.285722,-0.099442
503,10.48166,10.506597,0.024936
504,13.394976,13.359118,-0.035858
506,13.242097,13.195538,-0.046559


In [None]:
# Per-dma pre/post log-revenue differences for the untreated units.
untreated_diffs = untreated_pivot['log_revenue_diff']

# Average log difference for untreated units (pandas skips missing values here).
avg_log_diff_untreated = untreated_diffs.mean()

# Variance of the mean difference: sample variance divided by the number of
# observations that actually entered it. .var() ignores NaN differences (a dma
# missing one of the two periods), so the divisor must be the non-missing count
# rather than len(untreated_pivot), which would also count those NaN rows and
# understate the variance. With no missing differences the result is unchanged.
var_log_diff_untreated = untreated_diffs.var() / untreated_diffs.count()

In [18]:
# Point estimate (gamma_hat): the treated group's average log difference minus
# the untreated group's average log difference.
gamma_hat = avg_log_diff_treated - avg_log_diff_untreated

# Standard error: square root of the summed per-group variances of the mean
# (summing the two corresponds to treating the groups as independent).
sum_variances = var_log_diff_treated + var_log_diff_untreated
standard_error = np.sqrt(sum_variances)

# 95% confidence interval, using 1.96 as the two-sided normal critical value.
half_width = 1.96 * standard_error
ci_lower = gamma_hat - half_width
ci_upper = gamma_hat + half_width

# Exponentiate the point estimate and both interval endpoints to move the
# results off the log scale.
gamma_hat_exp = np.exp(gamma_hat)
ci_lower_exp = np.exp(ci_lower)
ci_upper_exp = np.exp(ci_upper)

# Report the log-scale results first, then the exponentiated ones.
for label, value in (
    ("Average Log Difference for Treated Units:", avg_log_diff_treated),
    ("Average Log Difference for Untreated Units:", avg_log_diff_untreated),
    ("\nGamma Hat (Difference between Averages):", gamma_hat),
    ("Standard Error (SE):", standard_error),
):
    print(label, value)
print("95% Confidence Interval: [{}, {}]".format(ci_lower, ci_upper))

print("\nExponentiated Results:")
print("Exponentiated Gamma Hat:", gamma_hat_exp)
print("Exponentiated 95% Confidence Interval: [{}, {}]".format(ci_lower_exp, ci_upper_exp))

Average Log Difference for Treated Units: -0.045986480525304546
Average Log Difference for Untreated Units: -0.03939962868098962

Gamma Hat (Difference between Averages): -0.006586851844314928
Standard Error (SE): 0.00555508210381524
95% Confidence Interval: [-0.017474812767792797, 0.004301109079162941]

Exponentiated Results:
Exponentiated Gamma Hat: 0.9934347939124213
Exponentiated 95% Confidence Interval: [0.9826769862668101, 1.004310372124513]
