# Example

In [1]:
import pandas as pd
import spillover_effects as spef

### Read simulated data

There are 2,000 individuals with a unique ID and they can mention up to 12 friends. 

We simulate the outcome, the treatment, and a covariate using a linear-in-means model with homophily.

In [2]:
# Base URL of the example data files (hosted on GitHub)
path_data = 'https://raw.githubusercontent.com/pabloestradac/spillover-effects/main/data/'

# Edge list: one row per individual (`source`) with up to 12 named friends (`target_1` ... `target_12`)
edges = pd.read_csv(path_data + 'edges.csv')

# Preview the first ten rows
edges.head(10)

Unnamed: 0,source,target_1,target_2,target_3,target_4,target_5,target_6,target_7,target_8,target_9,target_10,target_11,target_12
0,i0,i1869,i714,i1420,i1521,i274,,,,,,,
1,i1,i1230,i704,i1378,,,,,,,,,
2,i2,i1815,i1073,,,,,,,,,,
3,i3,i172,i1243,i1076,i573,i178,i1732,i1071,i1790,i543,,,
4,i4,i775,i1006,i272,i1684,,,,,,,,
5,i5,i1450,i1520,i1961,i655,i1239,,,,,,,
6,i6,i859,,,,,,,,,,,
7,i7,i1421,i472,i1413,i126,i733,,,,,,,
8,i8,i1906,i577,i1169,i978,i1629,,,,,,,
9,i9,i281,i1572,i1264,,,,,,,,,


In [3]:
# Node-level table: one row per node with outcome Y, treatment D and covariate X
data = pd.read_csv(path_data + 'data.csv')
data.head(10)

Unnamed: 0,node,Y,D,X
0,i0,-3.397137,0,0.013402
1,i1,-3.523659,1,-0.234082
2,i2,-4.808487,0,1.027604
3,i3,0.225275,0,-1.101414
4,i4,0.341246,0,-1.041121
5,i5,-3.090114,0,-1.024225
6,i6,-3.305957,0,-0.360328
7,i7,-0.700885,1,-1.22859
8,i8,-3.120404,1,-0.048571
9,i9,2.101518,0,-0.16549


### Spillover Effects

We are interested in estimating the spillover effect of having at least one treated connection (Example 2.2 of Gao and Ding, 2023). To use the WLS estimator, we need to compute the kernel weight matrix, the exposure treatments, and the propensity scores. This is broken down into four steps.

In [4]:
# 1. Create adjacency matrix and array with nodes order
# `edges` is the edge list loaded earlier (a `source` column plus `target_*` columns).
# `nodes` lists the node IDs in the order used by the matrix; the next step uses it
# to reorder `data` accordingly.
A, nodes = spef.utils.adjacency_matrix(edges, directed=True)
# Show both objects to inspect the matrix size/format and the node order
A, nodes

(<2000x2000 sparse matrix of type '<class 'numpy.uint32'>'
 	with 7720 stored elements in COOrdinate format>,
 array(['i0', 'i1869', 'i714', ..., 'i1846', 'i1933', 'i1940'],
       dtype=object))

In [5]:
# 2. Reorder data to match adjacency matrix order
# Later steps pass columns of `data` together with `A`, so row k of `data` should
# describe the same node as position k of `nodes`.
data = data.set_index('node').loc[nodes].reset_index()

# `.loc` with a list of labels returns every matching row, so duplicated node IDs
# in `data` would silently add rows and shift the alignment. Fail loudly instead.
assert data['node'].tolist() == list(nodes), 'data rows are not aligned with the adjacency matrix order'
data.iloc[:10]

Unnamed: 0,node,Y,D,X
0,i0,-3.397137,0,0.013402
1,i1869,-0.444757,0,-0.158512
2,i714,-2.101452,0,0.45506
3,i1420,0.667575,1,-0.964647
4,i1521,-3.863034,0,1.981263
5,i274,-4.948267,0,-0.210056
6,i1,-3.523659,1,-0.234082
7,i1230,-0.68293,0,-0.589596
8,i704,-4.965665,0,1.103453
9,i1378,-3.364103,1,0.00642


In [6]:
# 3. Exposure treatments and their propensity scores
#    exposure1: 1 if at least one neighbor is treated, 0 otherwise
#    pscore1:   probability of having at least one neighbor treated
n_treated = data['D'].sum()  # number of treated units in the sample

data[['pscore0', 'pscore1']] = spef.utils.spillover_pscore(A, n_treated)
data[['exposure0', 'exposure1']] = spef.utils.spillover_treatment(data['D'], A)

data.head(10)

Unnamed: 0,node,Y,D,X,pscore0,pscore1,exposure0,exposure1
0,i0,-3.397137,0,0.013402,0.256505,0.743495,0,1
1,i1869,-0.444757,0,-0.158512,0.195304,0.804696,0,1
2,i714,-2.101452,0,0.45506,0.256505,0.743495,0,1
3,i1420,0.667575,1,-0.964647,0.336831,0.663169,1,0
4,i1521,-3.863034,0,1.981263,0.336831,0.663169,1,0
5,i274,-4.948267,0,-0.210056,0.336831,0.663169,0,1
6,i1,-3.523659,1,-0.234082,0.442243,0.557757,0,1
7,i1230,-0.68293,0,-0.589596,0.336831,0.663169,0,1
8,i704,-4.965665,0,1.103453,0.580553,0.419447,0,1
9,i1378,-3.364103,1,0.00642,0.442243,0.557757,0,1


In [7]:
# 4. Create kernel matrix for HAC standard errors
# Element i,j is 1 if the length of the shortest path between i and j is less than or equal to bandwidth
bandwidth = 3
# The result is passed as `kernel_weights` to spef.WLS in the cells below
distances = spef.utils.kernel(A, bandwidth)

In [8]:
# Baseline specification: no covariates, kernel weights supplied for the standard errors
wls_results = spef.WLS(
    'Y',
    ['exposure0', 'exposure1'],
    ['pscore0', 'pscore1'],
    data,
    kernel_weights=distances,
)
# The first row / first column of the summary holds the spillover coefficient
print('The effect of having at least one treated neighbor is {:.2f}'.format(wls_results.summary.iat[0, 0]))
wls_results.summary.round(2)

The effect of having at least one treated neighbor is 0.64


Unnamed: 0,coef,se,t-val,p-val,ci-low,ci-up
spillover,0.64,0.34,1.86,0.06,-0.04,1.32
exposure0,-3.96,0.34,-11.52,0.0,-4.64,-3.29
exposure1,-3.32,0.27,-12.51,0.0,-3.84,-2.8


In [9]:
# Same regression without kernel weights: Eicker-Huber-White standard errors,
# which assume no interference
wls_results = spef.WLS(
    'Y',
    ['exposure0', 'exposure1'],
    ['pscore0', 'pscore1'],
    data,
)
wls_results.summary.round(2)



Unnamed: 0,coef,se,t-val,p-val,ci-low,ci-up
spillover,0.64,0.2,3.18,0.0,0.25,1.03
exposure0,-3.96,0.16,-25.5,0.0,-4.27,-3.66
exposure1,-3.32,0.13,-26.02,0.0,-3.57,-3.07


In [10]:
# Covariate X entered fully interacted with the exposure indicators
wls_results = spef.WLS(
    'Y',
    ['exposure0', 'exposure1'],
    ['pscore0', 'pscore1'],
    data,
    kernel_weights=distances,
    name_x='X',
)
# The first row / first column of the summary holds the spillover coefficient
print('The effect of having at least one treated neighbor is {:.2f}'.format(wls_results.summary.iat[0, 0]))
wls_results.summary.round(2)

The effect of having at least one treated neighbor is 0.71


Unnamed: 0,coef,se,t-val,p-val,ci-low,ci-up
spillover,0.71,0.3,2.36,0.02,0.12,1.3
exposure0,-4.01,0.31,-12.95,0.0,-4.62,-3.4
exposure1,-3.3,0.23,-14.42,0.0,-3.75,-2.85
exposure0*X,-2.08,0.14,-14.49,0.0,-2.37,-1.8
exposure1*X,-2.21,0.11,-19.57,0.0,-2.43,-1.99


In [11]:
# Covariate X entered additively (interaction with the exposures switched off)
wls_results = spef.WLS(
    'Y',
    ['exposure0', 'exposure1'],
    ['pscore0', 'pscore1'],
    data,
    kernel_weights=distances,
    name_x='X',
    interaction=False,
)
# The first row / first column of the summary holds the spillover coefficient
print('The effect of having at least one treated neighbor is {:.2f}'.format(wls_results.summary.iat[0, 0]))
wls_results.summary.round(2)

The effect of having at least one treated neighbor is 0.71


Unnamed: 0,coef,se,t-val,p-val,ci-low,ci-up
spillover,0.71,0.3,2.36,0.02,0.12,1.3
exposure0,-4.01,0.31,-12.99,0.0,-4.62,-3.41
exposure1,-3.3,0.23,-14.37,0.0,-3.75,-2.85
X,-2.15,0.09,-24.28,0.0,-2.32,-1.97


## Spillover Effects with 2-dimensional Exposure Mapping

Example 2.3 of Gao and Ding (2023)

In [12]:
# 3. Exposure treatments and propensity scores for the 2-dimensional exposure mapping
#    exposure11: 1 if treated and at least one neighbor is treated, 0 otherwise
#    pscore11:   probability of being treated and having at least one neighbor treated
n_treated = data['D'].sum()  # number of treated units in the sample

data[['pscore00', 'pscore01', 'pscore10', 'pscore11']] = spef.utils.spillover_pscore(
    A, n_treated, matrix=True
)
data[['exposure00', 'exposure01', 'exposure10', 'exposure11']] = spef.utils.spillover_treatment(
    data['D'], A, interaction=True
)

data.head(10)

Unnamed: 0,node,Y,D,X,pscore0,pscore1,exposure0,exposure1,pscore00,pscore01,pscore10,pscore11,exposure00,exposure01,exposure10,exposure11
0,i0,-3.397137,0,0.013402,0.256505,0.743495,0,1,0.195457,0.566543,0.061048,0.176952,0,1,0,0
1,i1869,-0.444757,0,-0.158512,0.195304,0.804696,0,1,0.148821,0.613179,0.046482,0.191518,0,1,0,0
2,i714,-2.101452,0,0.45506,0.256505,0.743495,0,1,0.195457,0.566543,0.061048,0.176952,0,1,0,0
3,i1420,0.667575,1,-0.964647,0.336831,0.663169,1,0,0.256665,0.505335,0.080166,0.157834,0,0,1,0
4,i1521,-3.863034,0,1.981263,0.336831,0.663169,1,0,0.256665,0.505335,0.080166,0.157834,1,0,0,0
5,i274,-4.948267,0,-0.210056,0.336831,0.663169,0,1,0.256665,0.505335,0.080166,0.157834,0,1,0,0
6,i1,-3.523659,1,-0.234082,0.442243,0.557757,0,1,0.336989,0.425011,0.105254,0.132746,0,0,0,1
7,i1230,-0.68293,0,-0.589596,0.336831,0.663169,0,1,0.256665,0.505335,0.080166,0.157834,0,1,0,0
8,i704,-4.965665,0,1.103453,0.580553,0.419447,0,1,0.442382,0.319618,0.138172,0.099828,0,1,0,0
9,i1378,-3.364103,1,0.00642,0.442243,0.557757,0,1,0.336989,0.425011,0.105254,0.132746,0,0,0,1


In [15]:
# Direct Effect (no covariate)
# With the 2-dimensional exposure (own treatment, at least one treated neighbor),
# contrast='direct' puts the effect of the unit's own treatment in the first summary
# row. The printed sentence therefore describes a direct effect, not the effect of
# having a treated neighbor.
wls_results = spef.WLS('Y',
                       ['exposure00', 'exposure01', 'exposure10', 'exposure11'],
                       ['pscore00', 'pscore01', 'pscore10', 'pscore11'],
                       data, kernel_weights=distances, contrast='direct')
print('The direct effect of being treated is {:.2f}'.format(wls_results.summary.iloc[0,0]))
wls_results.summary.round(2)

The effect of having at least one treated neighbor is 1.00


Unnamed: 0,coef,se,t-val,p-val,ci-low,ci-up
direct,1.0,0.27,3.66,0.0,0.47,1.54
exposure00,-4.24,0.42,-10.1,0.0,-5.06,-3.41
exposure01,-3.49,0.27,-13.0,0.0,-4.01,-2.96
exposure10,-2.97,0.34,-8.78,0.0,-3.64,-2.31
exposure11,-2.75,0.41,-6.64,0.0,-3.56,-1.94


In [16]:
# Spillover effect under the 2-dimensional exposure mapping, no covariates
wls_results = spef.WLS(
    'Y',
    ['exposure00', 'exposure01', 'exposure10', 'exposure11'],
    ['pscore00', 'pscore01', 'pscore10', 'pscore11'],
    data,
    kernel_weights=distances,
    contrast='spillover',
)
# The first row / first column of the summary holds the spillover contrast
print('The effect of having at least one treated neighbor is {:.2f}'.format(wls_results.summary.iat[0, 0]))
wls_results.summary.round(2)

The effect of having at least one treated neighbor is 0.49


Unnamed: 0,coef,se,t-val,p-val,ci-low,ci-up
spillover,0.49,0.32,1.52,0.13,-0.14,1.11
exposure00,-4.24,0.42,-10.1,0.0,-5.06,-3.41
exposure01,-3.49,0.27,-13.0,0.0,-4.01,-2.96
exposure10,-2.97,0.34,-8.78,0.0,-3.64,-2.31
exposure11,-2.75,0.41,-6.64,0.0,-3.56,-1.94


In [18]:
# Spillover effect under the 2-dimensional exposure mapping, adjusting for covariate X
wls_results = spef.WLS(
    'Y',
    ['exposure00', 'exposure01', 'exposure10', 'exposure11'],
    ['pscore00', 'pscore01', 'pscore10', 'pscore11'],
    data,
    kernel_weights=distances,
    name_x='X',
    contrast='spillover',
)
# The first row / first column of the summary holds the spillover contrast
print('The effect of having at least one treated neighbor is {:.2f}'.format(wls_results.summary.iat[0, 0]))
wls_results.summary.round(2)

The effect of having at least one treated neighbor is 0.66


Unnamed: 0,coef,se,t-val,p-val,ci-low,ci-up
spillover,0.66,0.28,2.37,0.02,0.11,1.2
exposure00,-4.21,0.38,-11.15,0.0,-4.95,-3.47
exposure01,-3.49,0.23,-15.02,0.0,-3.94,-3.03
exposure10,-3.24,0.32,-10.17,0.0,-3.86,-2.61
exposure11,-2.65,0.35,-7.65,0.0,-3.33,-1.97
exposure00*X,-2.05,0.15,-13.63,0.0,-2.35,-1.76
exposure01*X,-2.18,0.13,-16.75,0.0,-2.43,-1.92
exposure10*X,-2.07,0.36,-5.69,0.0,-2.78,-1.35
exposure11*X,-2.36,0.21,-11.43,0.0,-2.76,-1.95
