# Example

In [1]:
import numpy as np
import pandas as pd
import scipy as sp

In [2]:
# Install the package first if it is missing (the PyPI distribution name is hyphenated):
# %pip install spillover-effects

# The importable module name uses an underscore: a hyphen is not a valid
# character in a Python identifier, so `import spillover-effects` cannot parse.
import spillover_effects as spef

### Simulate data

There are 2,000 individuals, each with a unique ID, and each of them can name up to 12 friends.

The outcome, treatment, and covariate were simulated using a linear-in-means model with homophily; the resulting files are loaded below.

In [3]:
# Load the edge list: one row per individual (`source`) with the friends
# they named in the `target_*` columns.
edges = pd.read_csv('data/edges.csv')
edges.head(10)  # preview the first ten rows

Unnamed: 0,source,target_1,target_2,target_3,target_4,target_5,target_6,target_7,target_8,target_9,target_10,target_11,target_12
0,i0,i1869,i714,i1420,i1521,i274,,,,,,,
1,i1,i1230,i704,i1378,,,,,,,,,
2,i2,i1815,i1073,,,,,,,,,,
3,i3,i172,i1243,i1076,i573,i178,i1732,i1071,i1790,i543,,,
4,i4,i775,i1006,i272,i1684,,,,,,,,
5,i5,i1450,i1520,i1961,i655,i1239,,,,,,,
6,i6,i859,,,,,,,,,,,
7,i7,i1421,i472,i1413,i126,i733,,,,,,,
8,i8,i1906,i577,i1169,i978,i1629,,,,,,,
9,i9,i281,i1572,i1264,,,,,,,,,


In [4]:
# Load the node-level table: identifier (`node`), outcome `Y`,
# treatment indicator `D`, and covariate `X`.
data = pd.read_csv('data/data.csv')
data.head(10)  # preview the first ten rows

Unnamed: 0,node,Y,D,X
0,i0,-3.397137,0,0.013402
1,i1,-3.523659,1,-0.234082
2,i2,-4.808487,0,1.027604
3,i3,0.225275,0,-1.101414
4,i4,0.341246,0,-1.041121
5,i5,-3.090114,0,-1.024225
6,i6,-3.305957,0,-0.360328
7,i7,-0.700885,1,-1.22859
8,i8,-3.120404,1,-0.048571
9,i9,2.101518,0,-0.16549


### Adjacency matrix and treatment exposures

In [5]:
# 1. Create adjacency matrix and array with nodes order.
#    The helper is reached through the `spef` alias; a bare `utils` name is
#    never imported in this notebook and would raise NameError on a fresh kernel.
#    `nodes` gives the row/column order of `A`, which differs from the order
#    of the rows in `data`.
A, nodes = spef.utils.adjacency_matrix(edges, directed=True)
A, nodes

(<2000x2000 sparse matrix of type '<class 'numpy.uint32'>'
 	with 7720 stored elements in COOrdinate format>,
 array(['i0', 'i1869', 'i714', ..., 'i1846', 'i1933', 'i1940'],
       dtype=object))

In [6]:
# 2. Align the rows of `data` with the node order of the adjacency matrix.
#    `.loc[nodes]` raises KeyError if a node in the graph has no row in `data`.
data = (
    data
    .set_index('node')
    .loc[nodes]
    .reset_index()
)
data.head(10)

Unnamed: 0,node,Y,D,X
0,i0,-3.397137,0,0.013402
1,i1869,-0.444757,0,-0.158512
2,i714,-2.101452,0,0.45506
3,i1420,0.667575,1,-0.964647
4,i1521,-3.863034,0,1.981263
5,i274,-4.948267,0,-0.210056
6,i1,-3.523659,1,-0.234082
7,i1230,-0.68293,0,-0.589596
8,i704,-4.965665,0,1.103453
9,i1378,-3.364103,1,0.00642


In [7]:
# 3. Create exposure treatments and propensity score.
#    Helpers are reached through the `spef` alias; a bare `utils` name is never
#    imported in this notebook and would raise NameError on a fresh kernel.
n_treated = data['D'].sum()  # count of treated units, assuming D is a 0/1 indicator
data[['pscore0', 'pscore1']] = spef.utils.spillover_pscore(A, n_treated)
data[['exposure0', 'exposure1']] = spef.utils.spillover_treatment(data['D'], A)
data.iloc[:10]

Unnamed: 0,node,Y,D,X,pscore0,pscore1,exposure0,exposure1
0,i0,-3.397137,0,0.013402,0.256505,0.743495,0,1
1,i1869,-0.444757,0,-0.158512,0.195304,0.804696,0,1
2,i714,-2.101452,0,0.45506,0.256505,0.743495,0,1
3,i1420,0.667575,1,-0.964647,0.336831,0.663169,1,0
4,i1521,-3.863034,0,1.981263,0.336831,0.663169,1,0
5,i274,-4.948267,0,-0.210056,0.336831,0.663169,0,1
6,i1,-3.523659,1,-0.234082,0.442243,0.557757,0,1
7,i1230,-0.68293,0,-0.589596,0.336831,0.663169,0,1
8,i704,-4.965665,0,1.103453,0.580553,0.419447,0,1
9,i1378,-3.364103,1,0.00642,0.442243,0.557757,0,1


In [8]:
# 4. Create kernel matrix for HAC standard errors.
#    The helper is reached through the `spef` alias; a bare `utils` name is
#    never imported in this notebook and would raise NameError on a fresh kernel.
# NOTE(review): the bandwidth is presumably a cutoff on network distance --
# confirm its exact meaning against the package documentation.
bandwidth = 3
distances = spef.utils.kernel(A, bandwidth)

In [9]:
# Weighted least squares of Y on the two exposure indicators, using the
# propensity-score columns and the kernel matrix built above.
# `WLS` is reached through the `spef` alias; the bare name is never imported
# in this notebook and would raise NameError on a fresh kernel.
wls_results = spef.WLS(
    'Y',
    ['exposure0', 'exposure1'],
    ['pscore0', 'pscore1'],
    data,
    kernel_weights=distances,
)
wls_results.summary.round(2)



Unnamed: 0,coef,se,t-val,p-val,ci-low,ci-up
spillover,0.64,0.34,1.86,0.06,-0.04,1.32
exposure0,-3.96,0.34,-11.52,0.0,-4.64,-3.29
exposure1,-3.32,0.27,-12.51,0.0,-3.84,-2.8


In [10]:
# Same specification with the covariate X added via `name_x`; the summary then
# also reports exposure-by-X interaction rows.
# `WLS` is reached through the `spef` alias; the bare name is never imported
# in this notebook and would raise NameError on a fresh kernel.
wls_results = spef.WLS(
    'Y',
    ['exposure0', 'exposure1'],
    ['pscore0', 'pscore1'],
    data,
    kernel_weights=distances,
    name_x='X',
)
wls_results.summary.round(2)



Unnamed: 0,coef,se,t-val,p-val,ci-low,ci-up
spillover,0.71,0.3,2.36,0.02,0.12,1.3
exposure0,-4.01,0.31,-12.95,0.0,-4.62,-3.4
exposure1,-3.3,0.23,-14.42,0.0,-3.75,-2.85
exposure0*X,-2.08,0.14,-14.49,0.0,-2.37,-1.8
exposure1*X,-2.21,0.11,-19.57,0.0,-2.43,-1.99
