In [6]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns
import pyreadr

## DATA

Please note that data are not available in the repository.

In [7]:
us_codes = pd.read_csv('./data/us_geoid_gid.csv', dtype = {'GEOID' : 'str'})

coloc = pd.read_csv('./data/colocation-us-mean.csv')

sci = pd.read_csv('./data/sci-cleaned.csv', dtype = {'own_county' : 'str', 'friend_county' : 'str'})
sci = sci.merge(us_codes[['GID_2', 'GEOID']], how = 'left', left_on = 'own_county', right_on = 'GEOID')
sci = sci.merge(us_codes[['GID_2', 'GEOID']], how = 'left', left_on = 'friend_county', right_on = 'GEOID')[['GID_2_x', 'GID_2_y', 'rel_prob_friend']]

In [8]:
#drop Alaska and Hawaii

coloc = coloc[(coloc['polygon1_id'].str.startswith('USA.2.') == False) & (coloc['polygon1_id'].str.startswith('USA.12.') == False) &
                (coloc['polygon2_id'].str.startswith('USA.2.') == False) & (coloc['polygon2_id'].str.startswith('USA.12.') == False)]

sci = sci[(sci['GID_2_x'].str.startswith('USA.2.') == False) & (sci['GID_2_x'].str.startswith('USA.12.') == False) &
                (sci['GID_2_y'].str.startswith('USA.2.') == False) & (sci['GID_2_y'].str.startswith('USA.12.') == False)]

In [9]:
df_corr = coloc.merge(sci, how='inner', left_on=['polygon1_id', 'polygon2_id'], right_on=['GID_2_x', 'GID_2_y'])
print(df_corr['weekly_colocation_rate'].corr(df_corr['rel_prob_friend']))

0.8283940613942162


In [10]:
# data on presidential elections

votes = pyreadr.read_r('./data/dataverse_shareable_presidential_county_returns_1868_2020.Rdata')

votes = votes['pres_elections_release']

# keep the last 3 presidential elections
votes12_20 = votes[(votes['election_year'] == 2012) | (votes['election_year'] == 2016) | (votes['election_year'] == 2020)]
votes12_20 = votes12_20[['fips', 'election_year', 'county_name', 'democratic_raw_votes', 'republican_raw_votes', 'raw_county_vote_totals']]

votes12_20['perc_dem'] = (votes12_20['democratic_raw_votes'] / votes12_20['raw_county_vote_totals']).round(4)
votes12_20['perc_rep'] = (votes12_20['republican_raw_votes'] / votes12_20['raw_county_vote_totals']).round(4)

normal_vote = votes12_20.groupby(['fips']).agg({'perc_rep' : 'mean', 'perc_dem' : 'mean'}).reset_index()

county_votes_share = normal_vote.merge(us_codes, how='inner', left_on = 'fips', right_on = 'GEOID')

county_votes_share['perc_rep'] = county_votes_share['perc_rep'].round(4)
county_votes_share['perc_dem'] = county_votes_share['perc_dem'].round(4)

### COMPUTE OFFLINE PROXIMITY

In [18]:
#merge datasets to have the results for each county
coloc_offline_exposure_tmp = coloc.merge(county_votes_share[['GID_2', 'perc_rep', 'perc_dem']], how='left', left_on = 'polygon1_id', right_on = 'GID_2')
coloc_offline_exposure = coloc_offline_exposure_tmp.merge(county_votes_share[['GID_2', 'perc_rep', 'perc_dem']], how='left', left_on = 'polygon2_id', right_on = 'GID_2')

In [19]:
coloc_offline_exposure

Unnamed: 0,polygon1_id,polygon1_name,polygon2_id,polygon2_name,weekly_colocation_rate,GID_2_x,perc_rep_x,perc_dem_x,GID_2_y,perc_rep_y,perc_dem_y
0,USA.1.10_1,Cherokee,USA.1.10_1,Cherokee,1.446241e-03,USA.1.10_1,0.8203,0.1646,USA.1.10_1,0.8203,0.1646
1,USA.1.10_1,Cherokee,USA.1.11_1,Chilton,1.115423e-06,USA.1.10_1,0.8203,0.1646,USA.1.11_1,0.8169,0.1706
2,USA.1.10_1,Cherokee,USA.1.12_1,Choctaw,3.318174e-07,USA.1.10_1,0.8203,0.1646,USA.1.12_1,0.5531,0.4400
3,USA.1.10_1,Cherokee,USA.1.13_1,Clarke,1.808782e-07,USA.1.10_1,0.8203,0.1646,USA.1.13_1,0.5482,0.4454
4,USA.1.10_1,Cherokee,USA.1.14_1,Clay,5.766579e-06,USA.1.10_1,0.8203,0.1646,USA.1.14_1,0.7737,0.2119
...,...,...,...,...,...,...,...,...,...,...,...
9235886,USA.8.1_1,Kent,USA.42.47_1,Mellette,3.417362e-10,USA.8.1_1,0.4791,0.4928,USA.42.47_1,0.5560,0.4079
9235887,USA.8.1_1,Kent,USA.42.8_1,Buffalo,1.058619e-09,USA.8.1_1,0.4791,0.4928,USA.42.8_1,0.3138,0.6605
9235888,USA.8.1_1,Kent,USA.44.197_1,Roberts,1.238054e-08,USA.8.1_1,0.4791,0.4928,USA.44.197_1,0.9430,0.0440
9235889,USA.8.3_1,Sussex,USA.32.12_1,Harding,3.131884e-09,USA.8.3_1,0.5671,0.4129,USA.32.12_1,0.5886,0.3610


In [20]:
coloc_offline_exposure['tot_coloc'] = coloc_offline_exposure.groupby(['polygon1_id'])['weekly_colocation_rate'].transform('sum')

#proximity to republican
coloc_offline_exposure['proximity_rep_coloc'] = (coloc_offline_exposure['perc_rep_y'] * (coloc_offline_exposure['weekly_colocation_rate'] / coloc_offline_exposure['tot_coloc'])).round(4)

#proximity to democrats
coloc_offline_exposure['proximity_dem_coloc'] = (coloc_offline_exposure['perc_dem_y'] * (coloc_offline_exposure['weekly_colocation_rate'] / coloc_offline_exposure['tot_coloc'])).round(4)

proximity_coloc = coloc_offline_exposure.groupby(['polygon1_id']).agg({'proximity_rep_coloc' : 'sum', 'proximity_dem_coloc' : 'sum'}).reset_index()


In [21]:
proximity_coloc

Unnamed: 0,polygon1_id,proximity_rep_coloc,proximity_dem_coloc
0,USA.1.10_1,0.7730,0.1879
1,USA.1.11_1,0.7355,0.2241
2,USA.1.12_1,0.5392,0.4363
3,USA.1.13_1,0.5308,0.4466
4,USA.1.14_1,0.7475,0.2236
...,...,...,...
3112,USA.7.8_1,0.4637,0.4888
3113,USA.8.1_1,0.4758,0.4781
3114,USA.8.2_1,0.3525,0.5999
3115,USA.8.3_1,0.5427,0.4165


### COMPUTE ONLINE PROXIMITY

In [22]:
#merge datasets to have the results for each county
sci_online_exposure_tmp = sci.merge(county_votes_share[['GID_2', 'perc_rep', 'perc_dem']], how='left', left_on = 'GID_2_x', right_on = 'GID_2')
sci_online_exposure = sci_online_exposure_tmp.merge(county_votes_share[['GID_2', 'perc_rep', 'perc_dem']], how='left', left_on = 'GID_2_y', right_on = 'GID_2')

sci_online_exposure = sci_online_exposure.loc[:,~sci_online_exposure.columns.duplicated()].copy()

In [23]:
sci_online_exposure['tot_sci'] = sci_online_exposure.groupby(['GID_2_x'])['rel_prob_friend'].transform('sum')

#proximity to republican
sci_online_exposure['proximity_rep_sci'] = (sci_online_exposure['perc_rep_y'] * (sci_online_exposure['rel_prob_friend'] / sci_online_exposure['tot_sci'])).round(4)

#proximity to democrats
sci_online_exposure['proximity_dem_sci'] = (sci_online_exposure['perc_dem_y'] * (sci_online_exposure['rel_prob_friend'] / sci_online_exposure['tot_sci'])).round(4)

proximity_sci = sci_online_exposure.groupby(['GID_2_x']).agg({'proximity_rep_sci' : 'sum', 'proximity_dem_sci' : 'sum'}).reset_index()


In [24]:
proximity_sci

Unnamed: 0,GID_2_x,proximity_rep_sci,proximity_dem_sci
0,USA.1.10_1,0.7545,0.1802
1,USA.1.11_1,0.7089,0.2207
2,USA.1.12_1,0.5370,0.4269
3,USA.1.13_1,0.5090,0.4514
4,USA.1.14_1,0.7221,0.2275
...,...,...,...
3093,USA.7.8_1,0.4538,0.4371
3094,USA.8.1_1,0.5006,0.4107
3095,USA.8.2_1,0.4493,0.4631
3096,USA.8.3_1,0.5149,0.3867


### MERGE DATASETS

In [25]:
partisan_exposure_tmp = proximity_coloc.merge(proximity_sci, how = 'inner', left_on = 'polygon1_id', right_on = 'GID_2_x')
partisan_exposure = partisan_exposure_tmp.merge(county_votes_share[['GID_2', 'GEOID', 'perc_rep', 'perc_dem']], how = 'inner', left_on = 'GID_2_x', right_on = 'GID_2')

In [26]:
partisan_exposure = partisan_exposure[['GID_2', 'GEOID', 'proximity_rep_coloc',
       'proximity_dem_coloc', 
       'proximity_rep_sci',
       'proximity_dem_sci', 
       'perc_rep', 'perc_dem']]

In [27]:
partisan_exposure

Unnamed: 0,GID_2,GEOID,proximity_rep_coloc,proximity_dem_coloc,proximity_rep_sci,proximity_dem_sci,perc_rep,perc_dem
0,USA.1.10_1,01019,0.7730,0.1879,0.7545,0.1802,0.8203,0.1646
1,USA.1.11_1,01021,0.7355,0.2241,0.7089,0.2207,0.8169,0.1706
2,USA.1.12_1,01023,0.5392,0.4363,0.5370,0.4269,0.5531,0.4400
3,USA.1.13_1,01025,0.5308,0.4466,0.5090,0.4514,0.5482,0.4454
4,USA.1.14_1,01027,0.7475,0.2236,0.7221,0.2275,0.7737,0.2119
...,...,...,...,...,...,...,...,...
3093,USA.7.8_1,09015,0.4637,0.4888,0.4538,0.4371,0.4805,0.4846
3094,USA.8.1_1,10001,0.4758,0.4781,0.5006,0.4107,0.4791,0.4928
3095,USA.8.2_1,10003,0.3525,0.5999,0.4493,0.4631,0.3188,0.6547
3096,USA.8.3_1,10005,0.5427,0.4165,0.5149,0.3867,0.5671,0.4129


## COMPUTE EXTROVERSION

In [28]:
self_coloc = coloc_offline_exposure[coloc_offline_exposure['polygon1_id'] == coloc_offline_exposure['polygon2_id']]
noself_coloc = coloc_offline_exposure[coloc_offline_exposure['polygon1_id'] != coloc_offline_exposure['polygon2_id']]

self_sci = sci_online_exposure[sci_online_exposure['GID_2_x'] == sci_online_exposure['GID_2_y']]
noself_sci = sci_online_exposure[sci_online_exposure['GID_2_x'] != sci_online_exposure['GID_2_y']]

In [29]:
self_coloc = self_coloc.groupby(['polygon1_id']).agg({'weekly_colocation_rate' : 'sum'}).reset_index()
noself_coloc = noself_coloc.groupby(['polygon1_id']).agg({'weekly_colocation_rate' : 'sum'}).reset_index()

self_sci = self_sci.groupby(['GID_2_x']).agg({'rel_prob_friend' : 'sum'}).reset_index()
noself_sci = noself_sci.groupby(['GID_2_x']).agg({'rel_prob_friend' : 'sum'}).reset_index()

In [30]:
extroversion_coloc = self_coloc[['polygon1_id', 'weekly_colocation_rate']].merge(noself_coloc, how='outer', on='polygon1_id')
extroversion_sci = self_sci[['GID_2_x', 'rel_prob_friend']].merge(noself_sci, how='outer', on='GID_2_x')

In [31]:
extroversion_coloc['extroversion_coloc'] = (extroversion_coloc['weekly_colocation_rate_y'] / extroversion_coloc['weekly_colocation_rate_x']).round(4)
extroversion_sci['extroversion_sci'] = (extroversion_sci['rel_prob_friend_y'] / extroversion_sci['rel_prob_friend_x']).round(4)

In [33]:
extroversion = extroversion_coloc.merge(extroversion_sci, how='outer', left_on='polygon1_id', right_on='GID_2_x')[['polygon1_id', 'introversion_coloc', 'introversion_sci']]

In [34]:
partisan_exposure_extroversion = partisan_exposure.merge(extroversion, how='left', left_on='GID_2', right_on='polygon1_id')

In [35]:
partisan_exposure_extroversion

Unnamed: 0,GID_2,GEOID,proximity_rep_coloc,proximity_dem_coloc,proximity_rep_sci,proximity_dem_sci,perc_rep,perc_dem,polygon1_id,introversion_coloc,introversion_sci
0,USA.1.10_1,01019,0.7730,0.1879,0.7545,0.1802,0.8203,0.1646,USA.1.10_1,0.5330,0.5902
1,USA.1.11_1,01021,0.7355,0.2241,0.7089,0.2207,0.8169,0.1706,USA.1.11_1,0.4685,0.7015
2,USA.1.12_1,01023,0.5392,0.4363,0.5370,0.4269,0.5531,0.4400,USA.1.12_1,0.4891,0.6085
3,USA.1.13_1,01025,0.5308,0.4466,0.5090,0.4514,0.5482,0.4454,USA.1.13_1,0.3858,0.9495
4,USA.1.14_1,01027,0.7475,0.2236,0.7221,0.2275,0.7737,0.2119,USA.1.14_1,0.2323,0.5138
...,...,...,...,...,...,...,...,...,...,...,...
3093,USA.7.8_1,09015,0.4637,0.4888,0.4538,0.4371,0.4805,0.4846,USA.7.8_1,0.1745,1.0699
3094,USA.8.1_1,10001,0.4758,0.4781,0.5006,0.4107,0.4791,0.4928,USA.8.1_1,0.2368,2.0443
3095,USA.8.2_1,10003,0.3525,0.5999,0.4493,0.4631,0.3188,0.6547,USA.8.2_1,0.3521,3.4687
3096,USA.8.3_1,10005,0.5427,0.4165,0.5149,0.3867,0.5671,0.4129,USA.8.3_1,0.5597,2.2951


### COMMUTING

In [38]:
commuting = pd.read_csv( './data/commuting-probability-network.csv', dtype = {'GEOID_x' : 'str', 'GEOID_y' : 'str'})

In [39]:
#merge datasets to have the results for each county
commuting_tmp = commuting.merge(county_votes_share[['GEOID', 'perc_rep', 'perc_dem']], how='left', left_on = 'GEOID_x', right_on = 'GEOID')
commuting_exposure = commuting_tmp.merge(county_votes_share[['GEOID', 'perc_rep', 'perc_dem']], how='left', left_on = 'GEOID_y', right_on = 'GEOID')
commuting_exposure = commuting_exposure.loc[:,~commuting_exposure.columns.duplicated()].copy()

In [40]:
commuting_exposure['tot_comm'] = commuting_exposure.groupby(['GEOID_x'])['commuting_prob'].transform('sum')

#proximity to republican
commuting_exposure['proximity_rep_comm'] = (commuting_exposure['perc_rep_y'] * (commuting_exposure['commuting_prob'] / commuting_exposure['tot_comm'])).round(4)

#proximity to democrats
commuting_exposure['proximity_dem_comm'] = (commuting_exposure['perc_dem_y'] * (commuting_exposure['commuting_prob'] / commuting_exposure['tot_comm'])).round(4)

proximity_comm = commuting_exposure.groupby(['GEOID_x']).agg({'proximity_rep_comm' : 'sum', 'proximity_dem_comm' : 'sum'}).reset_index()


In [41]:
proximity_comm

Unnamed: 0,GEOID_x,proximity_rep_comm,proximity_dem_comm
0,01001,0.5949,0.3876
1,01003,0.7082,0.2710
2,01005,0.5011,0.4901
3,01007,0.6757,0.3101
4,01009,0.8243,0.1588
...,...,...,...
3116,56037,0.7120,0.2287
3117,56039,0.3434,0.5974
3118,56041,0.7662,0.1673
3119,56043,0.7787,0.1677


In [42]:
partisan_exposure_extroversion = partisan_exposure_extroversion.merge(proximity_comm, how='left', left_on='GEOID', right_on='GEOID_x')

In [43]:
partisan_exposure_extroversion

Unnamed: 0,GID_2,GEOID,proximity_rep_coloc,proximity_dem_coloc,proximity_rep_sci,proximity_dem_sci,perc_rep,perc_dem,polygon1_id,introversion_coloc,introversion_sci,GEOID_x,proximity_rep_comm,proximity_dem_comm
0,USA.1.10_1,01019,0.7730,0.1879,0.7545,0.1802,0.8203,0.1646,USA.1.10_1,0.5330,0.5902,01019,0.7981,0.1849
1,USA.1.11_1,01021,0.7355,0.2241,0.7089,0.2207,0.8169,0.1706,USA.1.11_1,0.4685,0.7015,01021,0.7408,0.2453
2,USA.1.12_1,01023,0.5392,0.4363,0.5370,0.4269,0.5531,0.4400,USA.1.12_1,0.4891,0.6085,01023,0.5338,0.4586
3,USA.1.13_1,01025,0.5308,0.4466,0.5090,0.4514,0.5482,0.4454,USA.1.13_1,0.3858,0.9495,01025,0.5120,0.4812
4,USA.1.14_1,01027,0.7475,0.2236,0.7221,0.2275,0.7737,0.2119,USA.1.14_1,0.2323,0.5138,01027,0.7597,0.2263
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3093,USA.7.8_1,09015,0.4637,0.4888,0.4538,0.4371,0.4805,0.4846,USA.7.8_1,0.1745,1.0699,,,
3094,USA.8.1_1,10001,0.4758,0.4781,0.5006,0.4107,0.4791,0.4928,USA.8.1_1,0.2368,2.0443,10001,0.5007,0.4705
3095,USA.8.2_1,10003,0.3525,0.5999,0.4493,0.4631,0.3188,0.6547,USA.8.2_1,0.3521,3.4687,10003,0.4298,0.5385
3096,USA.8.3_1,10005,0.5427,0.4165,0.5149,0.3867,0.5671,0.4129,USA.8.3_1,0.5597,2.2951,10005,0.5635,0.4114
