In [1]:
from helper.util_network import construct_ind_network

In [2]:
import scipy.stats
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plot
import os

directory = 'data/issue-based networks/'
networks = {}

# Every "<issue>.csv" file in the directory becomes one entry of `networks`:
# key = file name without the ".csv" extension, value = list of the file's rows.
# Passing names=[...] without header= makes pandas treat the file as header-less,
# so the first line is read as data and the single column is called 'legislator'.
for filename in os.listdir(directory):
    if filename.endswith(".csv"):
        issue_name = os.path.splitext(filename)[0]
        # os.path.join avoids the doubled separator that manual concatenation
        # produced ('.../issue-based networks//file.csv').
        csv_path = os.path.join(directory, filename)
        networks[issue_name] = pd.read_csv(csv_path, names=['legislator'])['legislator'].to_list()

In [3]:
# Show which issues were loaded (one key per CSV file, named after the file).
networks.keys()

dict_keys(['education', 'child', 'health_care', 'bankruptcy', 'covid', 'rights', 'business', 'abortion', 'syria', 'ukraine'])

In [4]:
# Combine the per-issue legislator lists into a single network structure.
# NOTE(review): construct_ind_network is a project helper not visible here; the
# graph-building code iterates its result as a mapping of node -> iterable of
# neighbouring nodes — confirm against the helper.
issue_network = construct_ind_network(networks)

In [5]:
import networkx as nx

# Build a directed graph with one node per key of `issue_network`.
g = nx.DiGraph()
g.add_nodes_from(issue_network.keys())

# For every node, add an edge to each listed neighbour unless the opposite
# direction is already stored, so each connected pair ends up with one edge.
# NOTE(review): a DiGraph holding one direction per pair makes the directed
# metrics below depend on insertion order — confirm this is intended rather
# than an undirected nx.Graph.
for source, neighbours in issue_network.items():
    fresh_edges = [
        (source, target)
        for target in neighbours
        if not g.has_edge(target, source)
    ]
    g.add_edges_from(fresh_edges)

In [6]:
# Print the one-line summary of the graph (type, node count, edge count).
# nx.info() was deprecated and then removed in networkx 3.0; printing the graph
# yields the same "DiGraph with N nodes and M edges" text.
print(g)

DiGraph with 411 nodes and 60278 edges


In [7]:
# Edge density of g. Because g is a DiGraph, networkx applies the directed
# formula m / (n * (n - 1)).
nx.density(g)

0.35771170850394635

In [8]:
# Average of the per-node clustering coefficients of g.
nx.average_clustering(g)

0.4361565930471394

In [9]:
from operator import itemgetter


def _top_ranked(scores, n_pairs=10, n_names=100):
    """Rank a {node: score} mapping from highest to lowest score.

    Returns a tuple of (the first `n_pairs` (node, score) pairs,
    the first `n_names` node names) taken from the same stable ordering.
    """
    ordered = sorted(scores.items(), key=itemgetter(1), reverse=True)
    return ordered[:n_pairs], [node for node, _ in ordered[:n_names]]


# Betweenness centrality (normalized, path endpoints included).
bc = nx.betweenness_centrality(g, normalized=True, endpoints=True)
bc_top10, bc_top100 = _top_ranked(bc)

# Eigenvector centrality via the numpy-based solver.
ec = nx.eigenvector_centrality_numpy(g, max_iter=500)
ec_top10, ec_top100 = _top_ranked(ec)

# Closeness centrality.
cc = nx.closeness_centrality(g)
cc_top10, cc_top100 = _top_ranked(cc)

# Degree centrality.
dc = nx.degree_centrality(g)
dc_top10, dc_top100 = _top_ranked(dc)

In [10]:
# Show the ten highest betweenness scores as a two-column table.
print("Betweenness Centrality Top 10")
data = pd.DataFrame(bc_top10, columns=['name', 'value'])
data

Betweenness Centrality Top 10


Unnamed: 0,name,value
0,Frederica Wilson,0.004562
1,Joe Wilson,0.004562
2,David Schweikert,0.003973
3,Rick Scott,0.003907
4,Tim Scott,0.003907
5,Christopher Coons,0.003822
6,Mark Pocan,0.003818
7,Deb Fischer,0.003716
8,Kurt Schrader,0.003699
9,Bennie Thompson,0.003687


In [11]:
# Show the ten highest eigenvector scores as a two-column table.
print("Eigenvector Centrality Top 10")
data = pd.DataFrame(ec_top10, columns=['name', 'value'])
data

Eigenvector Centrality Top 10


Unnamed: 0,name,value
0,Gregory Meeks,0.413745
1,Sean Maloney,0.371876
2,Carolyn Maloney,0.334243
3,Will Hurd,0.300419
4,Bob Gibbs,0.270018
5,Elizabeth Warren,0.259644
6,Russ Fulcher,0.242693
7,Thom Tillis,0.233369
8,Pat Roberts,0.209753
9,Tammy Duckworth,0.188527


In [12]:
# Show the ten highest closeness scores as a two-column table.
print("Closeness Centrality Top 10")
data = pd.DataFrame(cc_top10, columns=['name', 'value'])
data

Closeness Centrality Top 10


Unnamed: 0,name,value
0,Martha McSally,0.748523
1,Doug Jones,0.746176
2,Richard Durbin,0.739139
3,Roger Williams,0.734449
4,Brad Wenstrup,0.732105
5,Mac Thornberry,0.729761
6,Greg Steube,0.725075
7,Elise Stefanik,0.722733
8,John Ratcliffe,0.715708
9,Gregory Meeks,0.713291


In [13]:
# Show the ten highest degree scores as a two-column table.
print("Degree Centrality Top 10")
data = pd.DataFrame(dc_top10, columns=['name', 'value'])
data

Degree Centrality Top 10


Unnamed: 0,name,value
0,Mo Brooks,0.992683
1,Susan Brooks,0.992683
2,Abby Finkenauer,0.992683
3,Gregory Murphy,0.985366
4,Stephanie Murphy,0.985366
5,John Cornyn,0.985366
6,Christopher Murphy,0.985366
7,Chris Collins,0.985366
8,Doug Collins,0.985366
9,Susan Collins,0.985366


In [14]:
# Number of distinct nodes that appear in at least one of the four top-100 lists.
len(set().union(bc_top100, ec_top100, cc_top100, dc_top100))

237

In [15]:
def _centrality_frame(scores, column):
    """Turn a {node: score} mapping into a two-column DataFrame.

    The node keys land in a column called 'index' and the scores in `column`.
    """
    frame = pd.DataFrame.from_dict(scores, orient='index').reset_index()
    return frame.rename(columns={0: column})


# The score column names carry a leading space; it is kept verbatim because the
# later drop of stale columns refers to the same ' bc' / ' ec' / ' cc' / ' dc' labels.
bc_df = _centrality_frame(bc, " bc")
ec_df = _centrality_frame(ec, " ec")
dc_df = _centrality_frame(dc, " dc")
cc_df = _centrality_frame(cc, " cc")

In [16]:
def merge(df1, df2):
    """Left-join `df2` onto `df1`, matching 'Legislator name' against 'index'.

    Every row of `df1` is kept; rows without a match in `df2` get missing
    values in the columns coming from `df2`. The key column 'index' of `df2`
    is part of the returned frame.
    """
    join_spec = {
        'how': 'left',
        'left_on': ['Legislator name'],
        'right_on': ['index'],
    }
    return df1.merge(df2, **join_spec)

In [18]:
# Load the previously saved per-legislator analysis table and display it.
df_analysis = pd.read_csv("data/final_analysis.csv")
df_analysis

Unnamed: 0,Legislator name,Connections,Legislative Effectiveness Score,bc,ec,cc,dc,Gender Estimates,Predicted Gender,Ethnicity Estimates,Predicted Ethnicity
0,Jack Reed,"['Kirsten Gillibrand', 'Tammy Duckworth', 'Joe...",0.346327,0.001776,-2.735437e-18,0.000000,0.117329,"{'male': 0.11446011411247371, 'female': 0.0052...",male,"{'black': 7.499493095967334e-05, 'hispanic': 0...",white
1,Kirsten Gillibrand,"['Jack Reed', 'Tammy Duckworth', 'Joe Manchin ...",0.338763,0.001780,-3.847979e-18,0.001805,0.108303,"{'male': 0.00012182432734851284, 'female': 0.0...",female,"{'black': 0.003963606191111235, 'hispanic': 0....",white
2,Tammy Duckworth,"['Jack Reed', 'Kirsten Gillibrand', 'Joe Manch...",1.529729,0.001784,1.935271e-18,0.003610,0.115523,"{'male': 0.0004337136509202721, 'female': 6.94...",male,"{'black': 1.2556225757797129e-05, 'hispanic': ...",white
3,Gary Peters,"['Jack Reed', 'Kirsten Gillibrand', 'Tammy Duc...",5.015463,0.002253,1.736047e-17,0.007220,0.119134,"{'male': 0.001414442399211089, 'female': 5.194...",male,"{'black': 5.488973188361489e-05, 'hispanic': 0...",white
4,Elizabeth Warren,"['Jack Reed', 'Kirsten Gillibrand', 'Tammy Duc...",0.725357,0.001796,2.013190e-17,0.009025,0.111913,"{'male': 8.072211954203494e-05, 'female': 0.98...",female,"{'black': 1.2343688431685863e-05, 'hispanic': ...",white
...,...,...,...,...,...,...,...,...,...,...,...
476,Gary Palmer,"['Peter DeFazio', 'Grace Napolitano', 'Donald ...",1.086954,0.001707,1.510191e-01,0.398346,0.117329,"{'male': 0.06688244777941647, 'female': 0.0001...",male,"{'black': 1.7538554292170157e-05, 'hispanic': ...",white
477,Randy Weber,"['Peter DeFazio', 'Grace Napolitano', 'Donald ...",0.069581,0.001724,1.976157e-01,0.438368,0.171480,"{'male': 0.11301501154153658, 'female': 0.0001...",male,"{'black': 2.5656831556824548e-05, 'hispanic': ...",white
478,Brian Babin,"['Peter DeFazio', 'Grace Napolitano', 'Donald ...",0.562175,0.001724,2.658546e-01,0.440947,0.171480,"{'male': 0.00017225489122579384, 'female': 0.0...",female,"{'black': 1.638852910317999e-05, 'hispanic': 0...",white
479,John A. Yarmuth,"['Robert C. Scott', 'Jan Schakowsky', 'David P...",1.223520,0.001727,8.349277e-02,0.412778,0.055957,"{'male': 0.004795478958044669, 'female': 0.000...",male,"{'black': 0.01566683526845134, 'hispanic': 0.0...",white


In [19]:
# Discard the connection lists and the centrality columns stored in the CSV;
# the centrality columns are re-attached from the freshly computed scores.
stale_columns = ['Connections', ' bc', ' ec', ' cc', ' dc']
df_analysis = df_analysis.drop(columns=stale_columns)

In [20]:
# Attach each centrality measure to the analysis table, one left-join per measure.
# merge() returns the right-hand key column 'index' as well; it is dropped right
# after every join. Letting the keys accumulate made the later joins generate
# 'index_x'/'index_y' labels that already existed, which pandas reports as
# duplicate-suffix columns (a FutureWarning in 1.x, a MergeError from 2.0 on).
for centrality_df in (bc_df, ec_df, cc_df, dc_df):
    df_analysis = merge(df_analysis, centrality_df).drop(columns=['index'])

  return merge(


In [21]:
# Persist the merged table. With its default arguments to_csv also writes the
# row index as an unnamed first column.
df_analysis.to_csv("issue_analysis.csv")