In [2]:
import random, math, pickle, string
import pandas as pd, numpy as np, matplotlib.pyplot as plt, matplotlib.colors as mcolors, networkx as nx, scipy as sp
from collections import defaultdict, Counter, OrderedDict
from itertools import combinations, chain
import pygraphviz
from networkx.drawing.nx_agraph import graphviz_layout
import plotly.graph_objects as go
import plotly.figure_factory as ff
from cdlib import algorithms # !pip install cdlib and !pip install leidenalg

import bokeh
from bokeh.io import push_notebook, show, output_notebook, save, output_file
import bokeh.plotting as bp
from bokeh.plotting import figure, save, output_file, show 
from bokeh.models import (ColumnDataSource, LabelSet, Label, BoxSelectTool, Circle, EdgesAndLinkedNodes, HoverTool,MultiLine, NodesAndLinkedEdges, Plot, Range1d, TapTool,)
from holoviews.element.graphs import layout_nodes

output_notebook()
import holoviews as hv
from holoviews import dim, opts
hv.extension('bokeh', 'matplotlib')
from holoviews.operation import  gridmatrix
from holoviews.operation.datashader import datashade, bundle_graph
from holoviews import Graph, Nodes
from holoviews.plotting.bokeh import GraphPlot, LabelsPlot
import hvplot.networkx as hvnx
import hvplot.pandas

import warnings
# Warning suppression for the rest of the notebook session.
# The first two filters ignore warnings whose message starts with "Note"
# (the first one only for RuntimeWarning, the second for any category).
# NOTE(review): these filters are installed after the imports above have already
# run, so they cannot affect anything emitted while those imports executed.
warnings.filterwarnings("ignore", category=RuntimeWarning, message="Note") 
warnings.filterwarnings("ignore", message="Note") 
# NOTE(review): this catch-all filter ignores every warning, which makes the two
# message-specific filters above redundant and also hides deprecation/numerical
# warnings — consider narrowing it to the categories that are actually noisy.
warnings.simplefilter('ignore')

Note: to be able to use all crisp methods, you need to install some additional packages:  {'graph_tool', 'bayanpy', 'infomap'}
Note: to be able to use all crisp methods, you need to install some additional packages:  {'ASLPAw', 'pyclustering'}
Note: to be able to use all crisp methods, you need to install some additional packages:  {'infomap'}


In [10]:
# All Functions

def node_sizes_scaling(G,a=0,b=1,mode='lin'):
    """Compute one display size per node of ``G`` from the node's in-degree.

    Parameters
    ----------
    G : graph-like
        Object exposing ``nodes()`` and ``in_degree(node)``.
    a : number, default 0
        Additive offset applied to every node.
    b : number, default 1
        Multiplier applied to the (possibly log-transformed) in-degree.
    mode : {'lin', 'log'}, default 'lin'
        ``'lin'`` gives ``a + b * in_degree``;
        ``'log'`` gives ``a + b * ln(in_degree)``.

    Returns
    -------
    list
        Sizes in the same order as ``G.nodes()``.

    Raises
    ------
    ValueError
        If ``mode`` is neither ``'lin'`` nor ``'log'`` (previously this surfaced
        as an ``UnboundLocalError`` on the return statement).
    """
    if mode == 'lin':
        return [a + b * G.in_degree(node) for node in G.nodes()]

    if mode == 'log':
        sizes = []
        for node in G.nodes():
            degree = G.in_degree(node)
            # log(0) is -inf, which is unusable as a marker size; a node with no
            # incoming edges therefore contributes a zero log term (size == a).
            log_term = np.log(degree) if degree > 0 else 0.0
            sizes.append(a + b * log_term)
        return sizes

    raise ValueError("mode must be 'lin' or 'log', got {!r}".format(mode))

def hv_plot_graph(G, node_sizes, arrowhead_length, plot_size, pos=None, nodelabels=1, node_color="green",bundled=None,
                  partition=None, partition_colors=None, edge_color='gray',edge_line_width=1,
                  title=None, fontsize=None,
                  xoffset=0, yoffset=0, text_font_size=None, text_color=None, bgcolor="white"):
    """Draw ``G`` with hvplot's networkx interface and return the resulting plot.

    Parameters
    ----------
    G : networkx graph
        Graph to draw.
    node_sizes : passed to ``hvnx.draw`` as ``node_size``.
    arrowhead_length : passed to the plot options.
    plot_size : sequence of two numbers
        ``(width, height)`` of the plot.
    pos : dict, optional
        Node -> (x, y) layout. Defaults to ``nx.spring_layout(G)``.
    nodelabels : int, default 1
        When equal to 1, a ``hv.Labels`` layer with the node names is overlaid.
    node_color : passed to ``hvnx.draw`` when no ``partition`` is given.
    bundled : optional
        Truthy -> the plot is passed through ``bundle_graph``. Falsy (``0``,
        ``False`` or the default ``None``) -> the plot is returned unbundled.
    partition : dict, optional
        Node -> partition id. When given, each node is coloured with
        ``partition_colors[partition[node]]`` instead of ``node_color``.
    partition_colors : mapping or sequence, optional
        Required when ``partition`` is given.
    edge_color, edge_line_width : passed to ``hvnx.draw``.
    title : str, optional
        Plot title.
    fontsize : dict or value, optional
        When a dict, its ``'title'`` entry (default ``'9pt'``) is used as the
        font size set together with the title; the object is also forwarded
        unchanged to the label layer options.
    xoffset, yoffset, text_font_size, text_color : label layer options.
    bgcolor : background colour of the plot and of the label layer.

    Returns
    -------
    The holoviews plot, optionally bundled and optionally overlaid with labels.

    Raises
    ------
    TypeError
        If ``partition`` is given without ``partition_colors``.
    """
    if pos is None:
        pos = nx.spring_layout(G)

    if partition is not None:
        if partition_colors is None:
            # Fail with a clear message instead of "'NoneType' object is not subscriptable".
            raise TypeError("partition_colors is required when partition is given")
        node_colors = [partition_colors[partition[node]] for node in G.nodes()]
    else:
        node_colors = node_color

    plot = hvnx.draw(G, pos=pos, node_size=node_sizes, node_color=node_colors,
                     edge_color=edge_color, edge_line_width=edge_line_width)

    plot.opts(width=plot_size[0], height=plot_size[1], arrowhead_length=arrowhead_length, bgcolor=bgcolor)

    if title is not None:
        # fontsize may legitimately be omitted; fall back to the same default
        # the dict lookup uses rather than crashing on None.get(...).
        if fontsize is None:
            title_fontsize = '9pt'
        elif isinstance(fontsize, dict):
            title_fontsize = fontsize.get('title', '9pt')
        else:
            title_fontsize = fontsize
        plot = plot.opts(title=title, fontsize=title_fontsize)

    # Previously only bundled == 0 or bundled == 1 produced a return value, so the
    # default bundled=None silently returned None; any falsy value now means
    # "do not bundle".
    if bundled:
        plot = bundle_graph(plot)

    if nodelabels == 1:
        # The position table is only needed for the label layer.
        pos_df = pd.DataFrame.from_dict(pos, orient='index', columns=['x', 'y'])
        pos_df['index'] = pos_df.index
        labels = hv.Labels(pos_df, ['x', 'y'], 'index')
        plot = plot * labels.opts(xoffset=xoffset, yoffset=yoffset,
                                  text_font_size=text_font_size, text_color=text_color,
                                  fontsize=fontsize, bgcolor=bgcolor)
    return plot

    
def graph_attributes(G,nodedatalist=None,edgedatalist=None):
    """Attach node and/or edge attributes to ``G`` in place.

    Parameters
    ----------
    G : graph-like
        Object whose ``nodes`` / ``edges`` are callable (to iterate) and
        subscriptable (to reach the per-node / per-edge attribute dict).
    nodedatalist : list of (dict, str) tuples, optional
        Each tuple is ``(values, attribute_name)``; ``values[node]`` is stored
        under ``attribute_name`` for every node of ``G``.
    edgedatalist : list of (dict, str) tuples, optional
        Same as ``nodedatalist`` but keyed by edge.

    Returns
    -------
    None. ``G`` is modified in place.

    Raises
    ------
    KeyError
        If a values dict has no entry for some node (or edge) of ``G``.
    """
    if nodedatalist is not None:
        for values, attr_name in nodedatalist:
            for node in G.nodes():
                G.nodes[node][attr_name] = values[node]
    if edgedatalist is not None:
        for values, attr_name in edgedatalist:
            for edge in G.edges():
                G.edges[edge][attr_name] = values[edge]

In [3]:
# Load the candidate table; rows with a missing incumbency value are labelled 'Challenger'.
dfs = pd.read_pickle("US_Senate_2024_df_updated.pickle").assign(
    incumbency=lambda frame: frame['incumbency'].fillna('Challenger')
)
print(len(dfs))
dfs

450


Unnamed: 0,candidate,incumbency,gender,party,office,status,State,Wiki page,Wiki page (validated)
0,Kari Lake,Challenger,,Republican,U.S. Senate Arizona,On the Ballot Primary,AZ,Kari Lake,Kari Lake
1,Mark Lamb,Challenger,,Republican,U.S. Senate Arizona,On the Ballot Primary,AZ,Mark Lamb,Mark Lamb (sheriff)
2,Elizabeth Reye,Challenger,,Republican,U.S. Senate Arizona,On the Ballot Primary,AZ,,
3,Ruben Gallego,Challenger,,Democratic,U.S. Senate Arizona,On the Ballot Primary,AZ,Ruben Gallego,Ruben Gallego
4,Arturo Hernandez,Challenger,,Green,U.S. Senate Arizona,On the Ballot Primary,AZ,Arturo Hernandez,
...,...,...,...,...,...,...,...,...,...
445,Phillip Anderson,Challenger,,Libertarian,U.S. Senate Wisconsin,Candidacy Declared General,WI,Phillip Anderson,
446,Joshua Harrington,Challenger,,No Party Affiliation,U.S. Senate Wisconsin,Candidacy Declared General,WI,,
447,Stacey Klein,Challenger,,Republican,U.S. Senate Wisconsin,Withdrew Primary,WI,,
448,John Barrasso,Incumbent,,Republican,U.S. Senate Wyoming,Candidacy Declared Primary,WY,,John Barrasso


In [4]:
# Node attribute names and, in the same order, the candidate -> value lookup
# built from the matching column of `dfs`.
attr_names = ["incumbency", "party", "state", "status"]
s_attr_dicts = [
    dict(zip(dfs['candidate'], dfs[column]))
    for column in ("incumbency", "party", "State", "status")
]
(senate_candidate_incumbency_d, senate_candidate_party_d,
 senate_candidate_state_d, senate_candidate_status_d) = s_attr_dicts

# NOTE: pickle.load can execute arbitrary code; only load graph files you trust.
with open("US_Senate_2024_graph_updated.pickle", "rb") as f:
    Gs = pickle.load(f)

# Attach the candidate metadata to the directed hyperlink graph.
for attr_name, attr_dict in zip(attr_names, s_attr_dicts):
    nx.set_node_attributes(Gs, attr_dict, attr_name)

# Keep only edges that also exist in the opposite direction, then drop direction.
reciprocated_edges = [edge for edge in Gs.edges() if Gs.has_edge(edge[1], edge[0])]
recGs = Gs.edge_subgraph(reciprocated_edges).to_undirected()
for attr_name, attr_dict in zip(attr_names, s_attr_dicts):
    nx.set_node_attributes(recGs, attr_dict, attr_name)

## 2. Hyperlink Graphs

### Fig 1: Senate Candidates Hyperlink Graph of Wikipedia Pages

In [11]:
# Candidate-indexed lookup tables that become node attributes of the figure.
party_d, office_d, status_d, state_d, incumbency_d = (
    dfs.set_index('candidate')[column].to_dict()
    for column in ('party', 'office', 'status', 'State', 'incumbency')
)
incumbency_d = {
    candidate: (value if value is not None else 'none')
    for candidate, value in incumbency_d.items()
}
out_degree_d = {node: degree for node, degree in Gs.out_degree}
in_degree_d = {node: degree for node, degree in Gs.in_degree}

# Republicans red, Democrats blue, every other party lime.
fig1_party_colors = {'Republican': 'red', 'Democratic': 'blue'}
color_d = {node: fig1_party_colors.get(party_d[node], 'lime') for node in Gs.nodes()}

nodedatalist = [
    (out_degree_d, "out degree"),
    (in_degree_d, "in degree"),
    (party_d, "party"),
    (office_d, "office"),
    (status_d, "status"),
    (state_d, "state"),
    (color_d, "color"),
    (incumbency_d, "incumbency"),
]
graph_attributes(Gs, nodedatalist, edgedatalist=None)

ti = "The hyperlink graph of the US Senate candidates' wiki pages"
node_sizes = node_sizes_scaling(Gs, a=50, b=10, mode='lin')
plot = hv_plot_graph(
    Gs,
    node_sizes,
    arrowhead_length=0.015,
    plot_size=(1000, 1000),
    pos=graphviz_layout(Gs),
    nodelabels=0,
    node_color="color",
    bundled=0,
    partition=None,
    partition_colors=None,
    edge_color='olive',
    edge_line_width=1,
    title=ti,
    fontsize={'title': '15pt'},
    xoffset=0,
    yoffset=0,
    text_font_size='5pt',
    text_color='midnightblue',
    bgcolor="white",
)

# Drop the helper "color" entry from the plot data if it is present.
dicts_to_remove = ["color"]
for dict_name in dicts_to_remove:
    if dict_name in plot.data.keys():
        del plot.data[dict_name]
plot

### Fig 2: House Candidates Hyperlink Graph of Wikipedia Pages

## 3. Connectivity of Hyperlink Graphs

### Table 1: Senate - Weakly Connected Component Subgraphs

In [13]:
G = Gs

# Weakly connected components, largest first.
list_of_weakly_connected_components = list(nx.weakly_connected_components(G))
list_of_weakly_connected_components.sort(key=len, reverse=True)
k = len(list_of_weakly_connected_components)

# One induced subgraph per component, with its node and edge counts.
no_of_canditates = [len(component) for component in list_of_weakly_connected_components]
sg = [G.subgraph(component) for component in list_of_weakly_connected_components]
sg_nodes = [len(graph) for graph in sg]
sg_edges = [graph.number_of_edges() for graph in sg]

table = pd.DataFrame({
    "Enumeration": range(k),
    "No. of candidates": no_of_canditates,
    "No. vertices in subgraph": sg_nodes,
    "No. edges in subgraph": sg_edges,
})

table

Unnamed: 0,Enumeration,No. of candidates,No. vertices in subgraph,No. edges in subgraph
0,0,56,56,580
1,1,2,2,1
