In [3]:
# Print the Python/IPython versions (-v) and the versions of the listed packages (-p)
# so readers can see which environment produced the outputs recorded below.
%reload_ext watermark
%watermark -p pandas,networkx,numpy,matplotlib -v

CPython 3.5.2
IPython 5.0.0

pandas 0.18.0
networkx 1.11
numpy 1.11.1
matplotlib 1.5.0


# Centrality
## "Who's the Boss?"

- Definition of Centrality
- Examples (Medici family, trade example, etc.)
- Compare and contrast popular centrality measures on dataset
    - Degree
    - Closeness
    - Betweenness
    - Eigenvector


In [4]:
import networkx as nx

In [5]:
# Load the graph stored as GEXF into GA; the centrality cells below all operate on GA.
GA = nx.read_gexf('../data/ga_graph.gexf')

## Degree Centrality
Degree is a basic way of understanding a node. Intuitively, the more connections a node has, the more central the node is to the network.

NetworkX's degree centrality is calculated by taking the degree of the node and dividing by n-1, where n is the number of nodes in G. Its value is the fraction of possible edges that exist for that node.

> The degree centrality values are normalized by dividing by the maximum
> possible degree in a simple graph n-1 where n is the number of nodes in G.

In [9]:
# Map every node in GA to its degree centrality (node degree divided by n - 1).
degree_centrality = nx.degree_centrality(GA)

In [10]:
# Display the full node -> degree-centrality mapping.
degree_centrality

{'addison': 0.0967741935483871,
 'adele': 0.03225806451612903,
 'altman': 0.06451612903225806,
 'arizona': 0.03225806451612903,
 'avery': 0.03225806451612903,
 'bailey': 0.06451612903225806,
 'ben': 0.03225806451612903,
 'chief': 0.06451612903225806,
 'colin': 0.03225806451612903,
 'denny': 0.03225806451612903,
 'derek': 0.06451612903225806,
 'ellis grey': 0.06451612903225806,
 'finn': 0.03225806451612903,
 'grey': 0.12903225806451613,
 'hank': 0.03225806451612903,
 'izzie': 0.12903225806451613,
 'karev': 0.22580645161290322,
 'kepner': 0.03225806451612903,
 'lexi': 0.0967741935483871,
 'mrs. seabury': 0.03225806451612903,
 'nancy': 0.03225806451612903,
 "o'malley": 0.12903225806451613,
 'olivia': 0.06451612903225806,
 'owen': 0.06451612903225806,
 'preston': 0.03225806451612903,
 'sloan': 0.16129032258064516,
 'steve': 0.03225806451612903,
 'susan grey': 0.03225806451612903,
 'thatch grey': 0.06451612903225806,
 'torres': 0.12903225806451613,
 'tucker': 0.03225806451612903,
 'yang': 0

In [12]:
# Five most degree-central nodes, highest score first, as (node, score) pairs.
# Rank the node names by their score, then pair each name back up with its score.
[
    (name, degree_centrality[name])
    for name in sorted(degree_centrality, key=degree_centrality.get, reverse=True)[:5]
]

[('karev', 0.22580645161290322),
 ('sloan', 0.16129032258064516),
 ('grey', 0.12903225806451613),
 ("o'malley", 0.12903225806451613),
 ('torres', 0.12903225806451613)]

In [13]:
# Apply the measurements back to the graph: store each node's score under the
# node attribute 'degree centrality'.
# Keyword arguments are used on purpose: the positional order of `name` and `values`
# differs between NetworkX 1.x (G, name, values) and 2.x (G, values, name), while the
# keyword names are the same in both.
nx.set_node_attributes(GA, name='degree centrality', values=degree_centrality)

In [15]:
# Inspect karev's attribute dict to confirm the score was stored on the node.
# NOTE(review): `Graph.node` is the NetworkX 1.x accessor (this notebook ran on 1.11);
# newer releases presumably need `GA.nodes['karev']` — confirm before upgrading.
GA.node['karev']

{'degree centrality': 0.22580645161290322, 'label': 'karev'}

## Closeness Centrality
Closeness Centrality measures how many "hops" it would take to reach every other node in a network (taking the shortest path). It can be informally thought of as the 'average distance' to all other nodes.

In NetworkX, it is the reciprocal of the *average* shortest-path distance, which normalizes the value to a 0 to 1 range. If you take the reciprocal of this again, you'll find the *average* distance to all other nodes (this holds exactly only when the graph is connected; see the note below).


NetworkX: $ \frac{n - 1}{\sum_{v=1}^{n-1} d(v, u)} $


⚠️
__Note__: `If the graph is not completely connected, this algorithm computes the closeness centrality for each connected part separately.` [[Source](https://networkx.github.io/documentation/development/_modules/networkx/algorithms/centrality/closeness.html)]

In [18]:
# Map every node in GA to its closeness centrality.
closeness_centrality = nx.closeness_centrality(GA)

In [19]:
# Display the full node -> closeness-centrality mapping.
closeness_centrality

{'addison': 0.2892290869327502,
 'adele': 0.05161290322580645,
 'altman': 0.2337604949182501,
 'arizona': 0.21600653327888933,
 'avery': 0.19614386355209493,
 'bailey': 0.06451612903225806,
 'ben': 0.04301075268817204,
 'chief': 0.07373271889400922,
 'colin': 0.13228307076769194,
 'denny': 0.18752215526409075,
 'derek': 0.2337604949182501,
 'ellis grey': 0.08602150537634408,
 'finn': 0.17236884978820463,
 'grey': 0.2216170925848345,
 'hank': 0.18752215526409075,
 'izzie': 0.24731182795698925,
 'karev': 0.2892290869327502,
 'kepner': 0.21067303863002787,
 'lexi': 0.26253101736972706,
 'mrs. seabury': 0.21067303863002787,
 'nancy': 0.21067303863002787,
 "o'malley": 0.2708653353814644,
 'olivia': 0.2337604949182501,
 'owen': 0.19173613628126135,
 'preston': 0.13228307076769194,
 'sloan': 0.2892290869327502,
 'steve': 0.17236884978820463,
 'susan grey': 0.05161290322580645,
 'thatch grey': 0.07373271889400922,
 'torres': 0.29937747594793435,
 'tucker': 0.04301075268817204,
 'yang': 0.15948

In [20]:
# Five nodes with the highest closeness centrality, highest score first, as (node, score) pairs.
# Rank the node names by their score, then pair each name back up with its score.
[
    (name, closeness_centrality[name])
    for name in sorted(closeness_centrality, key=closeness_centrality.get, reverse=True)[:5]
]

[('torres', 0.29937747594793435),
 ('addison', 0.2892290869327502),
 ('sloan', 0.2892290869327502),
 ('karev', 0.2892290869327502),
 ("o'malley", 0.2708653353814644)]

In [23]:
# Apply the measurements back to the graph: store each node's score under the
# node attribute 'closeness centrality'.
# Keyword arguments are used on purpose: the positional order of `name` and `values`
# differs between NetworkX 1.x (G, name, values) and 2.x (G, values, name), while the
# keyword names are the same in both.
nx.set_node_attributes(GA, name='closeness centrality', values=closeness_centrality)

In [25]:
# Reciprocal of torres' closeness score.
# NOTE(review): this equals torres' average distance only if GA is connected. The
# scores above suggest it is not (e.g. bailey's 0.0645 == 2/31, i.e. only 2 of the 31
# other nodes are reachable), and NetworkX scales closeness by the reachable fraction,
# so this value overstates the true average distance — confirm against the graph.
1 / GA.node['torres']['closeness centrality']

3.340264650283554

{'degree centrality': 0.22580645161290322, 'label': 'karev'}