# Analyzing Witness Networks

[David J. Thomas](mailto:dave.a.base@gmail.com), [thePort.us](http://thePort.us)<br />
Instructor of Ancient History and Digital Humanities,<br />
Department of History,<br />
[University of South Florida](https://github.com/usf-portal)

---

## This workbook will...

* Use the `networkx` module to analyze witnesses in the charters
* Calculate network statistics like degree, betweenness centrality, community detection, and more
* Export the network with statistics for use elsewhere in programs like Gephi
* Visualize the network as a whole and in parts
* Attempt to identify major figures at different levels of the network

**THIS WORKBOOK IS FUNCTIONAL BUT LACKS MUCH COMMENTARY: A PROEMIUM, MORE FUNCTIONALITY, AND MORE EXPLANATORY MATERIAL TO BE ADDED LATER**


---

## 1) Import Module Dependencies

The cell below loads all other Python packages needed. You **must** run this before any other cells.

In [None]:
# display/HTML are used to render fetched page text inline in the notebook
from IPython.core.display import display, HTML
# graph I/O, statistics, layout and drawing
import networkx as nx
# supplies best_partition() for community detection
import community
import matplotlib.pyplot as plt
# supplies the WebPage context manager used to fetch pages linked from nodes
import dhelp

print('Dependencies loaded!')

## 2) Crunch Network Statistics and Export Graph

This step will take advantage of networkx's statistical capabilities. Perhaps most importantly, networkx can measure various kinds of [network centralities](https://en.wikipedia.org/wiki/Centrality).

**THIS STEP WILL TAKE A WHILE, SO BE PATIENT**

In [None]:
# Load the witness graph from its GEXF file into the module-level
# `witness_network` variable that the following cells operate on.
print('Reading network file...')
witness_network = nx.read_gexf('../data/witness_network.gexf')
print('Done!')

def crunch_network(network):
    """Compute per-node statistics for ``network`` and store them on its nodes.

    The statistics are degree, community membership (``community.best_partition``)
    and degree, betweenness, eigenvector, closeness and harmonic centrality.
    Each result is written onto every node as an attribute named after the
    statistic.

    All statistics are calculated on the graph passed in as ``network``, so
    the function gives graph-specific results when called on a sub-network.

    Args:
        network: the networkx graph to analyse. It is modified in place.

    Returns:
        The same ``network`` object, with the new node attributes set.
    """
    print('Crunching various network statistics (takes awhile)...\n')
    results = {}
    # degrees and community
    print('Degrees...', end='')
    # materialise the degree view as a plain {node: degree} mapping
    results['degree'] = dict(nx.degree(network))
    print(' Done!\nCommunity detection with best partition...', end='')
    results['community'] = community.best_partition(network)
    # centralities
    print(' Done!\nDegree centrality...', end='')
    results['degree_centrality'] = nx.degree_centrality(network)
    print(' Done!\nBetweeness (shortest-path) centrality...', end='')
    # NOTE: the key keeps the 'betweeness' spelling because other cells in
    # this notebook read the node attribute under that exact name.
    results['betweeness_centrality'] = nx.betweenness_centrality(network)
    print(' Done!\nEigenvector centrality...', end='')
    results['eigenvector_centrality'] = dict(nx.eigenvector_centrality(network))
    print(' Done!\nCloseness centrality...', end='')
    results['closeness_centrality'] = nx.closeness_centrality(network)
    print(' Done!\nHarmonic centrality...', end='')
    results['harmonic_centrality'] = nx.harmonic_centrality(network)
    print(' Done!\n\nFinished calculating network statistics.')
    # now take result dicts and feed them back into nodes as attributes
    for attribute_name, attribute_results in results.items():
        for node_id in network.nodes():
            network.nodes[node_id][attribute_name] = attribute_results[node_id]
    return network


# Annotate every node with its statistics, then persist the enriched graph.
witness_network = crunch_network(witness_network)
# NOTE: the target is the same path the graph was read from, so the input
# file is overwritten with the annotated version. The message now names the
# path that is actually written.
print('Exporting network with calculated statistics to ../data/witness_network.gexf')
nx.write_gexf(witness_network, '../data/witness_network.gexf')
print('Exported network successfully.')

## 5) Visualize Entire Witness Network

It's time to finally see some sweet, sweet payoff! We can use networkx itself to visualize our network and see what's interesting.

In [None]:
def draw_network(network, with_labels=False):
    """Draw ``network`` with a spring layout.

    Node size is the node's 'betweeness_centrality' attribute times 3000 and
    node colour is its 'community' attribute, so both attributes must already
    be present on every node.

    Args:
        network: networkx graph whose nodes carry the two attributes above.
        with_labels: passed through to ``nx.draw`` to toggle node labels.

    Returns:
        Always ``True``.
    """
    node_attrs = [network.nodes[node_id] for node_id in network.nodes()]
    sizes = [attrs['betweeness_centrality'] * 3000 for attrs in node_attrs]
    colors = [attrs['community'] for attrs in node_attrs]
    print('Generating network preview...')
    layout = nx.spring_layout(network, k=0.05, scale=1)
    nx.draw(
        network,
        pos=layout,
        node_color=colors,
        node_size=sizes,
        width=0.1,
        alpha=0.5,
        with_labels=with_labels,
    )
    plt.draw()
    return True


# Draw the whole graph; labels are left at draw_network's default (off).
print('Entire Witness Network')
draw_network(witness_network)

## 6) Finding Key Figures

### 6a) Listing Top People by Attributes

In [None]:
def order_nodes_by_attribute(network, attribute_name):
    """Rank the nodes of ``network`` by one attribute, highest value first.

    Args:
        network: graph whose nodes all carry ``attribute_name``.
        attribute_name: name of the node attribute to rank by.

    Returns:
        A list of ``(node_id, value)`` tuples in descending order of value.
    """
    # map each node id to its value for the requested attribute
    value_by_node = {
        node_id: network.nodes[node_id][attribute_name]
        for node_id in network.nodes()
    }
    # the sort is stable, so nodes with equal values keep their original order
    return sorted(value_by_node.items(), key=lambda item: item[1], reverse=True)


def print_top_nodes_by_attribute(network, attribute_name, print_name, limit=5):
    """Print the top-ranked nodes of ``network`` for one attribute.

    Each row shows the rank, the node id, the node's 'link' attribute and, in
    parentheses, the attribute value the ranking is based on.

    Args:
        network: graph whose nodes carry ``attribute_name`` and 'link'.
        attribute_name: name of the node attribute to rank by.
        print_name: heading printed above the ranking.
        limit: maximum number of rows to print (default 5).
    """
    print('----- {} -----'.format(print_name))
    # honour `limit` rather than a fixed slice, so the default prints 5 rows
    top_nodes = order_nodes_by_attribute(network, attribute_name)[:limit]
    for rank, (node_id, val) in enumerate(top_nodes, start=1):
        node_link = network.nodes[node_id]['link']
        # four placeholders for four arguments, so the value is printed too
        print('{}.) {}: {} ({})'.format(rank, node_id, node_link, val))


# Pairs of (node attribute name, heading to print). The attribute names must
# match the keys that crunch_network writes onto the nodes.
attributes_to_print = [
    ('degree', 'Degree'),
    ('betweeness_centrality', 'Betweeness Centrality'),
    ('degree_centrality', 'Degree Centrality'),
    ('closeness_centrality', 'Closeness Centrality'),
    ('eigenvector_centrality', 'Eigenvector Centrality'),
    ('harmonic_centrality', 'Harmonic Centrality')
]

# One ranking per attribute, separated by a blank line.
for attribute_name, print_name in attributes_to_print:
    print_top_nodes_by_attribute(witness_network, attribute_name, print_name)
    print('')

### 6b) Looking into Individuals

In [None]:
def lookup_person(network, node_id):
    """Fetch the page linked from a node and display its text in the notebook.

    Args:
        network: graph whose nodes carry a 'link' attribute.
        node_id: id of the node to look up.

    Returns:
        The object yielded by ``dhelp.WebPage`` for the node's link.
    """
    person_url = network.nodes[node_id]['link']
    with dhelp.WebPage(person_url) as page_soup:
        # keep a reference so the page is still usable after the block exits
        person_page = page_soup
    display(HTML(person_page.get_text()))
    return person_page

# Node ids to look up; each must exist in witness_network, because
# lookup_person indexes network.nodes with it.
notable_persons = ['Alfred 8', 'Wulfred 6', 'Eadwulf 7', 'Ceolnoth 3', 'Æthelbald 4', 'Cenwealh 2', 'Offa 7']

# Print a separator line with the id, then display that person's linked page.
for notable_person in notable_persons:
    print('---', notable_person)
    lookup_person(witness_network, notable_person)

print('Finished displaying persons')

## 8) Looking into Sub-Communities

In [None]:
def get_community(network, community_num):
    """Return a copy of ``network`` reduced to the nodes of one community.

    Args:
        network: graph whose nodes carry a 'community' attribute.
        community_num: the 'community' value of the nodes to keep.

    Returns:
        The reduced copy after it has been passed through ``crunch_network``.
    """
    # work on a copy so that nodes are removed from it, not from `network`
    community_graph = network.copy()
    # collect every node whose community differs from the requested one
    outsiders = [
        node_id
        for node_id in community_graph.nodes()
        if community_graph.nodes[node_id]['community'] != community_num
    ]
    community_graph.remove_nodes_from(outsiders)
    # hand the reduced graph to crunch_network and return what it gives back
    return crunch_network(community_graph)


# Extract the community that node 'Alfred 8' was assigned to in the full
# network, draw it, and list its top nodes per attribute.
print('Alfred the Great')
alfred_community = get_community(witness_network, witness_network.nodes['Alfred 8']['community'])
draw_network(alfred_community)

# get_community passes its result through crunch_network, so the attributes
# ranked here are the ones written by that call.
for attribute_name, print_name in attributes_to_print:
    print_top_nodes_by_attribute(alfred_community, attribute_name, print_name)
    print('')

In [None]:
# Same procedure for the community that node 'Offa 7' was assigned to, this
# time drawn with node labels switched on.
print('Offa of Mercia')
offa_community = get_community(witness_network, witness_network.nodes['Offa 7']['community'])
draw_network(offa_community, with_labels=True)

# get_community passes its result through crunch_network, so the attributes
# ranked here are the ones written by that call.
for attribute_name, print_name in attributes_to_print:
    print_top_nodes_by_attribute(offa_community, attribute_name, print_name)
    print('')