# Getting Started: Reading files, importing data
Use the `quakers_nodelist.csv` and `quakers_edgelist.csv` files and store the contents within them

In [16]:
import csv
from operator import itemgetter
import networkx as nx
from networkx.algorithms import community # This part of networkx, for community detection, needs to be imported separately.

In [6]:
with open('resources/quakers_nodelist.csv', 'r') as nodecsv:
    nodereader = csv.reader(nodecsv)
    # Retrieve the data (using Python list comphrension and list slicing to remove the header row)
    nodes = [n for n in nodereader][1:]

node_names = [n[0] for n in nodes]

with open('resources/quakers_edgelist.csv', 'r') as edgecsv:
    edgereader = csv.reader(edgecsv)
    edges = [tuple(e) for e in edgereader][1:] # Retreive the data

print(f'Number of Nodes: {len(node_names)}')
print(f'Number of Edges: {len(edges)}')

Number of Nodes: 119
Number of Edges: 174


# Basics of NetworkX: Creating the Graph
Use the above two lists to create a single network object that understands how nodes and edges are related.

In [15]:
G = nx.Graph()

G.add_nodes_from(node_names)
G.add_edges_from(edges)

print(nx.info(G))

Name: 
Type: Graph
Number of nodes: 119
Number of edges: 174
Average degree:   2.9244


### Adding Attributes
So far we have added nodes and edges, but NetworkX allows us to add *attributes* to both nodes and edges, providing more information about each of them. Attributes are a form of dictionaries, in which node names are the keys and the attributes you want to add are the values

In [9]:
hist_sig_dict = {}
gender_dict = {}
birth_dict = {}
death_dict = {}
id_dict = {}

for node in nodes: # Loop through the list, one row at a time
    #dictionary[id] = value
    hist_sig_dict[node[0]] = node[1]
    gender_dict[node[0]] = node[2]
    birth_dict[node[0]] = node[3]
    death_dict[node[0]] = node[4]
    id_dict[node[0]] = node[5]

In [10]:
nx.set_node_attributes(G, hist_sig_dict, 'historical_significance')
nx.set_node_attributes(G, gender_dict, 'gender')
nx.set_node_attributes(G, birth_dict, 'birth_year')
nx.set_node_attributes(G, death_dict, 'death_year')
nx.set_node_attributes(G, id_dict, 'sdfb_id')

Now all of the nodes have these attributes associated to them. Can access them like an array (with the string as the identifier)

In [11]:
for n in G.nodes():
    print(n, G.nodes[n]['birth_year'])

Joseph Wyeth 1663
Alexander Skene of Newtyle 1621
James Logan 1674
Dorcas Erbery 1656
Lilias Skene 1626
William Mucklow 1630
Thomas Salthouse 1630
William Dewsbury 1621
John Audland 1630
Richard Claridge 1649
William Bradford 1663
Fettiplace Bellers 1687
John Bellers 1654
Isabel Yeamans 1637
George Fox the younger 1551
George Fox 1624
John Stubbs 1618
Anne Camm 1627
John Camm 1605
Thomas Camm 1640
Katharine Evans 1618
Lydia Lancaster 1683
Samuel Clarridge 1631
Thomas Lower 1633
Gervase Benson 1569
Stephen Crisp 1628
James Claypoole 1634
Thomas Holme 1626
John Freame 1665
John Swinton 1620
William Mead 1627
Henry Pickworth 1673
John Crook 1616
Gilbert Latey 1626
Ellis Hookes 1635
Joseph Besse 1683
James Nayler 1618
Elizabeth Hooten 1562
George Whitehead 1637
John Whitehead 1630
William Crouch 1628
Benjamin Furly 1636
Silvanus Bevan 1691
Robert Rich 1607
John Whiting 1656
Christopher Taylor 1614
Thomas Lawson 1630
Richard Farnworth 1630
William Coddington 1601
Thomas Taylor 1617
Richard 

# Metrics available in NetworkX

In [18]:
density = nx.density(G)
print(f'Network Density: {density}')

Network Density: 0.02478279447372169


In [20]:
fell_whitehead_path = nx.shortest_path(G, source='Margaret Fell', target='George Whitehead')
print(f'Shortest path between Fell and Whitehead: {fell_whitehead_path}')
print(f'Length of that path: {len(fell_whitehead_path)}')

Shortest path between Fell and Whitehead: ['Margaret Fell', 'George Fox', 'George Whitehead']
Length of that path: 3


In [24]:
# If your graph is more than one component, this will return False:
print(nx.is_connected(G))

# Next, use nx.connected_components to get the list of components,
# then use the max() command to find the largest one
components = nx.connected_components(G)
largest_component = max(components, key=len)

# Create a "subgraph" of just the largest component
# Then calculate the diameter of the subgraph, just like you did with density

subgraph = G.subgraph(largest_component)
diameter = nx.diameter(subgraph)
print(f'Network diameter of largest component: {diameter}')

False
Network diameter of largest component: 8


**Triadic Closure:** Supposes that if two people know the same person, they are likely to know each other. This creates a *triangle* in teh visualisation of three edge nodes.
**Transitivity:** Structural network measure for measuring triadic closure. Expresses how interconnected a graph is in terms of a ratio of actual over possible connections. Concerns likelihoods rather than certainties.
**Clustering Coefficient:** Another way of measuring triadic closure.

In [25]:
triadic_closure = nx.transitivity(G)
print(f'Triadic closure: {triadic_closure}')

Triadic closure: 0.16937799043062202


Because the graph is not very dense, there are fewer *possible triangles* to begin with, which may result in a slightly higher transitivity. That is, nodes that already have lots of connections are likely to be part of these enclosed triangles. To back this up, we need to know more about the nodes with many connections