<b>Betweenness Centrality for Removing Nodes</b>

This notebook reviews how to compute the betweenness centrality of the nodes in a graph, how to remove an individual node from the graph, and how that removal affects the rest of the graph.

In [5]:
import csv

import numpy as np
import networkx as nx 
import pandas as pd

In [2]:
#First set of usable data - from Logan Schmidt using James Tattersall's sheets (~1000 pts)
#Preview the first lines of the file; it is tab-separated and has no header row
!head one.csv

-13	1	12/01/48	An Esteemed Correspondent	J. Alsop ;Digamma *;Thomas Cranstoun *;John Giblin *;J.M. *	1;	Jan-49;	xx;xx	41;
-12	1	01/01/49	T. Morley	Unknown ;T.J.L. 	1;	Feb-49;Feb-49	xx;xx	9;
-11	1	02/01/49	T.J.L. 	J.W. 	1	03/01/49	xx	41
-10	1	02/01/49	Enquirer	T. Morley 	1	03/01/49	xx	22
-9	1	03/01/49	Thomas Morley	J.W. 	1	04/01/49	xx	22
-8	1	04/01/49	Thomas Morley	UNKNOWN;UNKNOWN	1;1	May-49;Jun-49	xx;xx	41;
-7	1	04/01/49	Collegian	J.W. 	1	05/01/49	xx	22
-6	1	05/01/49	Gomphos					3
-5	1	05/01/49	S.A.G.					74
-4	1	06/01/49	Geometricus	 UNKNOWN	1	07/01/49	xx	47


In [3]:
#Select some data to work with for this session

#First create a list holding every column of every row.
#newline='' lets the csv module handle line endings itself, as its documentation recommends.
with open('one.csv', 'r', newline='') as f:
    filereader = csv.reader(f, delimiter="\t", quotechar='"')
    #the file has no header row, so no row is skipped
    allData = list(filereader)
            
            
#For this practice, let's keep some data separate by date so we can compare them!
#All the interactions during the 1840's.
#Each kept entry is [column 3, column 4, date] taken from one row of allData.
forties = []
for row in allData:
    if len(row) < 5:
        #blank or truncated lines cannot be indexed safely (csv.reader yields [] for an empty line)
        continue
    date = row[2].split("/")
    #Only dates with three "/"-separated parts are considered. Testing whether the last part
    #starts with '4' singles out the whole decade instead of one specific year such as "49".
    if len(date) == 3 and date[2].startswith('4'):
        forties.append([row[3], row[4], row[2]])
            
#Fix the solver column by separating out responders, removing spaces, and formatting unknowns
unknown = {"", "Unknown", "unknown"}


def _cleanName(name):
    """Strip every space character from name; return "UNKNOWN" if the result is in `unknown`."""
    name = name.replace(' ', '')
    return "UNKNOWN" if name in unknown else name


#One [first name, solver] pair is produced per ";"-separated solver.
#split(";") returns a single-element list when there is no ";", so one loop covers both cases.
fixedSolver = []
for entry in forties:
    source = _cleanName(entry[0])
    for solver in entry[1].split(";"):
        fixedSolver.append([source, _cleanName(solver)])

In [4]:
#Display the cleaned two-element edges; the bare last expression is what the cell shows
fixedSolver

[['AnEsteemedCorrespondent', 'J.Alsop'],
 ['AnEsteemedCorrespondent', 'Digamma*'],
 ['AnEsteemedCorrespondent', 'ThomasCranstoun*'],
 ['AnEsteemedCorrespondent', 'JohnGiblin*'],
 ['AnEsteemedCorrespondent', 'J.M.*'],
 ['T.Morley', 'UNKNOWN'],
 ['T.Morley', 'T.J.L.'],
 ['T.J.L.', 'J.W.'],
 ['Enquirer', 'T.Morley'],
 ['ThomasMorley', 'J.W.'],
 ['ThomasMorley', 'UNKNOWN'],
 ['ThomasMorley', 'UNKNOWN'],
 ['Collegian', 'J.W.'],
 ['Gomphos', 'UNKNOWN'],
 ['S.A.G.', 'UNKNOWN'],
 ['Geometricus', 'UNKNOWN'],
 ['UNKNOWN', 'J.W.'],
 ['Geometricus', 'UNKNOWN'],
 ['J.W.', 'UNKNOWN'],
 ['Geometricus', 'ThomasWilkinson'],
 ['Philo-Mathematicus', 'ThomasWilkinson'],
 ['UNKNOWN', 'UNKNOWN'],
 ['Theta', 'ThomasWilkinson'],
 ['Theta', 'ThomasWilkinson'],
 ['Theta', 'ThomasWilkinson'],
 ['ThomasWilkinson', 'ThomasWilkinson'],
 ['ThomasWilkinson', 'ThomasWilkinson'],
 ['Geometricus', 'ThomasWilkinson'],
 ['UNKNOWN', 'J.S.'],
 ['UNKNOWN', 'ThomasWilkinson*'],
 ['UNKNOWN', 'J.M.[ofBiggleswade]*'],
 ['UNKNOWN

In [17]:
#create graph using networkx
solverGraph = nx.from_edgelist(fixedSolver)
#nx.info() was removed in NetworkX 3.0, so report the graph size with methods that exist in every version
print("Number of nodes:", solverGraph.number_of_nodes())
print("Number of edges:", solverGraph.number_of_edges())

#dictionary mapping each node to its betweenness centrality
betweennessSG = nx.betweenness_centrality(solverGraph)

#sort the (node, centrality) pairs by ascending betweenness centrality
sortedSG = sorted(betweennessSG.items(), key=lambda pair: pair[1])
sortedSG

#The last entry has the highest betweenness: Thomas Wilkinson would be the first to be removed.
#That node and all references to it need to be removed, then the centrality run again.

Name: 
Type: Graph
Number of nodes: 28
Number of edges: 36
Average degree:   2.5714


[('J.Alsop', 0.0),
 ('Digamma*', 0.0),
 ('ThomasCranstoun*', 0.0),
 ('JohnGiblin*', 0.0),
 ('J.M.*', 0.0),
 ('Enquirer', 0.0),
 ('Collegian', 0.0),
 ('Gomphos', 0.0),
 ('S.A.G.', 0.0),
 ('Philo-Mathematicus', 0.0),
 ('Theta', 0.0),
 ('J.S.', 0.0),
 ('J.M.[ofBiggleswade]*', 0.0),
 ('J.M.[fromPlymouth]*', 0.0),
 ('J.M.[ofBiggleswade]', 0.0),
 ('SeptimusTebay', 0.0),
 ('J.S.*', 0.0),
 ('T.Wilkinson*', 0.0),
 ('SeptimusTebay*', 0.0),
 ('T.J.L.', 0.017094017094017096),
 ('AnEsteemedCorrespondent', 0.02849002849002849),
 ('Geometricus', 0.046343779677113),
 ('T.Morley', 0.06528964862298196),
 ('ThomasWilkinson*', 0.07369420702754037),
 ('ThomasMorley', 0.07393162393162393),
 ('J.W.', 0.18219373219373222),
 ('UNKNOWN', 0.26866096866096856),
 ('ThomasWilkinson', 0.3468660968660969)]

In [19]:
#Keep only the edges that do not touch the node being removed.
#The comparison is an exact string match, so variants such as 'ThomasWilkinson*' are kept.
removedNode = "ThomasWilkinson"
firstIteration = [
    pair for pair in fixedSolver
    if pair[0] != removedNode and pair[1] != removedNode
]

In [20]:
#Display the edges that remain after filtering out "ThomasWilkinson"
firstIteration


[['AnEsteemedCorrespondent', 'J.Alsop'],
 ['AnEsteemedCorrespondent', 'Digamma*'],
 ['AnEsteemedCorrespondent', 'ThomasCranstoun*'],
 ['AnEsteemedCorrespondent', 'JohnGiblin*'],
 ['AnEsteemedCorrespondent', 'J.M.*'],
 ['T.Morley', 'UNKNOWN'],
 ['T.Morley', 'T.J.L.'],
 ['T.J.L.', 'J.W.'],
 ['Enquirer', 'T.Morley'],
 ['ThomasMorley', 'J.W.'],
 ['ThomasMorley', 'UNKNOWN'],
 ['ThomasMorley', 'UNKNOWN'],
 ['Collegian', 'J.W.'],
 ['Gomphos', 'UNKNOWN'],
 ['S.A.G.', 'UNKNOWN'],
 ['Geometricus', 'UNKNOWN'],
 ['UNKNOWN', 'J.W.'],
 ['Geometricus', 'UNKNOWN'],
 ['J.W.', 'UNKNOWN'],
 ['UNKNOWN', 'UNKNOWN'],
 ['UNKNOWN', 'J.S.'],
 ['UNKNOWN', 'ThomasWilkinson*'],
 ['UNKNOWN', 'J.M.[ofBiggleswade]*'],
 ['UNKNOWN', 'J.S.'],
 ['UNKNOWN', 'ThomasWilkinson*'],
 ['UNKNOWN', 'J.M.[ofBiggleswade]*'],
 ['ThomasMorley', 'J.S.'],
 ['ThomasMorley', 'ThomasWilkinson*'],
 ['ThomasMorley', 'J.M.[fromPlymouth]*'],
 ['SeptimusTebay', 'SeptimusTebay'],
 ['Geometricus', 'Geometricus'],
 ['SeptimusTebay', 'SeptimusTeb

In [22]:
#create graph using networkx from the edges left after the first removal
solverGraph1 = nx.from_edgelist(firstIteration)
#nx.info() was removed in NetworkX 3.0, so report the graph size with methods that exist in every version
print("Number of nodes:", solverGraph1.number_of_nodes())
print("Number of edges:", solverGraph1.number_of_edges())

#dictionary mapping each node to its betweenness centrality
betweennessSG1 = nx.betweenness_centrality(solverGraph1)

#sort the (node, centrality) pairs by ascending betweenness centrality
sortedSG1 = sorted(betweennessSG1.items(), key=lambda pair: pair[1])
sortedSG1

#Since we removed Thomas Wilkinson, we also removed some other nodes at the same time:
#he had connections with some authors that nobody else did.

Name: 
Type: Graph
Number of nodes: 21
Number of edges: 25
Average degree:   2.3810


[('J.Alsop', 0.0),
 ('Digamma*', 0.0),
 ('ThomasCranstoun*', 0.0),
 ('JohnGiblin*', 0.0),
 ('J.M.*', 0.0),
 ('Enquirer', 0.0),
 ('Collegian', 0.0),
 ('Gomphos', 0.0),
 ('S.A.G.', 0.0),
 ('Geometricus', 0.0),
 ('J.S.', 0.0),
 ('ThomasWilkinson*', 0.0),
 ('J.M.[ofBiggleswade]*', 0.0),
 ('J.M.[fromPlymouth]*', 0.0),
 ('SeptimusTebay', 0.0),
 ('T.J.L.', 0.010526315789473684),
 ('AnEsteemedCorrespondent', 0.05263157894736842),
 ('T.Morley', 0.07982456140350876),
 ('ThomasMorley', 0.07982456140350877),
 ('J.W.', 0.093859649122807),
 ('UNKNOWN', 0.29385964912280704)]