# Turn node and edge data into a graph

### Read in data from previously created .txt files

In [21]:
import os.path
graph_path = 'Graph/'

In [22]:
#nodes = [line.rstrip('\n') for line in open(os.path.join(graph_path,'nodes.txt'))]
edges = [[item.strip() for item in line.rstrip('\r\n').split(' ')] for line in open(os.path.join(graph_path,'edges.txt'))]

In [23]:
nodes[:10]

NameError: name 'nodes' is not defined

In [24]:
edges[:10]

[['@IamfromQ', '@StayAmerica'],
 ['@MrMayor', '@Rtchavez'],
 ['@BenMyers1262', '@Dalesloc'],
 ['@realwayneroot', '@Jhanco1'],
 ['@CitizenFreePress', '@Mdpope1'],
 ['@realwayneroot', '@FistfulofTrumpism'],
 ['@KPatriot21', '@RepEricSwalwell'],
 ['@Shariwhittyphi', '@adirondackersforamerica'],
 ['@TheNewAmericanConservative', '@Heathermillertime'],
 ['@Linkinbio', '@Pjcali']]

## Create and process the graph

In [25]:
import matplotlib.pylab as plt
import networkx as nx

G = nx.Graph()
#G.add_nodes_from(nodes)
G.add_edges_from(edges)

In [26]:
G.number_of_nodes()

9443

In [27]:
G.number_of_edges()

61647

In [28]:
S = [G.subgraph(c).copy() for c in nx.connected_components(G)]

In [29]:
S=sorted(S, key=len, reverse=True)

In [35]:
sub=S[0]
# S[1] should have 2 nodes
# S[0] is the big one that takes forever to plot
len(sub)

9383

In [32]:
print(f'There are {G.number_of_nodes()} nodes, and')
print(f'There are {G.number_of_edges()} edges in the main graph')

There are 9443 nodes, and
There are 61647 edges in the main graph


In [33]:
print(f'There are {sub.number_of_nodes()} nodes, and')
print(f'There are {sub.number_of_edges()} edges in the subgraph')

There are 9383 nodes, and
There are 61591 edges in the subgraph


In [None]:
nx.draw(sub, node_size=100, with_labels=True)

[Degree centrality](https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.centrality.degree_centrality.html#networkx.algorithms.centrality.degree_centrality)

In [36]:
# TODO: sort this dict by value for readability
deg_cent = (nx.degree_centrality(sub))
sorted_deg_cent = sorted((nx.degree_centrality(sub)).items(), key = lambda x: x[1], reverse=True)

In [37]:
# top 10
sorted_deg_cent[1:11]

[('@linwood', 0.16393093157109356),
 ('@EllieBOfficial', 0.15817522916222554),
 ('@mitchellvii', 0.11809848646344062),
 ('@ThomasFox', 0.09720741846088253),
 ('@DineshDSouza', 0.09550202515455126),
 ('@GatewayPundit', 0.07962054998934129),
 ('@realwayneroot', 0.07141334470262203),
 ('@IamfromQ', 0.0705606480494564),
 ('@TheProudBoys', 0.06970795139629077),
 ('@flintbedrock', 0.06917501598806225)]

In [38]:
# bottom 10
sorted_deg_cent[-10:]

[('@Shullalice05', 0.00010658708164570453),
 ('@MikeOBX', 0.00010658708164570453),
 ('@Pooch14329', 0.00010658708164570453),
 ('@Customsilversmithing', 0.00010658708164570453),
 ('@Timaria0930', 0.00010658708164570453),
 ('@Bosen', 0.00010658708164570453),
 ('@zarkwork', 0.00010658708164570453),
 ('@PatriotProsperity', 0.00010658708164570453),
 ('@TheJewishRight', 0.00010658708164570453),
 ('@RLea', 0.00010658708164570453)]

[Kernighan–Lin bipartition algorithm](https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.community.kernighan_lin.kernighan_lin_bisection.html#networkx.algorithms.community.kernighan_lin.kernighan_lin_bisection)

In [None]:
from networkx.algorithms.community import kernighan_lin_bisection
klb = kernighan_lin_bisection(sub)

In [None]:
# TODO: adjust the printing of this for readability
klb

[Greedy Modularity Community](https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.community.modularity_max.greedy_modularity_communities.html#networkx.algorithms.community.modularity_max.greedy_modularity_communities)

In [None]:
from networkx.algorithms.community import greedy_modularity_communities
gmc = greedy_modularity_communities(sub)

In [None]:
# TODO: adjust the printing of this for readability
gmc

In [None]:
# Save the graph to a GXF file for later use
# Can be imported into Gephi
nx.write_gexf(sub, "subgraph.gexf")