In [29]:
import os
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd

%matplotlib inline

In [30]:
from pathlib import Path

# Load the relation edge list (comma-separated, one edge per line) into a
# directed graph, then keep only the largest weakly connected part of it.
path = Path('../data/processed_except/relation_median.csv').resolve()
di_graph = nx.DiGraph()
with open(path, 'r') as input_file:
    # Iterate over the file object instead of readline() + "== ''":
    # a blank line in the middle of the file must not be mistaken for EOF
    # (that would silently drop every edge after it).
    for line in input_file:
        line = line.rstrip()
        if not line:
            continue

        splits = line.split(',')
        # prevent self-loop
        if splits[0] != splits[1]:
            # Edge runs from the second field to the first field.
            di_graph.add_edge(splits[1], splits[0])

# Finding the Giant Component
# Connectivity is evaluated on the undirected view; components are ordered
# from largest to smallest by node count.
undi_graph = di_graph.to_undirected()
graph_components = sorted(
    (undi_graph.subgraph(c) for c in nx.connected_components(undi_graph)),
    key=len,
    reverse=True,
)
if not graph_components:
    # Fail with a clear message instead of an IndexError on [0].
    raise ValueError('no edges were loaded from {}'.format(path))

# Re-select the giant component's nodes from the *directed* graph so that
# edge directions are preserved for the analyses that follow.
giant_component = di_graph.subgraph(graph_components[0].nodes())

# Build the edge table in a single constructor call; appending rows one at a
# time with .loc re-allocates the frame on every iteration.
edge_df = pd.DataFrame(list(giant_component.edges()), columns=['source', 'dest'])

edge_df.head()

Unnamed: 0,source,dest
0,1,52
1,1,4
2,1,80
3,1,145
4,1,84


### Edge Betweenness Centrality

In [31]:
from operator import itemgetter
# Rank the giant component's edges by edge betweenness centrality, highest first.
edge_betweeness = sorted(nx.edge_betweenness_centrality(giant_component).items(), key=itemgetter(1), reverse=True)

# Each item is ((source, dest), score). Build the frame in one constructor
# call: growing it row by row with .loc is quadratic and leaves the score
# column with object dtype instead of a numeric one.
# NOTE(review): "bedetweeness" looks like a typo for "betweenness"; the label
# is kept unchanged because other cells may already reference it.
ebet_df = pd.DataFrame(
    [(src, dest, score) for (src, dest), score in edge_betweeness],
    columns=["source", "dest", "bedetweeness"],
)
ebet_df.head()

Unnamed: 0,source,dest,bedetweeness
0,7,1,0.021444
1,1,7,0.019283
2,9,1,0.013453
3,84,7,0.012613
4,7,84,0.012034


### Degree Centrality

In [32]:
# Rank the giant component's nodes by degree centrality, highest first.
# giant_component is a subgraph of a DiGraph, so a node's degree counts both
# incoming and outgoing edges; this is why values above 1 can appear here.
degree_centreality = sorted(nx.degree_centrality(giant_component).items(), key=itemgetter(1), reverse=True)

# degree_centreality is already a list of (tag, score) pairs, so hand it to
# the constructor directly instead of appending one row at a time with .loc
# (quadratic, and it leaves the score column with object dtype).
deg_df = pd.DataFrame(degree_centreality, columns=["tag", "degree centreality"])
deg_df.head()

Unnamed: 0,tag,degree centreality
0,4,1.109453
1,1,1.0
2,7,0.800995
3,8,0.631841
4,6,0.502488


### Betweenness Centrality

In [33]:
# Rank the giant component's nodes by betweenness centrality, highest first.
betweenness_centrality = sorted(nx.betweenness_centrality(giant_component).items(), key=itemgetter(1), reverse=True)

# The sorted list already holds (tag, score) pairs; build the frame in one
# constructor call rather than enlarging it row by row with .loc (quadratic,
# and it leaves the score column with object dtype).
bet_df = pd.DataFrame(betweenness_centrality, columns=["tag", "betweenness centrality"])
bet_df.head()

Unnamed: 0,tag,betweenness centrality
0,1,0.286917
1,4,0.245784
2,7,0.178911
3,84,0.106425
4,9,0.067613


In [36]:
# k-clique community detection with k = 3, run on the undirected version of
# the giant component; the generator is materialised so it can be counted.
comm = [
    members
    for members in nx.algorithms.community.k_clique_communities(
        giant_component.to_undirected(),
        3,
    )
]
# Number of communities found.
print(len(comm))

1
