In [1]:
import networkit as nk
import graph_tool.all as gt
import networkx as nx

import powerlaw
import matplotlib.pyplot as plt

from plots import Plots
from tools import values_frequency

## load

In [2]:
snapReader = nk.graphio.SNAPGraphReader(False, False)
G_nk = snapReader.read("data/facebook_clean_data/artist_edges.csv")

In [3]:
G_gt = gt.load_graph_from_csv("data/facebook_clean_data/artist_edges.csv", csv_options={"delimiter": " "})

## topology parameters

In [4]:
degrees = [G_nk.degree(v) for v in G_nk.iterNodes()]
degree_distr = sorted(nk.centrality.DegreeCentrality(G_nk).run().scores(), reverse=True)
fitted_distr = powerlaw.Fit(degree_distr)

print('number of nodes: {}'.format(G_nk.numberOfNodes()))
print('number of edges: {}'.format(G_nk.numberOfEdges()))
print('min degree: {}'.format(min(degrees)))
print('max degree: {}'.format(max(degrees)))
print('average degree: {}'.format(sum(degrees)/len(degrees)))
print('----------------------------------------------------------------')
print('estimated gamma: {}'.format(fitted_distr.alpha))

number of nodes: 50515
number of edges: 819306
min degree: 1
max degree: 1469
average degree: 32.433851331287734
----------------------------------------------------------------
estimated gamma: 3.0696254581658513


Calculating best minimal value for power law fit
  (CDF_diff**2) /


In [5]:
degrees = G_gt.get_total_degrees(G_gt.get_vertices())
degree_distr, freq = values_frequency(degrees)
fitted_distr = powerlaw.Fit(degree_distr)

print('number of nodes: {}'.format(G_gt.num_vertices()))
print('number of edges: {}'.format(G_gt.num_edges()))
print('min degree: {}'.format(min(degrees)))
print('max degree: {}'.format(max(degrees)))
print('average degree: {}'.format(sum(degrees)/len(degrees)))
print('----------------------------------------------------------------')
print('estimated gamma: {}'.format(fitted_distr.alpha))

Calculating best minimal value for power law fit


number of nodes: 50515
number of edges: 819306
min degree: 1
max degree: 1469
average degree: 32.43812728892408
----------------------------------------------------------------
estimated gamma: 3.0207680134943375


## graph density

In [6]:
%time
graph_density = G_nk.numberOfEdges() / ((G_nk.numberOfNodes()*(G_nk.numberOfNodes()-1))/2)
print('the graph density equals: {}'.format(graph_density))

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 5.25 µs
the graph density equals: 0.0006421611293685727


## clustering coefficient

In [7]:
%time
clustering_coefficient = nk.globals.clustering(G_nk)
print('the clustering coefficient of the graph equals: {}'.format(clustering_coefficient))

CPU times: user 2 µs, sys: 1e+03 ns, total: 3 µs
Wall time: 5.01 µs
the clustering coefficient of the graph equals: 0.15230608876921742


In [8]:
%time
clustering_coefficient = gt.global_clustering(G_gt)
print('the clustering coefficient of the graph equals: {}'.format(clustering_coefficient[0]))

CPU times: user 4 µs, sys: 0 ns, total: 4 µs
Wall time: 7.63 µs
the clustering coefficient of the graph equals: 0.05349710994355287


## assortativity

In [9]:
%time
G_nx = nk.nxadapter.nk2nx(G_nk)
assortativity = nx.degree_assortativity_coefficient(G_nx)
print('assortativity coefficient is: {}'.format(assortativity))

CPU times: user 4 µs, sys: 1 µs, total: 5 µs
Wall time: 7.15 µs
assortativity coefficient is: -0.01906626068251133


In [10]:
%time
assortativity = gt.assortativity(G_gt, "total")
print('assortativity coefficient is: {}'.format(assortativity))

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 4.53 µs
assortativity coefficient is: (0.001832800048164561, 8.644549118279209e-05)


# community detection

## label propagation

In [11]:
%timeit label_prop_communities = nk.community.detectCommunities(G_nk, algo=nk.community.PLM(G_nk))

PLM(balanced,pc,turbo) detected communities in 0.20842313766479492 [s]
solution properties:
-------------------  -----------
# communities          34
min community size      4
max community size   9078
avg. community size  1485.74
modularity              0.615362
-------------------  -----------
PLM(balanced,pc,turbo) detected communities in 0.15722203254699707 [s]
solution properties:
-------------------  -----------
# communities          27
min community size      4
max community size   9823
avg. community size  1870.93
modularity              0.610753
-------------------  -----------
PLM(balanced,pc,turbo) detected communities in 0.17978739738464355 [s]
solution properties:
-------------------  -----------
# communities           32
min community size       4
max community size   10105
avg. community size   1578.59
modularity               0.60803
-------------------  -----------
PLM(balanced,pc,turbo) detected communities in 0.21448802947998047 [s]
solution properties:
----------

## louvian method

In [12]:
%timeit louvian_communities = nk.community.detectCommunities(G_nk, algo=nk.community.PLM(G_nk))

PLM(balanced,pc,turbo) detected communities in 0.23178339004516602 [s]
solution properties:
-------------------  ------------
# communities           33
min community size       3
max community size   10432
avg. community size   1530.76
modularity               0.613134
-------------------  ------------
PLM(balanced,pc,turbo) detected communities in 0.25515031814575195 [s]
solution properties:
-------------------  ------------
# communities           32
min community size       4
max community size   10385
avg. community size   1578.59
modularity               0.615967
-------------------  ------------
PLM(balanced,pc,turbo) detected communities in 0.27351927757263184 [s]
solution properties:
-------------------  -----------
# communities          32
min community size      4
max community size   9335
avg. community size  1578.59
modularity              0.615428
-------------------  -----------
PLM(balanced,pc,turbo) detected communities in 0.18131136894226074 [s]
solution properties:


# centralities 

## closeness

In [13]:
%timeit closeness_centrality = nk.centrality.Closeness(G_nk, False, nk.centrality.ClosenessVariant.Generalized)

269 ns ± 5.04 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


## degree 

In [14]:
%timeit degree_centrality = nk.centrality.DegreeCentrality(G_nk)

187 ns ± 7.14 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


## eigen vector

In [15]:
%timeit eigen_centrality = nk.centrality.EigenvectorCentrality(G_nk)

179 ns ± 1.68 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


In [16]:
%timeit eigen_centrality = gt.eigenvector(G_gt)

321 ms ± 19.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## page rank

In [17]:
%timeit page_rank_centrality = nk.centrality.PageRank(G_nk, 1e-6)

200 ns ± 9 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [18]:
%timeit page_rank_centrality = gt.pagerank(G_gt)

197 ms ± 17.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## katz

In [19]:
%timeit katz_centrality = nk.centrality.PageRank(G_nk, 1e-6)

196 ns ± 7.22 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [20]:
%timeit katz_centrality = gt.katz(G_gt)

  vprop.fa = vprop.fa / numpy.linalg.norm(vprop.fa)


8.25 s ± 175 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## HITS

In [21]:
%timeit hits_centrality = gt.hits(G_gt)

511 ms ± 9.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
