In [1]:
import networkit as nk
import graph_tool.all as gt
import networkx as nx

import powerlaw
import matplotlib.pyplot as plt

from plots import Plots
from tools import values_frequency

## load

In [2]:
snapReader = nk.graphio.SNAPGraphReader(False, False)
G_nk = snapReader.read("data/facebook_clean_data/public_figure_edges.csv")

In [3]:
G_gt = gt.load_graph_from_csv("data/facebook_clean_data/public_figure_edges.csv", csv_options={"delimiter": " "})

## topology parameters

In [4]:
degrees = [G_nk.degree(v) for v in G_nk.iterNodes()]
degree_distr = sorted(nk.centrality.DegreeCentrality(G_nk).run().scores(), reverse=True)
fitted_distr = powerlaw.Fit(degree_distr)

print('number of nodes: {}'.format(G_nk.numberOfNodes()))
print('number of edges: {}'.format(G_nk.numberOfEdges()))
print('min degree: {}'.format(min(degrees)))
print('max degree: {}'.format(max(degrees)))
print('average degree: {}'.format(sum(degrees)/len(degrees)))
print('----------------------------------------------------------------')
print('estimated gamma: {}'.format(fitted_distr.alpha))

number of nodes: 11565
number of edges: 67114
min degree: 1
max degree: 326
average degree: 11.599827064418504
----------------------------------------------------------------
estimated gamma: 2.4971785156569757


Calculating best minimal value for power law fit
  (CDF_diff**2) /


In [5]:
degrees = G_gt.get_total_degrees(G_gt.get_vertices())
degree_distr, freq = values_frequency(degrees)
fitted_distr = powerlaw.Fit(degree_distr)

print('number of nodes: {}'.format(G_gt.num_vertices()))
print('number of edges: {}'.format(G_gt.num_edges()))
print('min degree: {}'.format(min(degrees)))
print('max degree: {}'.format(max(degrees)))
print('average degree: {}'.format(sum(degrees)/len(degrees)))
print('----------------------------------------------------------------')
print('estimated gamma: {}'.format(fitted_distr.alpha))

number of nodes: 11565
number of edges: 67114
min degree: 1
max degree: 326
average degree: 11.606398616515348
----------------------------------------------------------------
estimated gamma: 7.392988690793513


Calculating best minimal value for power law fit


## graph density

In [6]:
%time
graph_density = G_nk.numberOfEdges() / ((G_nk.numberOfNodes()*(G_nk.numberOfNodes()-1))/2)
print('the graph density equals: {}'.format(graph_density))

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.25 µs
the graph density equals: 0.001003666431729103


## clustering coefficient

In [7]:
%time
clustering_coefficient = nk.globals.clustering(G_nk)
print('the clustering coefficient of the graph equals: {}'.format(clustering_coefficient))

CPU times: user 4 µs, sys: 1 µs, total: 5 µs
Wall time: 7.87 µs
the clustering coefficient of the graph equals: 0.21388864761573873


In [8]:
%time
clustering_coefficient = gt.global_clustering(G_gt)
print('the clustering coefficient of the graph equals: {}'.format(clustering_coefficient[0]))

CPU times: user 4 µs, sys: 0 ns, total: 4 µs
Wall time: 7.39 µs
the clustering coefficient of the graph equals: 0.1665644876656779


## assortativity

In [9]:
%time
G_nx = nk.nxadapter.nk2nx(G_nk)
assortativity = nx.degree_assortativity_coefficient(G_nx)
print('assortativity coefficient is: {}'.format(assortativity))

CPU times: user 4 µs, sys: 1e+03 ns, total: 5 µs
Wall time: 7.87 µs
assortativity coefficient is: 0.202161548290631


In [10]:
%time
assortativity = gt.assortativity(G_gt, "total")
print('assortativity coefficient is: {}'.format(assortativity))

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 4.53 µs
assortativity coefficient is: (0.00874265802241826, 0.0005564184360778624)


# community detection

## label propagation

In [11]:
%timeit label_prop_communities = nk.community.detectCommunities(G_nk, algo=nk.community.PLM(G_nk))

PLM(balanced,pc,turbo) detected communities in 0.12117171287536621 [s]
solution properties:
-------------------  -----------
# communities          32
min community size      5
max community size   1711
avg. community size   361.406
modularity              0.682358
-------------------  -----------
PLM(balanced,pc,turbo) detected communities in 0.020938396453857422 [s]
solution properties:
-------------------  -----------
# communities          37
min community size      4
max community size   2022
avg. community size   312.568
modularity              0.683259
-------------------  -----------
PLM(balanced,pc,turbo) detected communities in 0.04743361473083496 [s]
solution properties:
-------------------  -----------
# communities          41
min community size      4
max community size   2289
avg. community size   282.073
modularity              0.683099
-------------------  -----------
PLM(balanced,pc,turbo) detected communities in 0.035274505615234375 [s]
solution properties:
---------

PLM(balanced,pc,turbo) detected communities in 0.03068685531616211 [s]
solution properties:
-------------------  -----------
# communities          36
min community size      4
max community size   1797
avg. community size   321.25
modularity              0.682243
-------------------  -----------
PLM(balanced,pc,turbo) detected communities in 0.018923521041870117 [s]
solution properties:
-------------------  -----------
# communities          33
min community size      5
max community size   2059
avg. community size   350.455
modularity              0.682281
-------------------  -----------
PLM(balanced,pc,turbo) detected communities in 0.013770818710327148 [s]
solution properties:
-------------------  -----------
# communities          40
min community size      4
max community size   2231
avg. community size   289.125
modularity              0.681537
-------------------  -----------
PLM(balanced,pc,turbo) detected communities in 0.014089107513427734 [s]
solution properties:
---------

PLM(balanced,pc,turbo) detected communities in 0.03048396110534668 [s]
solution properties:
-------------------  ----------
# communities          43
min community size      3
max community size   2604
avg. community size   268.953
modularity              0.67976
-------------------  ----------
PLM(balanced,pc,turbo) detected communities in 0.02023172378540039 [s]
solution properties:
-------------------  -----------
# communities          37
min community size      5
max community size   1732
avg. community size   312.568
modularity              0.684214
-------------------  -----------
PLM(balanced,pc,turbo) detected communities in 0.013401985168457031 [s]
solution properties:
-------------------  -----------
# communities          39
min community size      4
max community size   1980
avg. community size   296.538
modularity              0.681249
-------------------  -----------
PLM(balanced,pc,turbo) detected communities in 0.017882823944091797 [s]
solution properties:
------------

## louvian method

In [12]:
%timeit louvian_communities = nk.community.detectCommunities(G_nk, algo=nk.community.PLM(G_nk))

PLM(balanced,pc,turbo) detected communities in 0.049889564514160156 [s]
solution properties:
-------------------  -----------
# communities          34
min community size      4
max community size   1896
avg. community size   340.147
modularity              0.683214
-------------------  -----------
PLM(balanced,pc,turbo) detected communities in 0.01735973358154297 [s]
solution properties:
-------------------  -----------
# communities          34
min community size      5
max community size   1716
avg. community size   340.147
modularity              0.681024
-------------------  -----------
PLM(balanced,pc,turbo) detected communities in 0.014238834381103516 [s]
solution properties:
-------------------  -----------
# communities          34
min community size      5
max community size   1750
avg. community size   340.147
modularity              0.682155
-------------------  -----------
PLM(balanced,pc,turbo) detected communities in 0.020569562911987305 [s]
solution properties:
--------

PLM(balanced,pc,turbo) detected communities in 0.0361180305480957 [s]
solution properties:
-------------------  -----------
# communities          31
min community size      5
max community size   1547
avg. community size   373.065
modularity              0.684038
-------------------  -----------
PLM(balanced,pc,turbo) detected communities in 0.016005992889404297 [s]
solution properties:
-------------------  -----------
# communities          37
min community size      4
max community size   1714
avg. community size   312.568
modularity              0.683216
-------------------  -----------
PLM(balanced,pc,turbo) detected communities in 0.014626264572143555 [s]
solution properties:
-------------------  ----------
# communities          41
min community size      3
max community size   2475
avg. community size   282.073
modularity              0.68212
-------------------  ----------
PLM(balanced,pc,turbo) detected communities in 0.017750263214111328 [s]
solution properties:
------------

PLM(balanced,pc,turbo) detected communities in 0.03238320350646973 [s]
solution properties:
-------------------  ----------
# communities          29
min community size      5
max community size   1754
avg. community size   398.793
modularity              0.68192
-------------------  ----------
PLM(balanced,pc,turbo) detected communities in 0.014662504196166992 [s]
solution properties:
-------------------  -----------
# communities          29
min community size      4
max community size   1677
avg. community size   398.793
modularity              0.681854
-------------------  -----------
PLM(balanced,pc,turbo) detected communities in 0.01602792739868164 [s]
solution properties:
-------------------  ----------
# communities          31
min community size      4
max community size   1909
avg. community size   373.065
modularity              0.68082
-------------------  ----------
PLM(balanced,pc,turbo) detected communities in 0.015029668807983398 [s]
solution properties:
---------------

# centralities 

## betweenness centrality

In [13]:
%timeit btwn_centrality = nk.centrality.Betweenness(G_nk).run()

35.5 s ± 1.89 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [14]:
%timeit btwn_centrality = gt.betweenness(G_gt)

14.9 s ± 89.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## closeness

In [15]:
%timeit closeness_centrality = nk.centrality.Closeness(G_nk, False, nk.centrality.ClosenessVariant.Generalized)

279 ns ± 3.41 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [16]:
%timeit closeness_centrality = gt.closeness(G_gt)

16.6 s ± 1.27 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


## degree 

In [17]:
%timeit degree_centrality = nk.centrality.DegreeCentrality(G_nk)

202 ns ± 9.05 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


## eigen vector

In [18]:
%timeit eigen_centrality = nk.centrality.EigenvectorCentrality(G_nk)

204 ns ± 5.61 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [19]:
%timeit eigen_centrality = gt.eigenvector(G_gt)

6.23 ms ± 215 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


## page rank

In [20]:
%timeit page_rank_centrality = nk.centrality.PageRank(G_nk, 1e-6)

216 ns ± 9.73 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [21]:
%timeit page_rank_centrality = gt.pagerank(G_gt)

16 ms ± 858 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


## katz

In [22]:
%timeit katz_centrality = nk.centrality.PageRank(G_nk, 1e-6)

211 ns ± 9.89 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [23]:
%timeit katz_centrality = gt.katz(G_gt)

196 ms ± 3.35 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


## HITS

In [24]:
%timeit hits_centrality = gt.hits(G_gt)

9.94 ms ± 179 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
