In [1]:
import networkit as nk
import graph_tool.all as gt
import networkx as nx

import powerlaw
import matplotlib.pyplot as plt

from plots import Plots
from tools import values_frequency

## load

In [2]:
snapReader = nk.graphio.SNAPGraphReader(False, False)
G_nk = snapReader.read("data/facebook_clean_data/new_sites_edges.csv")

In [3]:
G_gt = gt.load_graph_from_csv("data/facebook_clean_data/new_sites_edges.csv", csv_options={"delimiter": " "})

## topology parameters

In [4]:
degrees = [G_nk.degree(v) for v in G_nk.iterNodes()]
degree_distr = sorted(nk.centrality.DegreeCentrality(G_nk).run().scores(), reverse=True)
fitted_distr = powerlaw.Fit(degree_distr)

print('number of nodes: {}'.format(G_nk.numberOfNodes()))
print('number of edges: {}'.format(G_nk.numberOfEdges()))
print('min degree: {}'.format(min(degrees)))
print('max degree: {}'.format(max(degrees)))
print('average degree: {}'.format(sum(degrees)/len(degrees)))
print('----------------------------------------------------------------')
print('estimated gamma: {}'.format(fitted_distr.alpha))

number of nodes: 27917
number of edges: 206259
min degree: 1
max degree: 678
average degree: 14.766020704230398
----------------------------------------------------------------
estimated gamma: 3.432841403240434


Calculating best minimal value for power law fit
  (CDF_diff**2) /


In [5]:
degrees = G_gt.get_total_degrees(G_gt.get_vertices())
degree_distr, freq = values_frequency(degrees)
fitted_distr = powerlaw.Fit(degree_distr)

print('number of nodes: {}'.format(G_gt.num_vertices()))
print('number of edges: {}'.format(G_gt.num_edges()))
print('min degree: {}'.format(min(degrees)))
print('max degree: {}'.format(max(degrees)))
print('average degree: {}'.format(sum(degrees)/len(degrees)))
print('----------------------------------------------------------------')
print('estimated gamma: {}'.format(fitted_distr.alpha))

number of nodes: 27917
number of edges: 206259
min degree: 1
max degree: 678
average degree: 14.776587742235913
----------------------------------------------------------------
estimated gamma: 2.734349580681313


Calculating best minimal value for power law fit


## graph density

In [6]:
%time
graph_density = G_nk.numberOfEdges() / ((G_nk.numberOfNodes()*(G_nk.numberOfNodes()-1))/2)
print('the graph density equals: {}'.format(graph_density))

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.48 µs
the graph density equals: 0.000529323246247167


## clustering coefficient

In [7]:
%time
clustering_coefficient = nk.globals.clustering(G_nk)
print('the clustering coefficient of the graph equals: {}'.format(clustering_coefficient))

CPU times: user 6 µs, sys: 1 µs, total: 7 µs
Wall time: 14.8 µs
the clustering coefficient of the graph equals: 0.3213758360114653


In [8]:
%time
clustering_coefficient = gt.global_clustering(G_gt)
print('the clustering coefficient of the graph equals: {}'.format(clustering_coefficient[0]))

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.01 µs
the clustering coefficient of the graph equals: 0.11398478178835561


## assortativity

In [9]:
%time
G_nx = nk.nxadapter.nk2nx(G_nk)
assortativity = nx.degree_assortativity_coefficient(G_nx)
print('assortativity coefficient is: {}'.format(assortativity))

CPU times: user 4 µs, sys: 0 ns, total: 4 µs
Wall time: 7.15 µs
assortativity coefficient is: 0.021954006196974844


In [10]:
%time
assortativity = gt.assortativity(G_gt, "total")
print('assortativity coefficient is: {}'.format(assortativity))

CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 4.29 µs
assortativity coefficient is: (0.009376809716601328, 0.0003282454882932988)


# community detection

## label propagation

In [11]:
%timeit label_prop_communities = nk.community.detectCommunities(G_nk, algo=nk.community.PLM(G_nk))

PLM(balanced,pc,turbo) detected communities in 0.16346335411071777 [s]
solution properties:
-------------------  ----------
# communities          52
min community size      4
max community size   4080
avg. community size   536.865
modularity              0.62552
-------------------  ----------
PLM(balanced,pc,turbo) detected communities in 0.13046622276306152 [s]
solution properties:
-------------------  -----------
# communities          46
min community size      4
max community size   3980
avg. community size   606.891
modularity              0.622907
-------------------  -----------
PLM(balanced,pc,turbo) detected communities in 0.07355451583862305 [s]
solution properties:
-------------------  -----------
# communities          54
min community size      3
max community size   4145
avg. community size   516.981
modularity              0.623395
-------------------  -----------
PLM(balanced,pc,turbo) detected communities in 0.08147048950195312 [s]
solution properties:
--------------

PLM(balanced,pc,turbo) detected communities in 0.08611798286437988 [s]
solution properties:
-------------------  -----------
# communities          51
min community size      4
max community size   4067
avg. community size   547.392
modularity              0.624948
-------------------  -----------
PLM(balanced,pc,turbo) detected communities in 0.1451730728149414 [s]
solution properties:
-------------------  -----------
# communities          49
min community size      4
max community size   3984
avg. community size   569.735
modularity              0.625745
-------------------  -----------
PLM(balanced,pc,turbo) detected communities in 0.08039426803588867 [s]
solution properties:
-------------------  -----------
# communities          45
min community size      4
max community size   4027
avg. community size   620.378
modularity              0.624546
-------------------  -----------
PLM(balanced,pc,turbo) detected communities in 0.07417821884155273 [s]
solution properties:
------------

PLM(balanced,pc,turbo) detected communities in 0.06615853309631348 [s]
solution properties:
-------------------  -----------
# communities          48
min community size      4
max community size   4040
avg. community size   581.604
modularity              0.624414
-------------------  -----------
PLM(balanced,pc,turbo) detected communities in 0.06227469444274902 [s]
solution properties:
-------------------  -----------
# communities          45
min community size      4
max community size   4021
avg. community size   620.378
modularity              0.624172
-------------------  -----------
PLM(balanced,pc,turbo) detected communities in 0.0752108097076416 [s]
solution properties:
-------------------  -----------
# communities          42
min community size      4
max community size   3993
avg. community size   664.69
modularity              0.625367
-------------------  -----------
PLM(balanced,pc,turbo) detected communities in 0.0845797061920166 [s]
solution properties:
--------------

## louvian method

In [12]:
%timeit louvian_communities = nk.community.detectCommunities(G_nk, algo=nk.community.PLM(G_nk))

PLM(balanced,pc,turbo) detected communities in 0.061625003814697266 [s]
solution properties:
-------------------  -----------
# communities          48
min community size      4
max community size   4205
avg. community size   581.604
modularity              0.626592
-------------------  -----------
PLM(balanced,pc,turbo) detected communities in 0.08543705940246582 [s]
solution properties:
-------------------  -----------
# communities          47
min community size      4
max community size   3892
avg. community size   593.979
modularity              0.625297
-------------------  -----------
PLM(balanced,pc,turbo) detected communities in 0.0809621810913086 [s]
solution properties:
-------------------  -----------
# communities          44
min community size      4
max community size   4048
avg. community size   634.477
modularity              0.626927
-------------------  -----------
PLM(balanced,pc,turbo) detected communities in 0.07397150993347168 [s]
solution properties:
-----------

-------------------  -----------
# communities          47
min community size      4
max community size   4049
avg. community size   593.979
modularity              0.624725
-------------------  -----------
PLM(balanced,pc,turbo) detected communities in 0.07985210418701172 [s]
solution properties:
-------------------  -----------
# communities          41
min community size      4
max community size   3966
avg. community size   680.902
modularity              0.625452
-------------------  -----------
PLM(balanced,pc,turbo) detected communities in 0.0642397403717041 [s]
solution properties:
-------------------  -----------
# communities          44
min community size      4
max community size   3974
avg. community size   634.477
modularity              0.624308
-------------------  -----------
PLM(balanced,pc,turbo) detected communities in 0.0641183853149414 [s]
solution properties:
-------------------  -----------
# communities          46
min community size      4
max community size  

PLM(balanced,pc,turbo) detected communities in 0.06883120536804199 [s]
solution properties:
-------------------  -----------
# communities          48
min community size      4
max community size   3859
avg. community size   581.604
modularity              0.624053
-------------------  -----------
PLM(balanced,pc,turbo) detected communities in 0.06845855712890625 [s]
solution properties:
-------------------  -----------
# communities          51
min community size      3
max community size   4036
avg. community size   547.392
modularity              0.623202
-------------------  -----------
PLM(balanced,pc,turbo) detected communities in 0.06054353713989258 [s]
solution properties:
-------------------  -----------
# communities          41
min community size      4
max community size   4069
avg. community size   680.902
modularity              0.624722
-------------------  -----------
PLM(balanced,pc,turbo) detected communities in 0.06970047950744629 [s]
solution properties:
-----------

# centralities 

## closeness

In [14]:
%timeit closeness_centrality = nk.centrality.Closeness(G_nk, False, nk.centrality.ClosenessVariant.Generalized)

272 ns ± 9.59 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [15]:
%timeit closeness_centrality = gt.closeness(G_gt)

1min 55s ± 1.08 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


## degree 

In [16]:
%timeit degree_centrality = nk.centrality.DegreeCentrality(G_nk)

201 ns ± 9.74 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


## eigen vector

In [17]:
%timeit eigen_centrality = nk.centrality.EigenvectorCentrality(G_nk)

204 ns ± 20.2 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [18]:
%timeit eigen_centrality = gt.eigenvector(G_gt)

45.1 ms ± 7.03 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## page rank

In [19]:
%timeit page_rank_centrality = nk.centrality.PageRank(G_nk, 1e-6)

203 ns ± 4.02 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [20]:
%timeit page_rank_centrality = gt.pagerank(G_gt)

55.3 ms ± 2.05 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


## katz

In [21]:
%timeit katz_centrality = nk.centrality.PageRank(G_nk, 1e-6)

196 ns ± 3.64 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [22]:
%timeit katz_centrality = gt.katz(G_gt)

183 ms ± 4.53 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


## HITS

In [23]:
%timeit hits_centrality = gt.hits(G_gt)

67.5 ms ± 1.35 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
