# Our imported packages:

In [1]:
import networkx as nx
import pandas as pd
from networkx.algorithms import community
from networkx.algorithms.core import core_number, k_core, k_shell
from infomap import Infomap
import random

# Read the *.graphml graph file:

In [2]:
G = nx.read_graphml('graph.graphml')
print(G)

DiGraph with 24120 nodes and 546104 edges


# Question 3.

In [3]:
# convert directed graph to undirected one, just for this question:

G_undirected = G.to_undirected()
print(G_undirected)

Graph with 24120 nodes and 521820 edges


In [4]:
# perform k-core algorithm on the graph:

G_k_core = k_core(G_undirected)
print(f'K-core results are: {G_k_core}')

G_k_shell = k_shell(G_undirected)
print(f'K-shell results are: {G_k_shell}')

# now, we can compute k-shell values:
k_shell_values = core_number(G_undirected)

K-core results are: Graph with 550 nodes and 51622 edges
K-shell results are: Graph with 550 nodes and 51622 edges


In [5]:
k_shell_values

{'7552': 9,
 '742143': 0,
 '764537': 112,
 '787430': 18,
 '788524': 0,
 '790728': 49,
 '1652541': 0,
 '2097571': 0,
 '2172631': 4,
 '2179651': 3,
 '2216711': 17,
 '2284181': 12,
 '3920811': 90,
 '3951121': 1,
 '4782551': 124,
 '4921131': 5,
 '4970411': 0,
 '5107071': 2,
 '5488652': 47,
 '5618502': 3,
 '5775802': 16,
 '5813232': 14,
 '6134882': 0,
 '6135642': 48,
 '6167132': 0,
 '6168042': 3,
 '6195222': 0,
 '6509832': 0,
 '7214152': 31,
 '7852122': 6,
 '7998422': 4,
 '8920142': 12,
 '10257602': 0,
 '11153652': 11,
 '11744152': 40,
 '11790882': 52,
 '12629792': 0,
 '14156314': 0,
 '14200914': 22,
 '14260960': 0,
 '14293310': 0,
 '14320844': 5,
 '14354395': 12,
 '14355043': 0,
 '14372104': 1,
 '14373023': 0,
 '14490231': 4,
 '14496155': 13,
 '14499904': 15,
 '14524058': 16,
 '14545668': 6,
 '14600723': 64,
 '14654188': 40,
 '14700316': 0,
 '14735121': 2,
 '14755862': 5,
 '14808514': 44,
 '14820509': 2,
 '14824449': 9,
 '14851948': 5,
 '14933858': 10,
 '15057400': 0,
 '15063044': 0,
 '150

In [6]:
# add the computed k-shell values for all of the nodes to the graph:

for node in G.nodes(data=True):
    node[1]['k-shell'] = k_shell_values[node[0]]

In [7]:
# proving that the k-shell values are added:

for _ in range(10):
    print(random.sample(G.nodes(data=True), 1))
    print('\n')

[('1451962294976630802', {'label': 'Masooud_Taji4', 'tweets': 91, 'replies': 325, 'retweets': 4228, 'quotes': 23, 'followers': 1149, 'impression': 57700, 'age': 390.482, 'activedays': 53, 'reaction': 4576, 'activity': 4667, 'robot': 0.405, 'btsexo': 0, 'size': 10.0, 'r': 0, 'g': 0, 'b': 0, 'x': -10.759119, 'y': 323.38513, 'k-shell': 70})]


[('1206198971775012865', {'label': 'Alk14Mim', 'tweets': 7, 'replies': 7, 'retweets': 3, 'quotes': 0, 'followers': 113, 'impression': 1183, 'age': 1068.496, 'activedays': 12, 'reaction': 10, 'activity': 17, 'robot': 0.0, 'btsexo': 0, 'size': 10.0, 'r': 0, 'g': 0, 'b': 0, 'x': 73.76182, 'y': 248.43999, 'k-shell': 1})]


[('1246056841160908800', {'label': 'shaian_hagh', 'tweets': 44, 'replies': 28, 'retweets': 919, 'quotes': 58, 'followers': 68, 'impression': 69823, 'age': 958.673, 'activedays': 59, 'reaction': 1005, 'activity': 1049, 'robot': 0.042, 'btsexo': 0, 'size': 10.0, 'r': 0, 'g': 0, 'b': 0, 'x': 245.83047, 'y': -5.5238476, 'k-shell': 33})]



since Python 3.9 and will be removed in a subsequent version.
  print(random.sample(G.nodes(data=True), 1))


In [8]:
# saving the results as a dataframe for showing them in a nice way:

df = pd.DataFrame(columns=['id', 'label', 'age', 'k-shell'])

counter = 0

for item in G.nodes(data=True):
    df.loc[counter, 'id'] = item[0]
    df.loc[counter, 'label'] = item[1]['label']
    df.loc[counter, 'age'] = item[1]['age']
    df.loc[counter, 'k-shell'] = item[1]['k-shell']
    counter += 1

In [9]:
df.head()

Unnamed: 0,id,label,age,k-shell
0,7552,Pitpitak1,5887.392,9
1,742143,BBCWorld,5768.756,0
2,764537,AminSabeti,5758.664,112
3,787430,Hamed,5748.24,18
4,788524,guardiannews,5747.657,0


In [10]:
max(df['k-shell'])

124

In [11]:
df.sort_values('k-shell', ascending=False).head(20)

Unnamed: 0,id,label,age,k-shell
12060,1356146323540631555,NYXEphione,654.817,124
13122,1389239724854988807,imrman_,563.566,124
9729,1283165933293514752,Amee_thyst,856.225,124
16131,1458710321439199235,AD_AR7,371.866,124
1181,542347917,Javanmardi75,3882.993,124
22169,1572877212839182337,shoebillny1,56.693,124
16115,1458361271200460805,parkoch,372.824,124
16093,1457967737284841477,seronshi,373.746,124
9715,1283139557047795713,khodesepid,856.278,124
13016,1386347427716861962,_icecat,571.533,124


In [12]:
# saving the modified graph as *.graphml to inspect in gephi:
nx.write_graphml(G, 'question3_resulting_graph.graphml')

# Question 5.

In [13]:
# infomap:

# first, run the infomap algorithm on the graph:

im = Infomap(silent=True, num_trials=50)

im_graph = im.add_networkx_graph(G)

im.run()

print(f'Found {im.num_top_modules} modules with codelength {im.codelength:.8f} bits.')

Found 3749 modules with codelength 6.20078782 bits.


In [14]:
# then, we assign the communities to each node:

communities = {}

for node in im.nodes:
    communities[im_graph[node.node_id]] = node.module_id

nx.set_node_attributes(G, communities, 'community')

In [15]:
# proving that the community values are added:

for _ in range(10):
    print(random.sample(G.nodes(data=True), 1))
    print('\n')

[('1578338346107011072', {'label': 'NamelessGuts', 'tweets': 339, 'replies': 251, 'retweets': 3793, 'quotes': 182, 'followers': 537, 'impression': 116432, 'age': 41.05, 'activedays': 41, 'reaction': 4226, 'activity': 4565, 'robot': 0.118, 'btsexo': 0, 'size': 10.0, 'r': 0, 'g': 0, 'b': 0, 'x': -149.8811, 'y': 207.1718, 'k-shell': 98, 'community': 2})]


[('929417183314182145', {'label': 'pilot2670', 'tweets': 76, 'replies': 112, 'retweets': 237, 'quotes': 9, 'followers': 5765, 'impression': 53707, 'age': 1832.437, 'activedays': 47, 'reaction': 358, 'activity': 434, 'robot': 0.043, 'btsexo': 1, 'size': 10.0, 'r': 0, 'g': 0, 'b': 0, 'x': -139.7425, 'y': 325.33838, 'k-shell': 12, 'community': 1})]


[('1574904759810760706', {'label': 'nenupharnilofar', 'tweets': 259, 'replies': 18, 'retweets': 158, 'quotes': 20, 'followers': 252, 'impression': 724, 'age': 50.847, 'activedays': 12, 'reaction': 196, 'activity': 455, 'robot': 0.07, 'btsexo': 0, 'size': 10.0, 'r': 0, 'g': 0, 'b': 0, 'x': -378

since Python 3.9 and will be removed in a subsequent version.
  print(random.sample(G.nodes(data=True), 1))


In [16]:
# saving the modified graph as *.graphml to inspect in gephi:
nx.write_graphml(G, 'question3_5_resulting_graph.graphml')

In [17]:
# # girvan_newman: (this approach gets stuck in the for loop, does not print communities)

# communities = community.girvan_newman(G)

# print(communities)

# for comm in communities:
#     print(comm)

In [18]:
# # asyn_fluidc: (this approach gives me the error of wanting connected graph)

# communities = community.asyn_fluidc(graph, k=100) # k == number of communities to be found.

# print(communities)

# for comm in communities:
#     print(comm)

In [19]:
# asyn_lpa_communities:

communities = community.asyn_lpa_communities(G)

print(communities)

for comm in communities:
    print(comm)

<generator object asyn_lpa_communities at 0x000002BE78D1EC80>
{'7552'}
{'742143'}
{'1256337659879964674', '1421390158847520774', '1323252796683177984', '291884941', '824523455035043840', '1553150144308301825', '1574401838559989760', '1572518009524166657', '808576328', '49295564', '750582066782343172', '711868207620071424', '853258202', '1219725428958617613', '1356416668679962624', '1339675659543711747', '872785183061286912', '1141386179729928194', '1573394203668873259', '1490525245', '1474081978676916226', '1448345776225243141', '1531593985575239680', '879764194366824448', '1291922490734907392', '994884644565868544', '1308208546958528512', '1573711682613960707', '725878273704660992', '732711838933262340', '1197369739657535488', '1048664018280488961', '952978023372017664', '4294844716', '3153321672', '1218138009910026240', '1283128061043539974', '1228091666', '1360107524897308672', '2955714733', '811141626122878976', '1109114327661981696', '1504853641408172040', '906915481508073472', '9

In [20]:
# label_propagation_communities: (works with only undirected graphs)

communities = community.label_propagation_communities(G_undirected)

print(communities)

for comm in communities:
    print(comm)

dict_values([{'1573043406472814592', '1540612124799647746', '1545498458630590464', '3230939541', '1269925869247586305', '1322472808706940928', '1382426618799525893', '1421390158847520774', '965128201', '1530863847858483200', '1561053407489806339', '1403309946163372032', '824523455035043840', '1431298667584053252', '6135642', '1391883186637000705', '1333202143390670848', '1573788937004273664', '1534235361416732672', '2811920513', '1509317510595817472', '1523583624343437313', '750582066782343172', '1412024202161446916', '1288408585328832513', '622746735', '1216118287848943621', '1514953619023478786', '1553358532304670721', '1238469112189333504', '1573660845057400832', '1343942040166477825', '1448345776225243141', '1499180895026167809', '852627170560061440', '1559914037307121666', '1501799533965193223', '1580587249191862274', '1587458091729166336', '1235887590215053312', '53882483', '1308208546958528512', '1515767668757962768', '1573711682613960707', '1215512729739546624', '72587827370466