In [1]:
from typing import Mapping, MutableMapping, Sequence, Iterable, List, Set, Any, Callable, TypeVar, Iterator, Union, Optional

from math import floor

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# to plot in a separate window
%matplotlib qt
# to plot inline
# %matplotlib inline

# Computes the (weighted) graph of k-Neighbors for points in X
from sklearn.neighbors import kneighbors_graph, radius_neighbors_graph

# to view point cloud
import pyvista as pv

# to work with graphs
import igraph as ig

# to correctly display farsi chars
# to be used in farsify func
import arabic_reshaper
from bidi.algorithm import get_display

# Project name; interpolated into the file names of exported graphs/plots
# (e.g. the .gml and .pdf targets written further down in this notebook).
name = 'dense_point_cloud'

### Define helper functions

In [2]:
def farsify(text: str) -> str:
    '''
    Prepare a Persian string so that it is displayed correctly (e.g. in plot labels).

    The input is converted with str(), passed through arabic_reshaper.reshape
    and then through bidi's get_display; the resulting string is returned.

    * Requires these module-level imports:
        import arabic_reshaper
        from bidi.algorithm import get_display
    '''
    reshaped_text = arabic_reshaper.reshape(str(text))
    return get_display(reshaped_text)

def rgb_to_hex(rgb: tuple, normalized=False):
    '''
    Return the HEX colour string ('#rrggbb') for an RGB (or RGBA) colour.
        * for RGBA input the alpha (4th) value is ignored
        ** requires: from math import floor

    rgb: sequence of at least three numbers (red, green, blue)
    normalized: bool, default=False
        True  -> channels are given in [0, 1] and are scaled by 255
        False -> channels are given in [0, 255]

    Each channel is floored to an integer and then clamped to 0..255, so
    out-of-range input can no longer produce a malformed string such as
    '#12c-500'.

    Raises IndexError if fewer than three channels are supplied.
    '''
    scale = 255 if normalized else 1
    # Index explicitly (instead of slicing) so that a too-short input still
    # raises IndexError, exactly as before.
    channels = (min(255, max(0, floor(rgb[i] * scale))) for i in range(3))
    return '#' + ''.join(f'{channel:02x}' for channel in channels)

def flatten(xss: Sequence[list]) -> Iterator[Any]:
    '''
    Lazily flatten an arbitrarily nested sequence, depth first.

    Every element that is itself a Sequence (list, tuple, ...) is flattened
    recursively; every other element is yielded unchanged.

    Strings are treated as atoms: a str is a Sequence whose items are again
    (one-character) strings, so recursing into it would never terminate and
    ends in a RecursionError.
    '''
    for item in xss:
        if isinstance(item, Sequence) and not isinstance(item, str):
            # recurse so that nesting of any depth is flattened
            yield from flatten(item)
        else:
            yield item

### Create DataFrame from text 

In [8]:
# Input point cloud: a space separated text file with one point per line
# and the columns x y z r g b (no header row).
# Alternative test inputs:
# point_path = './Data/Point Clouds for Testing as Graph/99000 points/99000_points.txt'
# point_path = './Data/Point Clouds for Testing as Graph/61700 points/61700_points.txt'
point_path = './Data/Masjed_Jame_Semnan_0b_5cm.txt'

# Number of points randomly drawn from the file, and the seed that makes the
# draw reproducible across kernel restarts.
SAMPLE_SIZE = 25000
RANDOM_SEED = 42

points_all = pd.read_csv(point_path, sep=" ", names=['x', 'y', 'z', 'r', 'g', 'b'])

# Never request more rows than the file holds: DataFrame.sample raises a
# ValueError when n exceeds the population (sampling without replacement).
df = (
    points_all
    .sample(n=min(SAMPLE_SIZE, len(points_all)), random_state=RANDOM_SEED)
    .reset_index(drop=True)
)
del points_all

# One hex colour string per point; zipping the three columns avoids building a
# Series for every row as a row-wise DataFrame.apply would do.
df['hex_color'] = [rgb_to_hex(rgb) for rgb in zip(df['r'], df['g'], df['b'])]
df

Unnamed: 0,x,y,z,r,g,b,hex_color
0,1010.992,998.958,102.873,223,218,208,#dfdad0
1,1011.442,1004.322,104.653,133,125,115,#857d73
2,1031.711,998.625,103.775,135,127,118,#877f76
3,1021.875,1013.025,107.084,61,39,23,#3d2717
4,1010.989,1010.952,103.162,157,129,102,#9d8166
...,...,...,...,...,...,...,...
24995,1012.463,998.823,104.695,118,106,102,#766a66
24996,1017.781,1014.169,102.723,99,98,94,#63625e
24997,1017.523,1014.332,101.074,113,119,118,#717776
24998,1026.546,1001.995,104.064,144,121,100,#907964


### visualize the point cloud

In [10]:
# Build a PyVista point cloud from the coordinate columns of the sampled frame
xyz_columns = ['x', 'y', 'z']
rgb_columns = ['r', 'g', 'b']

points_position_matrix = df[xyz_columns].to_numpy()
point_cloud = pv.PolyData(points_position_matrix)

# attach the rgb colour of every point as a point attribute
point_cloud['point_color'] = df[rgb_columns].to_numpy()
print(point_cloud)

# Sanity check: the coordinates stored in the PolyData must match the input
positions_match = np.allclose(points_position_matrix, point_cloud.points)
print('was point clouds created correctly? ', positions_match)

# Render the cloud.
# notebook=False --> the point cloud renders in a separate window
# (for inline rendering a jupyter_backend such as 'pythreejs', 'ipygany' or 'panel' is needed)
point_cloud.plot(eye_dome_lighting=True, rgb=True, notebook=False)

PolyData (0x1b8ada06be0)
  N Cells:	25000
  N Points:	25000
  X Bounds:	1.006e+03, 1.037e+03
  Y Bounds:	9.908e+02, 1.015e+03
  Z Bounds:	1.001e+02, 1.166e+02
  N Arrays:	1

was point clouds created correctly?  True


### Create Adjacency Matrix based on point clouds

#### Using Sklearn
https://scikit-learn.org/stable/modules/classes.html#module-sklearn.neighbors


In [5]:
# Build the (weighted) adjacency matrix of the point cloud with scikit-learn.
#
# mode='distance'    -> entries hold the distance between neighbours (weighted graph);
#                       'connectivity' would give an unweighted 0/1 matrix instead
# include_self=False -> no self loops
#
# Only ONE neighbourhood definition is evaluated. Previously the k-neighbours
# graph was computed and then immediately overwritten by the radius graph,
# so the first (expensive) computation was thrown away.
#   'radius' -> connect every pair of points closer than RADIUS
#   'knn'    -> connect every point to its N_NEIGHBORS nearest points
# NOTE(review): 'radius' is what the cell effectively produced before this change;
# the recorded outputs further down (100000 edges for 25000 nodes) look like a
# 4-nearest-neighbour run, so confirm which mode is intended.
NEIGHBOR_MODE = 'radius'
N_NEIGHBORS = 4
RADIUS = 1.5

xyz = df[['x', 'y', 'z']]
if NEIGHBOR_MODE == 'knn':
    A_from_point_cloud = kneighbors_graph(xyz, n_neighbors=N_NEIGHBORS, mode='distance', include_self=False)
elif NEIGHBOR_MODE == 'radius':
    # p is the power of the minkowski distance: p=1 manhattan, p=2 euclidean
    A_from_point_cloud = radius_neighbors_graph(xyz, radius=RADIUS, mode='distance', metric='minkowski', p=2, include_self=False)
else:
    raise ValueError(f"NEIGHBOR_MODE must be 'knn' or 'radius', got {NEIGHBOR_MODE!r}")

# The result is a sparse (CSR) matrix; densify it so a pandas adjacency
# matrix can be built from it (used later to create the igraph graph).
# Caution: the dense matrix has len(df) x len(df) float entries.
A_ndarray = A_from_point_cloud.toarray()
del A_from_point_cloud

# pandas adjacency matrix (rows/columns are the point indices)
df_adjacency = pd.DataFrame(A_ndarray)
del A_ndarray
df_adjacency

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,24990,24991,24992,24993,24994,24995,24996,24997,24998,24999
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24995,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
24996,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
24997,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
24998,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Using DGL
https://docs.dgl.ai/generated/dgl.knn_graph.html?highlight=cloud


In [None]:
# # Work with graph neural networks
# import dgl
# !export DGLBACKEND='tensorflow'

# !export DGLBACKEND='pytorch'
# # Use DGL to create a graph from the Sparse matrix (A)
# graph = dgl.from_scipy(A)
# print(graph.num_nodes())

# graph.ndata['x'] = df['x']

# # import torch as th
# # from dgl.nn import EdgeConv

# # g = dgl.add_self_loop(graph)
# # feat = th.ones(graph.num_nodes(), 10)
# # conv = EdgeConv(10, 2)
# # res = conv(g, feat)
# # res

# # dgl.knn_graph(A, 2, algorithm='bruteforce-blas', dist='euclidean')

### Create Graph from The Sparse Matrix

In [6]:
# Vertex labels are the row labels of the adjacency frame
node_names = df_adjacency.index

# Dense matrix of edge weights (0 means "no edge")
A = df_adjacency.values

# Build a directed graph with one edge per positive entry of A.
# Adjacency is a classmethod, so it is called on the class itself instead of
# on a throwaway empty Graph() instance.
g = ig.Graph.Adjacency((A > 0).tolist())

# Add node labels and edge weights.
g.vs['label'] = node_names  # or a.index/a.columns
# A.nonzero() lists the non-zero cells row by row; this assumes igraph created
# the edges in that same order, which the round-trip check below verifies.
g.es['weight'] = A[A.nonzero()]

# add the nodes' coordinates
g.vs['x'] = df['x']
g.vs['y'] = df['y']
g.vs['z'] = df['z']

# add the nodes' RGB colour as a hex string
# (alternative: a tuple of RGB values normalized to [0, 1], if the colours should
#  also be used for community detection)
# g.vs['color'] = df.apply(lambda x: (x['r']/255, x['g']/255, x['b']/255), axis=1)
g.vs['color'] = df['hex_color']

# To make the graph undirected instead:
# g.simplify(combine_edges='sum')
# g.to_undirected(mode='each')   # "each" keeps every edge, only the arrowheads are removed

# Reconstruct the adjacency matrix from the graph ...
df_from_graph = pd.DataFrame(g.get_adjacency(attribute='weight').data, columns=g.vs['label'], index=g.vs['label'])
# ... and compare it with the input to confirm the graph (incl. weights) was built correctly
print("was the Graph created by adjacency list without issue: ",
      (df_from_graph == df_adjacency).all().all()
     )  # --> True

# basic structural checks of the created graph
print("is the graph connect (strong): ", g.is_connected(mode='strong'))
print("is the graph connect (weak): ", g.is_connected(mode='weak'))
print("is the graph simple (no loop or multiple edges): ", g.is_simple())
print("is the graph directed: ", g.is_directed())
print("is the graph multi-edged: ", g.has_multiple())
# check whether each edge has an opposite pair (mutual)
print("is the graph mutual: ", all(g.is_mutual()))

# # export the graph network as a file
# g.write_gml(f'point_cloud_{name}_graph.gml')

# The point table is no longer needed: its coordinates and colours now live on
# the graph vertices, so free it.
# NOTE(review): the original comment spoke of deleting the adjacency matrix, but
# the statement has always deleted `df`; `df_adjacency`, `A` and `df_from_graph`
# (the large dense matrices) stay in memory. Confirm which was intended.
# After this line the cell cannot be re-run without re-loading `df`.
del df

was the Graph created by adjacency list without issue:  True
is the graph connect (strong):  False
is the graph connect (weak):  True
is the graph simple (no loop or multiple edges):  True
is the graph directed:  True
is the graph multi-edged:  False
is the graph mutual:  False


In [9]:
# Debug helper: dump every vertex together with its attributes
# print(*[node for node in g.vs], sep="\n\n")

# Quick connectivity check of the graph with igraph's default mode
# (the previous cell prints the strong and weak variants explicitly).
g.is_connected()

False

### Print Properties of the Graph

In [7]:
# Summary table of global graph properties (one row per property).
# The diameter is computed with unconn=True when the graph is not connected.
graph_is_connected = g.is_connected()

graph_properties = {
    '#Nodes': g.vcount(),
    '#Edges': g.ecount(),
    'Diameter': g.diameter(directed=True, unconn=(not graph_is_connected)),
    'Density': g.density(loops=False),
    'Directed': g.is_directed(),
    'Strong_Connected': g.is_connected(mode='strong'),
    'Weak_Connected': g.is_connected(mode='weak'),
    'Multigraphs': g.has_multiple(),
    'Simple_graph': g.is_simple(),
}

df_graph_stats = pd.DataFrame(
    {"Graph Properties": list(graph_properties.values())},
    index=list(graph_properties),
)
df_graph_stats

Unnamed: 0,Graph Properties
#Nodes,25000
#Edges,100000
Diameter,549
Density,0.00016
Directed,True
Strong_Connected,False
Weak_Connected,True
Multigraphs,False
Simple_graph,True


### Calculate Centralities

In [11]:
# Node based centralities, stored as vertex attributes of the same name.
# (harmonic centrality is available via g.harmonic_centrality() but is not used)
node_centralities = {
    'degree': g.degree(),
    'closeness': g.closeness(),
    'eigenvector': g.eigenvector_centrality(directed=True),
}
for attribute_name, scores in node_centralities.items():
    g.vs[attribute_name] = scores

# Edge based centralities (disabled)
# g.es['betweenness'] = g.edge_betweenness(directed=False)

# Optional: colour the nodes by degree bucket for graph visualization (disabled)
# degree_colors = [(200, '#D04711'), (150, '#E36005'), (100, '#F89F05'), (50, '#F2D670'), (20, '#FCDC97')]
# for node in g.vs:
#     node['color'] = next((color for limit, color in degree_colors if node['degree'] > limit), '#F9FFD1')

## Omit low-degree nodes (based on degree frequency)

In [12]:
# Remove every vertex whose degree is at most `degree_threshold`.
# The `_degree_le` filter makes igraph evaluate the vertices' degree and keep
# those that are <= the threshold (the pure-python equivalent would be
# [v.index for v in g.vs if v['degree'] <= degree_threshold]).
degree_threshold = 1

to_delete_ids = g.vs.select(_degree_le=degree_threshold).indices
print("#low_degree_nodes to delete: ", len(to_delete_ids))

# NOTE: this mutates `g` in place
g.delete_vertices(to_delete_ids)

#low_degree_nodes to delete:  0


In [13]:
# Node table: one row per vertex (indexed by its label), one column per centrality
centrality_columns = ('degree', 'closeness', 'eigenvector')
df_node_centrality = pd.DataFrame(
    {column: g.vs[column] for column in centrality_columns},
    index=g.vs['label'],
)

# Show the table once per centrality, sorted in descending order
sort_titles = (
    ('degree', "Sort by Degree"),
    ('closeness', "Sort by Closeness"),
    ('eigenvector', "Sort by EigenVector"),
)
for column, title in sort_titles:
    print(title.center(89, "-"))
    display(df_node_centrality.sort_values(column, ascending=False))

# Table of the nodes whose degree is zero (isolated nodes)
has_zero_degree = df_node_centrality['degree'] == 0
display(df_node_centrality[has_zero_degree])

--------------------------------------Sort by Degree-------------------------------------


Unnamed: 0,degree,closeness,eigenvector
7264,13,0.025563,0.000000e+00
22188,13,0.029836,1.897119e-03
13493,12,0.026859,0.000000e+00
823,12,0.015460,0.000000e+00
11603,12,0.027487,0.000000e+00
...,...,...,...
16026,4,0.200546,1.322082e-18
2018,4,0.020697,1.322082e-18
16016,4,0.157388,1.322082e-18
16006,4,0.156051,1.322082e-18


------------------------------------Sort by Closeness------------------------------------


Unnamed: 0,degree,closeness,eigenvector
2808,10,1.000000,1.373446e-03
2809,10,1.000000,1.373446e-03
2198,9,0.857143,1.265016e-03
2196,8,0.857143,9.397262e-04
2817,11,0.818182,1.990989e-03
...,...,...,...
110,10,0.010758,8.191365e-15
112,10,0.010758,8.201272e-15
336,8,0.010758,4.461717e-15
335,6,0.010643,1.190832e-15


-----------------------------------Sort by EigenVector-----------------------------------


Unnamed: 0,degree,closeness,eigenvector
1704,9,0.018595,1.000000
1708,11,0.018596,0.947368
1710,9,0.018257,0.921053
1703,8,0.018595,0.885965
1714,9,0.018595,0.868421
...,...,...,...
8574,11,0.031211,0.000000
8573,7,0.031154,0.000000
8572,8,0.031615,0.000000
8571,8,0.030793,0.000000


Unnamed: 0,degree,closeness,eigenvector


### Distribution of Centralities

In [66]:
def plot_centrality_distribution(values, title, log_y=False, tick_params=None, **histplot_kwargs):
    '''
    Draw the histogram of one centrality measure in its own figure.

    values: iterable of numbers (one centrality value per node)
    title: str, (Persian) figure title; passed through farsify()
    log_y: bool, default=False
        use a logarithmic y axis
    tick_params: dict or None
        keyword arguments forwarded to ax.tick_params (None: leave the defaults)
    histplot_kwargs:
        extra keyword arguments forwarded to seaborn.histplot (e.g. binwidth, kde)
    '''
    fig, ax = plt.subplots(figsize=(10, 7))
    label_font_size = 14
    ax.set_ylabel(farsify("فراوانی/تعداد مرکزیت"), fontsize=label_font_size)
    ax.set_xlabel(farsify("مقدار مرکزیت"), fontsize=label_font_size)
    ax.set_title(farsify(title), fontsize=label_font_size)
    if tick_params is not None:
        ax.tick_params(**tick_params)
    sns.histplot(data=sorted(values), ax=ax, **histplot_kwargs)
    if log_y:
        ax.set_yscale("log")
    plt.show()


# Histogram of Degree centrality (discrete values -> one bin per unit).
# The former `bins=<value_counts Series>` argument was dropped: it passed the
# degree frequencies where bin edges are expected, and seaborn documents that
# `binwidth` overrides `bins` anyway.
plot_centrality_distribution(
    df_node_centrality['degree'],
    "توزیع فراوانی گسسته مرکزیت درجه گره ها",
    tick_params=dict(axis='both', direction='inout', color='black', pad=5),
    binwidth=1,
)

# Histogram of Closeness centrality (log-scaled counts)
plot_centrality_distribution(
    df_node_centrality['closeness'],
    "توزیع فراوانی پیوسته مرکزیت نزدیکی گره ها",
    log_y=True,
)

# Histogram of Eigenvector centrality (log-scaled counts, with KDE curve)
plot_centrality_distribution(
    df_node_centrality['eigenvector'],
    "توزیع فراوانی پیوسته مرکزیت بردار ویژه گره ها",
    log_y=True,
    kde=True,
)

No handles with labels found to put in legend.


## Plot centrality correlations (pairplot)

In [20]:
# Bring all centralities onto the same [0, 1] scale before the pairplot
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler(feature_range=(0, 1))
scaled_values = scaler.fit_transform(df_node_centrality)

# fit_transform returns a bare array, so the labels are re-attached here;
# values are rounded to 3 decimals
df_node_centrality_normal = pd.DataFrame(
    scaled_values.round(3),
    index=df_node_centrality.index,
    columns=df_node_centrality.columns,
)

display(df_node_centrality_normal)

# pairwise scatter plots / distributions of the nodes' centralities
sns_plot = sns.pairplot(df_node_centrality_normal, height=3)
# sns_plot.savefig("graph_nodes_centrality_pairplot.pdf")

# Variant with KDE panels (probability density of a continuous variable):
# sns_plot_kde = sns.pairplot(df_node_centrality_normal, kind="kde", height=3)
# sns_plot_kde.savefig("graph_nodes_centrality_pairplot_kde.pdf")

Unnamed: 0,degree,closeness,eigenvector
0,0.111,0.002,0.000
1,0.444,0.002,0.000
2,0.333,0.002,0.000
3,0.556,0.002,0.000
4,0.444,0.002,0.000
...,...,...,...
24995,0.556,0.014,0.003
24996,0.222,0.014,0.001
24997,0.556,0.014,0.003
24998,0.333,0.014,0.001


## visualize the Point-Cloud Graph

#### Define General Settings

In [15]:
# Colour sets available for the plots
color_set_grey = ['#A8A8A8', '#B1B1B1', '#B9B9B9', '#C2C2C2', '#CACACA', '#D3D3D3']
color_set_pink = ['#E0BBE4', '#957DAD', '#D291BC', '#FEC8D8', '#FFDFD3']
color_set_yellow = ['#FFD300', '#FFDC26', '#FFE54C', '#FFED73', '#FFF699', '#FFFFBF']
color_set_blue = ['#87CEFA', '#9BD7FB', '#B0E0FC', '#C1E9FC', '#D9F2FE', '#EDFBFF']
color_set_green = ['#ECFADC', '#DDF2D1', '#CDEBC5', '#BEE3BA', '#AEDCAE', '#9FD4A3']
color_set_blue_pastel = ['#91ABC9', '#9EB9D1', '#A5BFD3', '#ABC5D5', '#B2CBD7', '#B9D1D9']
color_set_yellow_red = ['#F87E39', '#FAB445', '#F9DB00', '#F0E990', '#EF4F31', '#CF0022']

# Plot settings shared by every igraph plot in this notebook; each plotting cell
# passes them to ig.plot together with its own `visual_style` dict.
visual_style_general = {
    # drawing area and margin
    "bbox": (0, 0, 1000, 1000),
    "margin": 100,
    # vertex label font (alternative: "Times New Roman")
    "vertex_font": "Calibri",
    # vertex border colour
    "vertex_frame_color": '#555',
    # vertex border size
    # NOTE(review): igraph's documented attribute appears to be
    # `vertex_frame_width` - confirm that this key has any effect
    "vertex_frame_size": "1px",
    # label distance, label colour
    "vertex_label_dist": None,
    "vertex_label_color": "#333",
}

# Further options that are currently left at their defaults:
# visual_style_general["vertex_order"] = ...   # vertices with a smaller order are drawn first
# visual_style_general["vertex_label_angle"] = math.radians(45)

### plot the graph

In [None]:
# Edge colour is stored in the SHARED settings dict, so it also applies to the
# plots created by later cells.
visual_style_general["edge_color"] = '#aaa'

# Layout (aka visualization algorithm).
# Other options: layout_fruchterman_reingold, layout_grid, layout_kamada_kawai,
# layout_reingold_tilford_circular(mode='in'), layout_lgl, layout_drl, layout_mds,
# layout_circle, layout_random
my_layout = g.layout_graphopt(node_charge=0.005, node_mass=100, spring_length=30, spring_constant=1, max_sa_movement=100)

# Settings specific to this plot
visual_style = {
    # no vertex labels; vertices are coloured with the points' own colours
    "vertex_label": None,
    "vertex_color": g.vs['color'],
    "vertex_size": 10,
    "vertex_label_size": 10,
    # edges: thin, straight, with small arrow heads
    "edge_width": .5,
    "edge_arrow_size": .1,
    "edge_arrow_width": 1,
    "edge_curved": False,
    "layout": my_layout,
}

# To export the plot to a file instead:
# visual_style["target"] = f'point_cloud_{name}_fullgraph.pdf'

# Plot the graph
ig.plot(g, **visual_style_general, **visual_style)

## Community Detection

In [16]:
# Community detection on the graph --> every method returns a "VertexClustering" instance
# link: https://igraph.org/python/doc/api/igraph.clustering.VertexClustering.html#cluster_graph
# link: https://igraph.org/python/tutorial/latest/analysis.html#clustering
#
# NOTE(review): the edge weights used below are neighbour DISTANCES; confirm
# that the methods are meant to receive distances rather than similarities.

# InfoMap method
community_infomap = g.community_infomap(edge_weights=g.es['weight'], vertex_weights=g.vs['degree'], trials=10)
print('community_infomap_done')

# Leading eigenvector method
# (result is very similar to the layout of the original graph)
community_eigenvector = g.community_leading_eigenvector(weights=g.es['weight'], clusters=2, arpack_options=None)
print('community_eigenvector_done')

# Multi-level method (a version of Louvain; only for un-directed graphs) - disabled
# (did not recognize the "resolution" parameter, as opposed to the documentation)
# community_multilevel = g.community_multilevel(weights=g.es['weight'], return_levels=False)

# Connected components (aka Graph.clusters())
community_components = g.components(mode='weak')
print('community_components_done')

# Label propagation
# good for when we have external info (e.g., ground truth)
community_label_propagation = g.community_label_propagation(weights=g.es['weight'], )
print('community_label_propagation_done')

# Leiden method (only for un-directed graphs) - disabled
# can (not necessarily) use external info (e.g., ground truth);
# with a fine-tuned "resolution_parameter" it does a pretty good job (like multilevel)
# community_leiden = g.community_leiden(objective_function='CPM', weights=g.es['weight'], node_weights=g.vs['degree'], beta=0.01, resolution_parameter=.0001, n_iterations=20)

# Spinglass method (only for connected graphs) - disabled
# community_spinglass = g.community_spinglass(weights=g.es['weight'], )

# Optimal modularity (exact solution): only practical for small graphs
# (< 100 vertices); on larger graphs it is extremely slow and was observed to
# restart the kernel. It used to be called unconditionally; it is now skipped
# for graphs above the limit so the cell can finish.
OPTIMAL_MODULARITY_MAX_VERTICES = 100
if g.vcount() < OPTIMAL_MODULARITY_MAX_VERTICES:
    community_optimalmodularity = g.community_optimal_modularity()
    print('community_optimalmodularity_done')
else:
    community_optimalmodularity = None
    print(f'community_optimalmodularity_skipped ({g.vcount()} vertices >= {OPTIMAL_MODULARITY_MAX_VERTICES})')

community_infomap_done
community_eigenvector_done
community_components_done
community_label_propagation_done


  membership, _, q = GraphBase.community_leading_eigenvector(


### Compare community Detection methods (2 by 2)
#### By Rand Index

In [17]:
# Pairwise comparison of the community detection results.
# documentation of the comparison (in R but also valid for python)
# link: https://igraph.org/r/doc/compare.html

# the clusterings to compare (multilevel / leiden are currently disabled)
community_dict = {
    'infomap': community_infomap,
    'eigenvector': community_eigenvector,
    'components': community_components,
    'label_propagation': community_label_propagation,
}
method_names = list(community_dict)

# Rand index for every (row, col) pair of methods
df_community_by_rand = pd.DataFrame(columns=method_names, index=method_names)
for row in method_names:
    for col in method_names:
        rand_index = ig.compare_communities(community_dict[row], community_dict[col], method="rand")
        df_community_by_rand.loc[row, col] = round(rand_index, 3)

print("community detection comparison by Rand Index".center(65))
display(df_community_by_rand)

# Per-method metrics: modularity and number of communities
df_community_modularity = pd.DataFrame(
    {
        'modularity': [community_dict[method].modularity for method in method_names],
        '#communities': [len(community_dict[method]) for method in method_names],
    },
    index=method_names,
)

print("\ncommunity detection comparison by modularity & #clusters".center(65))
display(df_community_modularity)

           community detection comparison by Rand Index          


Unnamed: 0,infomap,eigenvector,components,label_propagation
infomap,1.0,0.809,0.809,0.999
eigenvector,0.809,1.0,1.0,0.809
components,0.809,1.0,1.0,0.809
label_propagation,0.999,0.809,0.809,1.0


    
community detection comparison by modularity & #clusters    


Unnamed: 0,modularity,#communities
infomap,0.832938,1540
eigenvector,0.809937,11
components,0.808264,11
label_propagation,0.696058,3325


### Print/Plot Info about the result of our Community Detection Method

In [21]:
# set which clustering is going to be processed
vertex_cluster = community_eigenvector

# table with one row per community
print("community detection communities' stats".center(65))

# Fetch the label list ONCE: asking the graph for vs['label'] builds the whole
# list again on every access, which made the former per-node lookup quadratic.
node_labels = vertex_cluster.graph.vs['label']
total_nodes = sum(len(cluster) for cluster in vertex_cluster)

# Build all rows first and create the frame in a single step. The previous
# chained assignment (df.iloc[i].loc[...] = ...) writes through an intermediate
# object and is not guaranteed by pandas to reach the frame.
community_records = [
    {
        'cluster_index': cluster_index,
        '#nodes': len(cluster),
        # share of all nodes that belong to this community (fraction in [0, 1])
        'percentile': round(len(cluster) / total_nodes, 2),
        # placeholder, to be filled in manually
        'community_name': np.nan,
        # labels (not indices) of the member nodes
        'community_members': [node_labels[node] for node in cluster],
    }
    for cluster_index, cluster in enumerate(vertex_cluster)
]
df_communities_stat = pd.DataFrame(
    community_records,
    columns=['cluster_index', '#nodes', 'percentile', 'community_name', 'community_members'],
    index=range(len(vertex_cluster)),
)

display(df_communities_stat.sort_values('#nodes', ascending=False).reset_index(drop=True))

# Histogram of the cluster sizes (number of nodes per cluster index)
fig, ax = plt.subplots(figsize=(10, 7))
label_font_size = 14
ax.set_ylabel(farsify("تعداد اعضای خوشه"), fontsize=label_font_size)
ax.set_xlabel(farsify("شماره خوشه"), fontsize=label_font_size)
ax.set_title(farsify("توزیع فراوانی گسسته گره ها نسبت به شماره خوشه ها"), fontsize=label_font_size)
# adjust the axis ticks
ax.tick_params(axis='both', direction='inout', color='black', pad=5)
# binwidth=1 gives one bin per cluster index; the former additional `bins=`
# argument was redundant because seaborn lets `binwidth` override `bins`
ax = sns.histplot(data=sorted(vertex_cluster.membership), ax=ax, binwidth=1, kde=True)
plt.show()

              community detection communities' stats             


  arr_value = np.asarray(value)


Unnamed: 0,cluster_index,#nodes,percentile,community_name,community_members
0,0,6345,0.25,,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,..."
1,8,4925,0.2,,"[13879, 13880, 13881, 13882, 13883, 13884, 138..."
2,6,4867,0.19,,"[7145, 7146, 7147, 7148, 7149, 7150, 7151, 715..."
3,7,4313,0.17,,"[11891, 11892, 11893, 11894, 11895, 11896, 118..."
4,10,3510,0.14,,"[21490, 21491, 21492, 21493, 21494, 21495, 214..."
5,1,829,0.03,,"[338, 339, 340, 341, 342, 343, 344, 345, 346, ..."
6,9,148,0.01,,"[15966, 15967, 15968, 15969, 15970, 15971, 159..."
7,2,31,0.0,,"[1031, 1032, 1033, 1034, 1035, 1036, 1037, 103..."
8,4,15,0.0,,"[2203, 2204, 2205, 2206, 2207, 2211, 2212, 221..."
9,5,10,0.0,,"[2810, 2811, 2812, 2813, 2814, 2815, 2816, 281..."


### Prepare the VertexClustering obj for Visualization
#### By converting it back to a graph (but this time with cluster_index & cluster_color for each Node)

In [22]:
# Graph that belongs to the selected clustering
# NOTE(review): this is presumably the same object as `g` (not a copy), so the
# attributes added below would also appear on `g` - confirm
clustered_graph = vertex_cluster.graph

# (taking a single cluster as a subgraph did not work:
#  clustered_graph = vertex_cluster.subgraph(3))

# store the cluster index of every node
membership = vertex_cluster.membership
clustered_graph.vs['cluster_index'] = membership

# one colour (normalized RGBA) per cluster, assigned to the nodes by cluster index;
# the palette is materialized as a list so it can be indexed repeatedly
n_clusters = len(vertex_cluster)
cluster_colors = list(ig.drawing.colors.ClusterColoringPalette(n=n_clusters))
clustered_graph.vs['cluster_color'] = [cluster_colors[cluster_id] for cluster_id in membership]

# export the graph as a file (after community detection)
clustered_graph.write_gml(f'community_eigenvector_{name}_graph.gml')

  clustered_graph.write_gml(f'community_{"eigenvector"}_{name}_graph.gml')


### create a DataFrame of each cluster (with its nodes)
#### Used to name each cluster (community) based on its members

In [None]:
# Table of every node label together with the index of the cluster it belongs to
df_cluster = pd.DataFrame(
    {
        'label': clustered_graph.vs['label'],
        'cluster_id': clustered_graph.vs['cluster_index'],
    }
)
display(df_cluster)

# show all the nodes of one cluster (here: cluster_id == 1), as columns
in_selected_cluster = df_cluster['cluster_id'] == 1
display(df_cluster.loc[in_selected_cluster].T)

### visualize the Clustered Community Graph

In [None]:
# Layout (aka visualization algorithm).
# Other options: layout_fruchterman_reingold, layout_graphopt(...), layout_grid,
# layout_kamada_kawai, layout_reingold_tilford_circular(mode='in'), layout_lgl,
# layout_drl, layout_circle, layout_random
my_layout = clustered_graph.layout_mds()

# Settings specific to this plot
visual_style = {
    # no vertex labels; vertices are coloured by the cluster they belong to
    "vertex_label": None,
    "vertex_color": clustered_graph.vs['cluster_color'],
    "vertex_size": 10,
    "vertex_label_size": 10,
    # edges: thin, straight, with small arrow heads
    "edge_width": 0.5,
    "edge_arrow_size": .1,
    "edge_arrow_width": 1,
    "edge_curved": False,
    "layout": my_layout,
    # export file
    "target": f'community_eigen_{name}_fullgraph.pdf',
}

# Plot the graph
# (inline=False can be used to preview a file instead of rendering in the notebook)
ig.plot(clustered_graph, **visual_style_general, **visual_style)

### Create a Merged Graph (after Community detection)
#### This way all nodes in the same cluster get merged to create a single Super Node (representing that cluster)

In [24]:
# Contract every cluster into a single super node
supernode_graph = vertex_cluster.cluster_graph(combine_vertices=None, combine_edges=None)

# degree of each super node
supernode_graph.vs['degree'] = supernode_graph.degree()

# number of original nodes merged into each super node
supernode_graph.vs['#sub-nodes'] = list(map(len, vertex_cluster))

# label each super node with its own index (0 .. number of super nodes - 1)
supernode_graph.vs['label'] = list(range(supernode_graph.vcount()))

# export the merged graph as a file
supernode_graph.write_gml('community_eigenvector_supernode_graph.gml')

### visualize the Merged Community Graph

In [None]:
# Layout (aka visualization algorithm).
# Other options: layout_fruchterman_reingold, layout_graphopt(...), layout_grid,
# layout_kamada_kawai, layout_reingold_tilford_circular(mode='in'), layout_lgl,
# layout_drl, layout_circle, layout_random
my_layout = supernode_graph.layout_mds()

# Settings specific to the merged (super node) graph
visual_style = {
    # label every super node with its index
    "vertex_label": range(supernode_graph.vcount()),
    # one palette colour per cluster
    "vertex_color": ig.drawing.colors.ClusterColoringPalette(n=len(vertex_cluster)),
    "vertex_size": 30,
    "vertex_label_size": 20,
    # straight edges
    "edge_curved": False,
    "layout": my_layout,
}

# Optional settings (disabled):
# visual_style['edge_width'] = [weight/5 for weight in supernode_graph.es['weight']]
# visual_style["target"] = f'supernode_{name}_graph.pdf'   # export file

# Plot the graph (currently disabled)
# (inline=False can be used to preview a file instead of rendering in the notebook)
# ig.plot(supernode_graph, **visual_style_general, **visual_style, mark_groups = False)