In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import plotly.express as px
import pandas as pd
import numpy as np
import math

!pip install leidenalg python-igraph

# graph related library
import networkx as nx
import leidenalg
import igraph as ig
import community as community_louvain
import networkx.algorithms.community as nx_comm

Collecting leidenalg
  Downloading leidenalg-0.10.2-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)
Collecting python-igraph
  Downloading python_igraph-0.11.8-py3-none-any.whl.metadata (2.8 kB)
Collecting igraph<0.12,>=0.10.0 (from leidenalg)
  Downloading igraph-0.11.8-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)
Collecting texttable>=1.6.2 (from igraph<0.12,>=0.10.0->leidenalg)
  Downloading texttable-1.7.0-py2.py3-none-any.whl.metadata (9.8 kB)
Downloading leidenalg-0.10.2-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m19.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading python_igraph-0.11.8-py3-none-any.whl (9.1 kB)
Downloading igraph-0.11.8-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m87.0 MB/s[0m eta [36m0:00:00[0m
[?25

In [None]:
# Citation pairs: tab-separated, no header row, two node-id columns
cora = pd.read_csv(
    'cora.cites',
    sep='\t',
    header=None,
    names=['node1', 'node2'],
)

# Undirected graph with one edge per (node1, node2) row
cora_network = nx.Graph(cora.to_numpy().tolist())


In [None]:
# Calculate the 2-node and 3-node graphlet counts and use them as node features

import pandas as pd
# Function to calculate 2-node and 3-node graphlet counts as node features
def graphlet_features(graph):
    """Count the 2-node and 3-node graphlets each node takes part in.

    Parameters
    ----------
    graph : networkx.Graph
        Graph to analyse. Expected to be undirected, because the triangle
        counts come from ``nx.triangles``.

    Returns
    -------
    dict
        Maps every node (in ``graph.nodes()`` order) to a dict with three
        integer counts, in this key order:

        * ``'edges'``       -- ``graph.degree[node]``.
        * ``'triangles'``   -- triangles through the node (``nx.triangles``).
        * ``'paths_len_2'`` -- unordered pairs of the node's neighbours that
          are NOT joined by an edge, i.e. open two-edge paths centred on the
          node.
    """
    triangles = nx.triangles(graph)

    features = {}
    for node in graph.nodes():
        # Neighbours other than the node itself: a self-loop can never be
        # the end of an open path centred on this node.
        k = sum(1 for neighbor in graph.neighbors(node) if neighbor != node)

        # Each of the C(k, 2) neighbour pairs is either adjacent (closing a
        # triangle through this node) or not (an open path), so the open
        # paths are what remains after removing the triangles. This avoids
        # testing every neighbour pair with has_edge().
        features[node] = {
            'edges': graph.degree[node],
            'triangles': triangles[node],
            'paths_len_2': k * (k - 1) // 2 - triangles[node],
        }

    return features

# Per-node graphlet counts for the Cora graph
cora_graphlet_features = graphlet_features(cora_network)

# Tabulate them: one row per node, one column per count
cora_graphlet_features_df = pd.DataFrame.from_dict(
    cora_graphlet_features,
    orient='index',
)

# Preview the first rows of the table
print(cora_graphlet_features_df.head())

# Store the counts on the graph as node attributes: each node's attribute
# dict is updated with its own entry from cora_graphlet_features
nx.set_node_attributes(cora_network, cora_graphlet_features)

# The counts can now be read back per node, e.g.
# print(cora_network.nodes[35])


         edges  triangles  paths_len_2
35         168        160        13868
1033         5          1            9
103482       6          2           13
103515      11          9           46
1050679      4          1            5


Use the ORCA algorithm to efficiently count higher-order graphlets (4- and 5-node):
https://github.com/thocevar/orca