In [None]:
import networkx as nx
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans, AgglomerativeClustering, SpectralClustering
from sklearn.metrics import normalized_mutual_info_score as NMI, adjusted_rand_score as ARI
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data
from torch_geometric.utils import from_networkx
from community import community_louvain
from node2vec import Node2Vec

# Edge-list file for the Facebook graph (facebook_combined.txt).
# NOTE(review): this is a machine-specific absolute path; point it at your
# own copy of the file before running the notebook on another machine.
file_path = r'D:\New folder (4)\facebook_combined.txt\facebook_combined.txt'

# Seed the global NumPy and torch generators so the random node features and
# the GCN weight initialisation are the same on every Restart & Run All.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Node ids in the file are parsed as ints.
G = nx.read_edgelist(file_path, nodetype=int)

# The edge list carries no node attributes, so uniform-random values are used
# as placeholder input features (one row per node).
FEATURE_DIM = 16
node_features = np.random.rand(G.number_of_nodes(), FEATURE_DIM)

# Convert to a torch_geometric data object and attach the feature matrix.
data = from_networkx(G)
data.x = torch.tensor(node_features, dtype=torch.float)


class GCN(torch.nn.Module):
    """Two-layer graph convolutional network.

    The network applies ``GCNConv -> ReLU -> GCNConv``. No activation is
    applied to the second layer, so ``forward`` returns raw per-node scores.

    Args:
        in_channels: Width of the input node-feature vectors.
        out_channels: Width of the per-node output vectors.
        hidden_channels: Width of the intermediate representation between the
            two convolutions. Defaults to 16, the value that was previously
            hard-coded.
    """

    def __init__(self, in_channels, out_channels, hidden_channels=16):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        """Run both convolutions over the graph described by ``edge_index``.

        Args:
            x: Node-feature tensor passed to the first convolution.
            edge_index: Graph connectivity tensor passed to both convolutions.

        Returns:
            The output tensor of the second convolution.
        """
        hidden = F.relu(self.conv1(x, edge_index))
        return self.conv2(hidden, edge_index)

def compute_node_embeddings(data, num_clusters, epochs=200, lr=0.01, random_state=None):
    """Train a GCN on self-generated pseudo-labels and return its node outputs.

    The procedure is fully unsupervised:

    1. A freshly initialised ``GCN`` is run once and its outputs are clustered
       with KMeans; the cluster ids become fixed pseudo-labels.
    2. The same network is then trained with cross-entropy to predict those
       pseudo-labels.
    3. The trained network's outputs are returned as the embeddings.

    Args:
        data: Graph data object exposing ``x``, ``edge_index`` and
            ``num_node_features``.
        num_clusters: Number of KMeans clusters; also the width of the
            network output (and therefore of the returned embeddings).
        epochs: Number of full-graph optimisation steps. Defaults to 200.
        lr: Adam learning rate. Defaults to 0.01.
        random_state: Optional seed forwarded to the pseudo-label KMeans.
            ``None`` (the default) keeps scikit-learn's unseeded behaviour.

    Returns:
        numpy.ndarray holding the trained model's output for every node.
    """
    model = GCN(data.num_node_features, num_clusters)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = torch.nn.CrossEntropyLoss()
    model.train()

    # Pseudo-labels come from the *untrained* network and stay fixed for the
    # whole training run. No gradients are needed for this pass.
    with torch.no_grad():
        initial_embeddings = model(data.x, data.edge_index)
    pseudo_kmeans = KMeans(n_clusters=num_clusters, random_state=random_state)
    pseudo_kmeans.fit(initial_embeddings.numpy())
    labels = torch.tensor(pseudo_kmeans.labels_, dtype=torch.long)

    for _ in range(epochs):
        optimizer.zero_grad()
        out = model(data.x, data.edge_index)
        loss = criterion(out, labels)
        loss.backward()
        optimizer.step()

    model.eval()
    with torch.no_grad():
        embeddings = model(data.x, data.edge_index).numpy()
    return embeddings

num_clusters = 10

gcn_embeddings = compute_node_embeddings(data, num_clusters)

# Fixed seed for every stochastic clusterer below, so the partitions (and the
# metrics derived from them) are the same on every run.
CLUSTER_SEED = 42

# The three scikit-learn clusterers all operate on the rows of the GCN
# embedding matrix, not on the graph directly.
kmeans = KMeans(n_clusters=num_clusters, random_state=CLUSTER_SEED).fit(gcn_embeddings)
# AgglomerativeClustering is given no seed here (none was used originally).
hierarchical = AgglomerativeClustering(n_clusters=num_clusters).fit(gcn_embeddings)
spectral = SpectralClustering(
    n_clusters=num_clusters,
    affinity='nearest_neighbors',
    random_state=CLUSTER_SEED,
).fit(gcn_embeddings)

# Louvain runs on the graph itself and picks its own number of communities;
# num_clusters is not passed to it.
louvain = community_louvain.best_partition(G, random_state=CLUSTER_SEED)


def deepwalk_embedding(G, dimensions=64, walk_length=30, num_walks=200, workers=4):
    """Embed every node of ``G`` from random walks fed to a skip-gram model.

    Args:
        G: Graph whose nodes are embedded.
        dimensions: Forwarded to ``Node2Vec`` as ``dimensions``.
        walk_length: Forwarded to ``Node2Vec`` as ``walk_length``.
        num_walks: Forwarded to ``Node2Vec`` as ``num_walks``.
        workers: Forwarded to ``Node2Vec`` as ``workers``.

    Returns:
        numpy.ndarray with one embedding row per node, in ``G.nodes()``
        iteration order.
    """
    walker = Node2Vec(
        G,
        dimensions=dimensions,
        walk_length=walk_length,
        num_walks=num_walks,
        workers=workers,
    )
    skipgram = walker.fit(window=10, min_count=1)

    # The trained vectors are looked up by the string form of each node id.
    rows = []
    for node in G.nodes():
        rows.append(skipgram.wv[str(node)])
    return np.array(rows)

deepwalk_embeddings = deepwalk_embedding(G)

# Return (p) and in-out (q) walk-bias parameters for the Node2Vec baseline.
# NOTE(review): 1 and 1 are, as far as the node2vec package documents, its
# defaults, so spelling them out does not change behaviour. With p == q == 1
# the walks are unbiased, i.e. the same walk strategy deepwalk_embedding uses;
# this row then differs from "DeepWalk" only in the skip-gram settings passed
# to fit(). Tune p/q if a genuinely distinct Node2Vec baseline is intended.
NODE2VEC_P = 1
NODE2VEC_Q = 1

node2vec = Node2Vec(
    G,
    dimensions=64,
    walk_length=30,
    num_walks=200,
    p=NODE2VEC_P,
    q=NODE2VEC_Q,
    workers=4,
).fit()
# Rows follow G.nodes() iteration order; vectors are keyed by str(node).
node2vec_embeddings = np.array([node2vec.wv[str(node)] for node in G.nodes()])

# Seed KMeans so the cluster assignments are reproducible for a given
# embedding matrix.
EMBEDDING_KMEANS_SEED = 42
deepwalk_clusters = KMeans(
    n_clusters=num_clusters, random_state=EMBEDDING_KMEANS_SEED
).fit_predict(deepwalk_embeddings)
node2vec_clusters = KMeans(
    n_clusters=num_clusters, random_state=EMBEDDING_KMEANS_SEED
).fit_predict(node2vec_embeddings)


def compute_metrics(G, labels, true_labels=None):
    """Score a node partition of ``G``.

    Args:
        G: The graph that was clustered.
        labels: Sequence of cluster ids, one per node, given in
            ``G.nodes()`` iteration order (``labels[i]`` belongs to the i-th
            node yielded by ``G.nodes()``).
        true_labels: Optional reference partition in the same node order.
            NMI and ARI are only defined relative to a reference, so without
            one they are reported as NaN.

    Returns:
        Tuple ``(modularity, nmi, ari)``.

    Raises:
        ValueError: If ``labels`` does not have exactly one entry per node.
    """
    labels = np.asarray(labels)
    nodes = list(G.nodes())
    if len(labels) != len(nodes):
        raise ValueError(
            f"expected {len(nodes)} labels (one per node), got {len(labels)}"
        )

    # Group actual node ids by cluster. Positions in `labels` are not node ids:
    # G.nodes() is in insertion order, which need not be 0..n-1.
    members = {}
    for node, label in zip(nodes, labels.tolist()):
        members.setdefault(label, []).append(node)
    communities = [members[label] for label in sorted(members)]

    modularity = nx.algorithms.community.quality.modularity(G, communities)

    # Scoring against the node ids themselves (every node its own class) says
    # nothing about clustering quality, so no reference means no NMI/ARI.
    if true_labels is None:
        return modularity, float('nan'), float('nan')

    true_labels = np.asarray(true_labels)
    nmi = NMI(true_labels, labels)
    ari = ARI(true_labels, labels)
    return modularity, nmi, ari

# Louvain hands back a {node: community_id} mapping. Read it out in
# G.nodes() order so its label vector lines up position-by-position with the
# embedding-based label vectors, rather than relying on the dict's own
# ordering, and pass it as an array like every other label vector.
louvain_labels = np.array([louvain[node] for node in G.nodes()])

# The 'GCN' row is KMeans applied to the GCN embeddings.
metrics = {
    'Hierarchical Clustering': compute_metrics(G, hierarchical.labels_),
    'Spectral Clustering': compute_metrics(G, spectral.labels_),
    'Louvain Method': compute_metrics(G, louvain_labels),
    'DeepWalk': compute_metrics(G, deepwalk_clusters),
    'Node2Vec': compute_metrics(G, node2vec_clusters),
    'GCN': compute_metrics(G, kmeans.labels_)
}

# One row per method, one column per metric.
df = pd.DataFrame(metrics, index=['Modularity', 'NMI', 'ARI']).transpose()

print(df)


Computing transition probabilities:   0%|          | 0/4039 [00:01<?, ?it/s]

Computing transition probabilities:   0%|          | 0/4039 [00:00<?, ?it/s]

                         Modularity       NMI  ARI
Hierarchical Clustering    0.007645  0.406231  0.0
Spectral Clustering        0.009433  0.392169  0.0
Louvain Method             0.201804  0.458522  0.0
DeepWalk                   0.223777  0.421839  0.0
Node2Vec                   0.246428  0.414344  0.0
GCN                        0.006494  0.395117  0.0


In [None]:
import networkx as nx
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans, AgglomerativeClustering, SpectralClustering
from sklearn.metrics import normalized_mutual_info_score as NMI, adjusted_rand_score as ARI
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data
from torch_geometric.utils import from_networkx
from community import community_louvain
from node2vec import Node2Vec

# Edge-list file for the Facebook graph (facebook_combined.txt).
# NOTE(review): this is a machine-specific absolute path; point it at your
# own copy of the file before running the notebook on another machine.
file_path = r'D:\New folder (4)\facebook_combined.txt\facebook_combined.txt'

# Seed the global NumPy and torch generators so the random node features and
# the GCN weight initialisation are the same on every Restart & Run All.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Node ids in the file are parsed as ints.
G = nx.read_edgelist(file_path, nodetype=int)

# The edge list carries no node attributes, so uniform-random values are used
# as placeholder input features (one row per node).
FEATURE_DIM = 16
node_features = np.random.rand(G.number_of_nodes(), FEATURE_DIM)

# Convert to a torch_geometric data object and attach the feature matrix.
data = from_networkx(G)
data.x = torch.tensor(node_features, dtype=torch.float)


class GCN(torch.nn.Module):
    """Two-layer graph convolutional network.

    The network applies ``GCNConv -> ReLU -> GCNConv``. No activation is
    applied to the second layer, so ``forward`` returns raw per-node scores.

    Args:
        in_channels: Width of the input node-feature vectors.
        out_channels: Width of the per-node output vectors.
        hidden_channels: Width of the intermediate representation between the
            two convolutions. Defaults to 16, the value that was previously
            hard-coded.
    """

    def __init__(self, in_channels, out_channels, hidden_channels=16):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        """Run both convolutions over the graph described by ``edge_index``.

        Args:
            x: Node-feature tensor passed to the first convolution.
            edge_index: Graph connectivity tensor passed to both convolutions.

        Returns:
            The output tensor of the second convolution.
        """
        hidden = F.relu(self.conv1(x, edge_index))
        return self.conv2(hidden, edge_index)

def compute_node_embeddings(data, num_clusters, epochs=200, lr=0.01, random_state=None):
    """Train a GCN on self-generated pseudo-labels and return its node outputs.

    The procedure is fully unsupervised:

    1. A freshly initialised ``GCN`` is run once and its outputs are clustered
       with KMeans; the cluster ids become fixed pseudo-labels.
    2. The same network is then trained with cross-entropy to predict those
       pseudo-labels.
    3. The trained network's outputs are returned as the embeddings.

    Args:
        data: Graph data object exposing ``x``, ``edge_index`` and
            ``num_node_features``.
        num_clusters: Number of KMeans clusters; also the width of the
            network output (and therefore of the returned embeddings).
        epochs: Number of full-graph optimisation steps. Defaults to 200.
        lr: Adam learning rate. Defaults to 0.01.
        random_state: Optional seed forwarded to the pseudo-label KMeans.
            ``None`` (the default) keeps scikit-learn's unseeded behaviour.

    Returns:
        numpy.ndarray holding the trained model's output for every node.
    """
    model = GCN(data.num_node_features, num_clusters)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = torch.nn.CrossEntropyLoss()
    model.train()

    # Pseudo-labels come from the *untrained* network and stay fixed for the
    # whole training run. No gradients are needed for this pass.
    with torch.no_grad():
        initial_embeddings = model(data.x, data.edge_index)
    pseudo_kmeans = KMeans(n_clusters=num_clusters, random_state=random_state)
    pseudo_kmeans.fit(initial_embeddings.numpy())
    labels = torch.tensor(pseudo_kmeans.labels_, dtype=torch.long)

    for _ in range(epochs):
        optimizer.zero_grad()
        out = model(data.x, data.edge_index)
        loss = criterion(out, labels)
        loss.backward()
        optimizer.step()

    model.eval()
    with torch.no_grad():
        embeddings = model(data.x, data.edge_index).numpy()
    return embeddings

def conductance(G, clusters):
    """Mean conductance of a collection of node clusters.

    For each cluster ``S`` the conductance is the size of the cut between
    ``S`` and the rest of the graph divided by the smaller of the two volumes
    (volume = sum of node degrees). Lower values mean better-separated
    clusters.

    Args:
        G: The graph the clusters live in.
        clusters: Iterable of node collections; each must contain node ids
            of ``G``.

    Returns:
        The average per-cluster conductance as a float. Clusters for which
        the ratio is undefined (zero volume on either side of the cut) add
        nothing to the sum but are still counted in the average. Returns NaN
        when ``clusters`` is empty.
    """
    clusters = list(clusters)
    if not clusters:
        # Nothing to average over; avoid dividing by zero below.
        return float('nan')

    graph_volume = sum(dict(G.degree()).values())

    total = 0.0
    for cluster in clusters:
        cut = nx.algorithms.cuts.cut_size(G, cluster)
        volume = sum(dict(G.degree(cluster)).values())
        # Normalise by the smaller side of the cut: using the cluster's own
        # volume alone understates conductance for clusters that hold more
        # than half of the graph's volume.
        denominator = min(volume, graph_volume - volume)
        if denominator <= 0:
            continue
        total += cut / denominator
    return total / len(clusters)

num_clusters = 10
gcn_embeddings = compute_node_embeddings(data, num_clusters)

# Fixed seed for every stochastic clusterer below, so the partitions (and the
# metrics derived from them) are the same on every run.
CLUSTER_SEED = 42

# The three scikit-learn clusterers all operate on the rows of the GCN
# embedding matrix, not on the graph directly.
kmeans = KMeans(n_clusters=num_clusters, random_state=CLUSTER_SEED).fit(gcn_embeddings)
# AgglomerativeClustering is given no seed here (none was used originally).
hierarchical = AgglomerativeClustering(n_clusters=num_clusters).fit(gcn_embeddings)
spectral = SpectralClustering(
    n_clusters=num_clusters,
    affinity='nearest_neighbors',
    random_state=CLUSTER_SEED,
).fit(gcn_embeddings)

# Louvain runs on the graph itself and picks its own number of communities;
# num_clusters is not passed to it.
louvain = community_louvain.best_partition(G, random_state=CLUSTER_SEED)


def deepwalk_embedding(G, dimensions=64, walk_length=30, num_walks=200, workers=4):
    """Embed every node of ``G`` from random walks fed to a skip-gram model.

    Args:
        G: Graph whose nodes are embedded.
        dimensions: Forwarded to ``Node2Vec`` as ``dimensions``.
        walk_length: Forwarded to ``Node2Vec`` as ``walk_length``.
        num_walks: Forwarded to ``Node2Vec`` as ``num_walks``.
        workers: Forwarded to ``Node2Vec`` as ``workers``.

    Returns:
        numpy.ndarray with one embedding row per node, in ``G.nodes()``
        iteration order.
    """
    walker = Node2Vec(
        G,
        dimensions=dimensions,
        walk_length=walk_length,
        num_walks=num_walks,
        workers=workers,
    )
    skipgram = walker.fit(window=10, min_count=1)

    # The trained vectors are looked up by the string form of each node id.
    rows = []
    for node in G.nodes():
        rows.append(skipgram.wv[str(node)])
    return np.array(rows)

deepwalk_embeddings = deepwalk_embedding(G)


# Return (p) and in-out (q) walk-bias parameters for the Node2Vec baseline.
# NOTE(review): 1 and 1 are, as far as the node2vec package documents, its
# defaults, so spelling them out does not change behaviour. With p == q == 1
# the walks are unbiased, i.e. the same walk strategy deepwalk_embedding uses;
# this row then differs from "DeepWalk" only in the skip-gram settings passed
# to fit(). Tune p/q if a genuinely distinct Node2Vec baseline is intended.
NODE2VEC_P = 1
NODE2VEC_Q = 1

node2vec = Node2Vec(
    G,
    dimensions=64,
    walk_length=30,
    num_walks=200,
    p=NODE2VEC_P,
    q=NODE2VEC_Q,
    workers=4,
).fit()
# Rows follow G.nodes() iteration order; vectors are keyed by str(node).
node2vec_embeddings = np.array([node2vec.wv[str(node)] for node in G.nodes()])

# Seed KMeans so the cluster assignments are reproducible for a given
# embedding matrix.
EMBEDDING_KMEANS_SEED = 42
deepwalk_clusters = KMeans(
    n_clusters=num_clusters, random_state=EMBEDDING_KMEANS_SEED
).fit_predict(deepwalk_embeddings)
node2vec_clusters = KMeans(
    n_clusters=num_clusters, random_state=EMBEDDING_KMEANS_SEED
).fit_predict(node2vec_embeddings)

def compute_metrics(G, labels):
    """Return the mean conductance of the partition described by ``labels``.

    Args:
        G: The graph that was clustered.
        labels: Sequence of cluster ids, one per node, given in
            ``G.nodes()`` iteration order (``labels[i]`` belongs to the i-th
            node yielded by ``G.nodes()``).

    Returns:
        The value returned by ``conductance`` for the resulting clusters.

    Raises:
        ValueError: If ``labels`` does not have exactly one entry per node.
    """
    labels = np.asarray(labels)
    nodes = list(G.nodes())
    if len(labels) != len(nodes):
        raise ValueError(
            f"expected {len(nodes)} labels (one per node), got {len(labels)}"
        )

    # Group actual node ids by cluster. Positions in `labels` are not node ids:
    # G.nodes() is in insertion order, which need not be 0..n-1.
    members = {}
    for node, label in zip(nodes, labels.tolist()):
        members.setdefault(label, []).append(node)
    clusters = [members[label] for label in sorted(members)]

    conductance_value = conductance(G, clusters)
    return conductance_value

# Louvain hands back a {node: community_id} mapping. Read it out in
# G.nodes() order so its label vector lines up position-by-position with the
# embedding-based label vectors, rather than relying on the dict's own
# ordering.
louvain_labels = np.array([louvain[node] for node in G.nodes()])

# NOTE(review): 'KMeans' and 'GCN' are both computed from kmeans.labels_
# (KMeans on the GCN embeddings), so these two rows are identical by
# construction. Both keys are kept so the table layout is unchanged.
metrics = {
    'KMeans': compute_metrics(G, kmeans.labels_),
    'Hierarchical Clustering': compute_metrics(G, hierarchical.labels_),
    'Spectral Clustering': compute_metrics(G, spectral.labels_),
    'Louvain Method': compute_metrics(G, louvain_labels),
    'DeepWalk': compute_metrics(G, deepwalk_clusters),
    'Node2Vec': compute_metrics(G, node2vec_clusters),
    'GCN': compute_metrics(G, kmeans.labels_)
}

# One row per method, a single 'Conductance' column.
df = pd.DataFrame(metrics, index=['Conductance']).transpose()

print(df)


Computing transition probabilities:   0%|          | 0/4039 [00:00<?, ?it/s]

Computing transition probabilities:   0%|          | 0/4039 [00:00<?, ?it/s]

                         Conductance
KMeans                      0.894359
Hierarchical Clustering     0.895845
Spectral Clustering         0.895565
Louvain Method              0.745604
DeepWalk                    0.621720
Node2Vec                    0.589396
GCN                         0.894359
