In [1]:
import os
import networkx as nx
from node2vec import Node2Vec
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load every ".dat" edge-list file from the sample directory and derive its
# class label from the file name.
# NOTE(review): assumes names look like "<anything>_<label>.dat" — confirm
# against the actual data set.
graph_dir = "data/sample_set"
graphs = []
labels = []

# os.listdir() returns entries in arbitrary order; sorting keeps the order of
# `graphs` / `labels` (and any seeded step that consumes them) reproducible.
for file_name in sorted(os.listdir(graph_dir)):
    if not file_name.endswith(".dat"):
        continue  # ignore anything that is not an edge-list file

    file_path = os.path.join(graph_dir, file_name)
    graph = nx.read_edgelist(file_path)

    # The label is the last "_"-separated token of the name. Strip the
    # extension first so the label is "A" rather than "A.dat".
    stem = os.path.splitext(file_name)[0]
    label = stem.split("_")[-1]

    graphs.append(graph)
    labels.append(label)

In [3]:
# Node2Vec graph embedding: one 64-dimensional vector per graph, obtained by
# averaging the Node2Vec vectors of that graph's nodes.
smallSetEmbeddings = {}

for graph in graphs:
    # Train a separate Node2Vec model for each graph
    node2vec = Node2Vec(graph, dimensions=64, walk_length=30, num_walks=200, workers=4)
    model = node2vec.fit(window=10, min_count=1, batch_words=4)
    embeddings = {node: model.wv[node] for node in graph.nodes}

    # Aggregate node embeddings by averaging. The result is kept in its own
    # variable: assigning it to `smallSetEmbeddings` would replace the result
    # dict with a single vector and break the store below.
    graph_embedding = sum(embeddings.values()) / len(embeddings)

    # Store graph embedding, keyed by the graph object itself
    smallSetEmbeddings[graph] = graph_embedding

# Feature Extraction
# Dict insertion order follows `graphs`, so X stays aligned with `labels`.
X = list(smallSetEmbeddings.values())
y = labels

Computing transition probabilities: 100%|██████████| 1000/1000 [00:00<00:00, 5294.19it/s]


In [None]:
# Build one feature vector per graph by mean-pooling its Node2Vec node vectors.
graph_embeddings = {}

for graph in graphs:
    # Train a separate Node2Vec model for this graph.
    node2vec = Node2Vec(
        graph,
        dimensions=64,
        walk_length=30,
        num_walks=200,
        workers=4,
    )
    model = node2vec.fit(window=10, min_count=1, batch_words=4)

    # Look up the learned vector of every node in the graph.
    embeddings = dict((node, model.wv[node]) for node in graph.nodes)

    # Average the node vectors and file the result under its graph.
    graph_embedding = sum(embeddings.values()) / len(embeddings)
    graph_embeddings[graph] = graph_embedding

# Features follow the dict's insertion order, i.e. the order of `graphs`,
# so they line up with `labels`.
X = [*graph_embeddings.values()]
y = labels


In [None]:
# Hold out 20% of the samples for evaluation; the fixed seed makes the split
# repeatable for a given ordering of X and y.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Fit a seeded 100-tree random forest on the training split.
rf_model = RandomForestClassifier(
    random_state=42,
    n_estimators=100,
)
rf_model.fit(X_train, y_train)

In [None]:
# Make predictions
y_pred = rf_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Random Forest Accuracy: {accuracy}")