# Node classification on the Cora dataset with the best hyperparameters

In [1]:
%load_ext nb_black

<IPython.core.display.Javascript object>

In [2]:
from node2vec_algo.pytorch_node2vec import GraphEmbeddingpy, SkipGram

<IPython.core.display.Javascript object>

In [3]:
import os
import collections
import numpy as np
import pandas as pd
import json
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.sparse as sp
import torch
from torch import Tensor
import torch_geometric
from torch_geometric.utils import to_networkx
from torch_geometric.datasets import Planetoid
import networkx as nx
from networkx.algorithms import community

<IPython.core.display.Javascript object>

In [4]:
# Set random seeds for reproducibility
SEED = 42
np.random.seed(SEED)
# The node2vec implementation used below is PyTorch-based (pytorch_node2vec),
# so torch's global RNG is seeded as well; NumPy's seed alone does not cover it.
torch.manual_seed(SEED)
# NOTE(review): if the random-walk code draws from Python's `random` module,
# that generator needs seeding too - confirm against GraphEmbeddingpy.

# Directory handed to Planetoid as its root; created if it does not exist yet.
data_dir = "Data/"
os.makedirs(data_dir, exist_ok=True)

<IPython.core.display.Javascript object>

In [5]:
# Load the Cora dataset through Planetoid, using data_dir as its root folder.
dataset = Planetoid(root=data_dir, name="Cora")

print("Dataset properties")
print("==============================================================")
# One line each: dataset repr, number of graphs, per-node feature count,
# and the number of classes a node can be assigned to.
print(
    f"Dataset: {dataset}",
    f"Number of graphs in the dataset: {len(dataset)}",
    f"Number of features: {dataset.num_features}",
    f"Number of classes: {dataset.num_classes}",
    sep="\n",
)

Dataset properties
Dataset: Cora()
Number of graphs in the dataset: 1
Number of features: 1433
Number of classes: 7


<IPython.core.display.Javascript object>

In [6]:
# The dataset holds a single graph; take it as the working Data object.
data = dataset[0]

print("Graph properties")
print("==============================================================")

# Structural statistics of the graph, printed one per line.
print(
    "\n".join(
        [
            # Node and edge counts.
            f"Number of nodes: {data.num_nodes}",
            f"Number of edges: {data.num_edges}",
            # Edges per node, i.e. the average node degree (not a node count).
            f"Average node degree: {data.num_edges / data.num_nodes:.2f}",
            # Whether any node has no connections at all.
            f"Contains isolated nodes: {data.has_isolated_nodes()}",
            # Whether any node is linked to itself.
            f"Contains self-loops: {data.has_self_loops()}",
            # Whether the graph is undirected.
            f"Is undirected: {data.is_undirected()}",
        ]
    )
)

Graph properties
Number of nodes: 2708
Number of edges: 10556
Average node degree: 3.90
Contains isolated nodes: False
Contains self-loops: False
Is undirected: True


<IPython.core.display.Javascript object>

In [7]:
data

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])

<IPython.core.display.Javascript object>

In [8]:
# Repeats the average-degree figure already printed in the graph-properties cell.
print(f"Average node degree: {data.num_edges / data.num_nodes:.2f}")

Average node degree: 3.90


<IPython.core.display.Javascript object>

In [9]:
# Convert the PyG Data object into an undirected NetworkX graph; G is the graph
# handed to GraphEmbeddingpy further down.
G = to_networkx(data, to_undirected=True)

<IPython.core.display.Javascript object>

In [10]:
def loss_function(Z, S, squared=False):
    """
    Measure how closely the embeddings reproduce the similarity matrix.

    Computes the norm ||Z^T*Z - S|| (Frobenius norm for 2-D input), or its
    square ||Z^T*Z - S||^2 when ``squared`` is True. The default is the
    unsquared norm, which is what this function returned before the flag
    was added.

    Args:
        Z (numpy.ndarray): The node embeddings, laid out so that ``Z.T @ Z``
            is comparable with ``S``.
        S (numpy.ndarray): The similarity matrix.
        squared (bool): If True, return the squared norm instead of the norm.

    Returns:
        float: The value of the objective function.
    """
    # Accept anything array-like (lists, CPU tensors) and work on ndarrays.
    Z = np.asarray(Z)
    residual = np.dot(np.transpose(Z), Z) - np.asarray(S)
    # np.linalg.norm with default arguments is the Frobenius norm for matrices.
    error = float(np.linalg.norm(residual))
    return error**2 if squared else error

<IPython.core.display.Javascript object>

In [11]:
# Load the parameters from the JSON file.
# The encoding is stated explicitly so that parsing does not depend on the
# platform's default text encoding.
with open("best_hyperparameters.json", "r", encoding="utf-8") as json_file:
    hyperparameters = json.load(json_file)

<IPython.core.display.Javascript object>

In [12]:
hyperparameters

{'return_param': 2.832290311184182,
 'in_out_param': 1.3013213230530574,
 'num_walks': 6,
 'walk_length': 77,
 'window_size': 8,
 'dimension': 256,
 'epochs': 50,
 'negative': 3,
 'batch_words': 16,
 'learning_rate': 0.0001,
 'min_count': 1,
 'weight_decay': 0.0001}

<IPython.core.display.Javascript object>

In [13]:
# # Define the hyperparameter search space
# param_space = {
#     "return_param": np.random.uniform(0.0, 4.0, 50),  # Example: random uniform sampling
#     "in_out_param": np.random.uniform(0.0, 4.0, 50),  # Example: random uniform sampling
#     "num_walks": np.random.randint(3, 12, 8),  # Example: random integer sampling
#     "walk_length": np.random.randint(50, 100, 10),  # Example: random integer sampling
#     "window_size": np.random.randint(5, 12, 5),  # Example: random integer sampling
#     "dimension": np.random.choice([32, 64, 128, 256], 6),  # Example: random choice
#     #     "epochs": np.random.choice([10, 20, 30, 50, 60], 5),  # Example: random choice
#     "epochs": 5,  # Example: random choice
#     "negative": np.random.choice([3, 6], 5),  # Example: random choice
#     "batch_words": np.random.choice([8, 16], 5),  # Example: random choice
#     "learning_rate": np.random.choice([0.01, 0.0001, 0.02], 10),
#     "min_count": np.random.choice([0, 1, 2], 3),
#     "weight_decay": np.random.choice([1e-3, 1e-4, 1e-5, 1e-2], 4),
# }

# best_loss = float("inf")
# best_params = None

<IPython.core.display.Javascript object>

In [14]:
# Build the GraphEmbeddingpy instance on the Cora graph, taking the walk
# settings from the hyperparameters loaded from best_hyperparameters.json.
embedding = GraphEmbeddingpy(
    graph=G,
    **{
        name: hyperparameters[name]
        for name in ("return_param", "in_out_param", "num_walks", "walk_length")
    },
)


<IPython.core.display.Javascript object>

In [15]:
# Compute the transition probabilities first. The returned value is kept in
# `probab` but is not read again in the cells shown here.
# NOTE(review): presumably this call also caches state on `embedding` that the
# walk generation relies on - confirm before reordering or removing it.
probab = embedding.calculate_transition_probabilities()
# Generate the random walks, then build S from the second return value; S is
# the matrix later compared against Z^T Z in loss_function.
# The recorded progress bars show these two steps taking about 3 and 10.5 minutes.
walks, uns = embedding.generate_random_walks()
S = embedding.calculate_probability_matrix(uns)

Calculating Transition Probabilities: 100%|██████████████████████████████████████████████████████████████████████████| 2708/2708 [00:00<00:00, 9684.00it/s]
Generating Random Walks: 100%|█████████████████████████████████████████████████████████████████████████████████████████| 2708/2708 [02:52<00:00, 15.68it/s]
Calculating Probabilities: 100%|███████████████████████████████████████████████████████████████████████████████████████| 2708/2708 [10:36<00:00,  4.26it/s]


<IPython.core.display.Javascript object>

In [17]:
# Train the embedding model on the generated walks; every training setting is
# read from the loaded hyperparameters under the keyword of the same name.
node_embeddings = embedding.training(
    walks,
    **{
        key: hyperparameters[key]
        for key in (
            "window_size",
            "dimension",
            "epochs",
            "weight_decay",
            "learning_rate",
        )
    },
)


Training Word2Vec:   0%|                                                                                                            | 0/50 [35:02<?, ?it/s]


KeyboardInterrupt: 

<IPython.core.display.Javascript object>

In [None]:
# Pull the learned input-embedding weights out of the trained model.
# NOTE(review): assumes `in_embed.weight` has one row per node - confirm
# against the SkipGram definition.
embeddings_gen = node_embeddings.in_embed.weight.data
# Fixed: this line previously read the undefined name `embeddings`, which
# raises NameError. The tensor is moved to CPU and converted to NumPy because
# loss_function operates on numpy arrays, then transposed to form Z.
Z = embeddings_gen.cpu().numpy().T

In [None]:
# Evaluate loss_function on the embedding matrix Z and the matrix S; the
# result is stored in `losc_cal` and not displayed by this cell.
losc_cal = loss_function(Z, S)

## Generating Node Embeddings using the custom Node2vec implementation

## Generating Node Embeddings using Node2vec

## Generating Node Embeddings using Deepwalk