In [233]:
import os

import torch 
from torch import nn
from torch_geometric.datasets import Planetoid
from torch_geometric.utils import to_scipy_sparse_matrix
import torch.nn.functional as F 
import pandas as pd
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt



In [None]:
# Declare all hyperparameters
HIDDEN_DIM=16
DROPOUT_RATE=0.1
LEARNING_RATE=0.01
WEIGHT_DECAY=4e-5
BATCH_SIZE=256
RANDOM_SEED=42
NUM_EPOCHS=100000
NEGATIVE_EDGES=2
THRESHOLD_POSITIVE=0.5

In [235]:
# Load and preprocess the Cora dataset to obtain the adjacency matrix and node features matrix
def load_cora_and_preprocessing(path="data/cora"):
    content=pd.read_csv(f"{path}/cora.content", sep="\t", header=None)
    cites=pd.read_csv(f"{path}/cora.cites", sep="\t", header=None)

    # Extract node featrues
    paper_id=content.iloc[:,0]
    features_matrix=content.iloc[:,1:-1]
    class_label=content.iloc[:,-1]

    # Mapping paper id to index
    id_to_index={paper_id: idx for idx,paper_id in enumerate(paper_id)}
    index_to_id={idx: paper_id for idx,paper_id in enumerate(paper_id)}

    n_nodes=len(paper_id)
    adjacency_matrix=np.zeros((n_nodes,n_nodes))

    for _, row in cites.iterrows():
        citied_paper_id,citing_paper_id=row[0],row[1]
        citied_paper_idx=id_to_index[citied_paper_id]
        citing_paper_idx=id_to_index[citing_paper_id]
        adjacency_matrix[citing_paper_idx,citied_paper_idx]=1
    
    return adjacency_matrix, features_matrix, class_label, id_to_index, index_to_id


In [236]:
def graph_construction(adjacency_matrix, features_matrix, class_label,index_to_id ):
    
    G=nx.Graph(name='G')
    
    for i in range(len(index_to_id)):
        G.add_node(i,name=index_to_id[i], features=features_matrix.iloc[i], class_label=class_label[i])
    
    # Add edges to the graph
    for i in range(len(adjacency_matrix)):
        for j in range(len(adjacency_matrix)):
            if adjacency_matrix[i][j] == 1:
                G.add_edge(i, j)
                G.add_edge(j,i)

    G=G.to_undirected()
    return G



In [237]:
class GraphConv(nn.Module):
    def __init__(self, input_size,output_size,dropout_rate):
        super(GraphConv, self).__init__()
        self.fc1=nn.Linear(input_size, output_size)
        self.dropout=nn.Dropout(dropout_rate)
        self.relu1=nn.ReLU()

    def forward(self, x):
        x= self.fc1(x)
        x= self.relu1(x)
        x= self.dropout(x)
        return x
    

In [238]:
class Model(nn.Module):
    def __init__(self, input_size,hidden_size,output_size,dropout_rate):
        super().__init__()
        self.conv1=GraphConv(input_size, hidden_size, dropout_rate)
        self.conv2=GraphConv(hidden_size, hidden_size, dropout_rate)
        

    def forward(self, x):
        x=self.conv1(x)
        x=self.conv2(x)
        return F.log_softmax(x,dim=1)

In [239]:
def main():
    adjacencymatrix,features_matrix,class_label,id_to_index,index_to_id = load_cora_and_preprocessing()
    G=graph_construction(adjacencymatrix,features_matrix,class_label,index_to_id)

    print("number of edges in the graph: ", G.number_of_edges())

    device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model=Model(input_size=features_matrix.shape[1], hidden_size=HIDDEN_DIM, output_size=class_label.nunique(), dropout_rate=DROPOUT_RATE).to(device)
    optimizer=torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
    data=Planetoid(root='data/Cora', name='Cora')
    data=data[0]
    model.train()
    for epoch in range(NUM_EPOCHS):
        optimizer.zero_grad()
        out=model(data.x)
        loss=F.nll_loss(out[data.train_mask], data.y[data.train_mask])
        loss.backward()
        optimizer.step()
        print(f'Epoch {epoch+1}/{NUM_EPOCHS}, Loss: {loss.item():.4f}')   
    model.eval()
    pred=model(data.x).argmax(dim=1)
    correct=(pred[data.test_mask]==data.y[data.test_mask]).sum()
    acc=correct.item()/data.test_mask.sum().item()
    print(f'Accuracy: {acc:.4f}')
    # data=



In [240]:
if __name__ == "__main__":
    main()

number of edges in the graph:  5278
Epoch 1/1000, Loss: 2.7814
Epoch 2/1000, Loss: 2.7400
Epoch 3/1000, Loss: 2.6668
Epoch 4/1000, Loss: 2.5779
Epoch 5/1000, Loss: 2.4813
Epoch 6/1000, Loss: 2.4032
Epoch 7/1000, Loss: 2.2625
Epoch 8/1000, Loss: 2.0999
Epoch 9/1000, Loss: 2.0578
Epoch 10/1000, Loss: 1.8600
Epoch 11/1000, Loss: 1.6850
Epoch 12/1000, Loss: 1.5620
Epoch 13/1000, Loss: 1.4688
Epoch 14/1000, Loss: 1.4799
Epoch 15/1000, Loss: 1.3726
Epoch 16/1000, Loss: 1.2032
Epoch 17/1000, Loss: 1.1637
Epoch 18/1000, Loss: 1.0647
Epoch 19/1000, Loss: 1.0100
Epoch 20/1000, Loss: 0.9215
Epoch 21/1000, Loss: 0.9095
Epoch 22/1000, Loss: 0.9655
Epoch 23/1000, Loss: 0.8913
Epoch 24/1000, Loss: 0.8574
Epoch 25/1000, Loss: 1.0758
Epoch 26/1000, Loss: 0.7097
Epoch 27/1000, Loss: 0.8297
Epoch 28/1000, Loss: 0.9050
Epoch 29/1000, Loss: 0.7230
Epoch 30/1000, Loss: 0.6904
Epoch 31/1000, Loss: 0.6486
Epoch 32/1000, Loss: 0.7128
Epoch 33/1000, Loss: 0.7170
Epoch 34/1000, Loss: 0.6947
Epoch 35/1000, Loss: 