In [97]:
import sys

In [98]:
# Put the parent directory on the import path so `src.*` can be imported
# from this notebook. The membership check keeps the cell idempotent:
# re-running it does not append a duplicate ".." entry each time.
if ".." not in sys.path:
    sys.path.append("..")

In [99]:
import torch
from src.data.graph import Graph

In [100]:
!pwd

/Users/eir/Downloads/11441/mlwg_f24_hw4-main/notebooks


#### We will start exploring the graph by instantiating a Graph object. That requires defining an argument dictionary.


In [101]:
# Keyword settings that are unpacked into the Graph constructor below.
args = dict(
    graph="cora",  # name of the graph
    basepath="../data",  # path to the directory that has the graph files
    task="classify",  # the task
    test_frac=0.20,  # fraction of the edges to be used as test split
    val_frac=0.20,  # fraction of the edges to be used as val split
    gpu=False,  # we don't need a GPU for exploring the graph
)

In [102]:
# Build the Graph from the settings above; each key in `args` is passed as a
# keyword argument. Loading prints the dataset name and the number of edges found.
graph = Graph(**args)

Loading cora dataset...
Found 5429 edges


### Each graph object has three important attributes
1. Node features (`graph.features`)
2. Node labels (`graph.labels`)
3. The adjacency matrix (`graph.adj`, a sparse COO matrix)

In [103]:
# graph.features is two-dimensional: unpack its shape into the number of
# nodes and the number of features, then display the pair.
num_nodes, num_features = graph.features.shape
num_nodes, num_features

(2708, 1433)

In [104]:
# Node labels; displayed below as a tensor of integer class ids.
graph.labels

tensor([2, 5, 4,  ..., 1, 0, 2])

##### Extract the edge information

In [105]:
# graph.adj is a sparse COO tensor. coalesce() merges duplicate entries, which
# PyTorch requires before indices() can be read. indices() holds one column per
# stored entry, so transposing and converting to a list yields one
# [row, col] pair per stored entry.
edges = graph.adj.coalesce().indices().T.tolist()

In [106]:
edges[:4]  # edges is a list of [node1, node2] pairs, each indicating an edge between node1 and node2

[[0, 0], [0, 8], [0, 14], [0, 258]]

In [107]:
# Number of stored entries in the adjacency matrix.
# NOTE(review): this is 13264, not the 5429 edges reported when the graph was
# loaded. Presumably graph.adj is symmetrized and includes self-loops (the
# preview above contains [0, 0]) -- confirm against the Graph implementation.
num_edges = len(edges)
num_edges

13264

In [108]:
from collections import defaultdict
def find_degree_per_node(edges):
    """Print the average, maximum and minimum in-degree of an edge list.

    Args:
        edges: iterable of (src, tgt) pairs. Each pair adds one to the
            in-degree of ``tgt``; ``src`` is ignored.

    Returns:
        None. The three values are printed on one line, separated by spaces,
        in the order average, maximum, minimum.

    Note:
        Only nodes that occur as a target at least once are tallied, so a
        node without any incoming edge contributes to none of the three
        values.
    """
    in_degree = defaultdict(int)
    for _src, tgt in edges:
        in_degree[tgt] += 1

    # An empty edge list used to raise ZeroDivisionError on the average.
    # Report zeros instead, matching what graph_statistics does.
    if not in_degree:
        print(0, 0, 0)
        return

    degrees = in_degree.values()
    avg_in_degree = sum(degrees) / len(in_degree)
    max_in_degree = max(degrees)
    min_in_degree = min(degrees)

    print(avg_in_degree, max_in_degree, min_in_degree)
    
        

In [109]:
# Prints the average, maximum and minimum in-degree, in that order.
find_degree_per_node(edges)

4.89807976366322 169 2


In [110]:
from collections import defaultdict

def graph_statistics(graph, name=None):
    """Print summary statistics for a graph.

    Args:
        graph: object exposing ``features`` (a 2-D tensor whose shape is
            unpacked as nodes x features) and ``adj`` (a sparse COO
            adjacency tensor).
        name: label printed on the ``Graph:`` line. When omitted, falls back
            to the notebook-level ``args['graph']``, which is what this
            function always printed before. Pass it explicitly when
            ``graph`` was not built from ``args``.

    Returns:
        None. Seven lines are printed: graph name, max / min / average
        in-degree, number of nodes, number of edges, feature dimension.

    Note:
        "Number of edges" is the number of stored entries in ``graph.adj``.
        In-degree statistics are taken over all ``num_nodes`` nodes, so a
        node with no incoming entry counts as in-degree 0.
        # assumes node ids in adj index the rows of features -- TODO confirm
    """
    # Extracting node and feature information
    num_nodes, num_features = graph.features.shape
    # Row 1 of the coalesced index matrix holds the target of every entry.
    targets = graph.adj.coalesce().indices()[1].tolist()
    num_edges = len(targets)

    # Calculating in-degrees
    in_degree = defaultdict(int)
    for tgt in targets:
        in_degree[tgt] += 1

    # Every entry contributes exactly one unit of in-degree, so the mean over
    # all nodes is entries / nodes. Dividing by len(in_degree) instead would
    # silently drop the nodes that never appear as a target.
    avg_in_degree = num_edges / num_nodes if num_nodes else 0
    max_in_degree = max(in_degree.values()) if in_degree else 0
    # If some node never appears as a target, the true minimum is 0.
    if in_degree and len(in_degree) >= num_nodes:
        min_in_degree = min(in_degree.values())
    else:
        min_in_degree = 0

    if name is None:
        name = args['graph']

    # Print statistics
    print(f"Graph: {name}")
    print(f"Max in-degree: {max_in_degree}")
    print(f"Min in-degree: {min_in_degree}")
    print(f"Average in-degree: {avg_in_degree:.2f}")
    print(f"Number of nodes: {num_nodes}")
    print(f"Number of edges: {num_edges}")
    print(f"Node feature dimension: {num_features}")

# Print the summary statistics for the graph loaded above.
graph_statistics(graph)

Graph: cora
Max in-degree: 169
Min in-degree: 2
Average in-degree: 4.90
Number of nodes: 2708
Number of edges: 13264
Node feature dimension: 1433
