# Notebook testing a neural network (NN) for predicting DWPC (degree-weighted path count) from source and target node degree

In [None]:
# Import all required libraries
import os
import random
import requests
from typing import List, Dict, Tuple

# Data manipulation and scientific computing  
import numpy as np
import networkx as nx

# Machine learning and neural networks
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

# Domain-specific libraries
import hetmatpy
from hetmatpy.hetmat import hetmat_from_graph, HetMat
import hetnetpy



## 1. Load null (permuted) graphs

In [None]:
# URL template for the permuted ("null") Hetionet hetmats. The format spec
# zero-pads the permutation index to three digits, so base_url.format(1) ends
# in "001.hetmat" and base_url.format(12) ends in "012.hetmat" (a literal "00"
# prefix would only give valid names for indices 1-9).
base_url = "https://github.com/greenelab/connectivity-search-analyses/raw/main/data/hetionet-v1.0.hetmat/permutations/{:03d}.hetmat"

# Loaded null graphs are collected in this list, in permutation-index order
null_graphs = []

# function to download files locally
def download_file(url: str, dest_folder: str, timeout: float = 60.0):
    """Download ``url`` into ``dest_folder``, named after the URL's last path segment.

    Args:
        url: Address fetched with a single HTTP GET.
        dest_folder: Directory to save into; it is created (including parents)
            when missing. An empty string means the current working directory.
        timeout: Seconds passed to ``requests.get`` as its ``timeout``, so a
            stalled connection raises instead of hanging the notebook.

    Returns:
        None. On HTTP 200 the response body is written to disk and a
        "Downloaded ..." message is printed; for any other status code a
        "Failed ..." message is printed and no file is written.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + os.makedirs;
    # the guard skips makedirs(""), which would raise for the current directory
    if dest_folder:
        os.makedirs(dest_folder, exist_ok=True)

    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to download {url}")
        return

    file_path = os.path.join(dest_folder, os.path.basename(url))
    with open(file_path, 'wb') as f:
        f.write(response.content)
    print(f"Downloaded {url} to {file_path}")

# Permutation indices to fetch and load; on disk the names are zero-padded
# to three digits (001, 002, 003)
permutation_ids = range(1, 4)
permutations_dir = "../hetionet-v1.0.hetmat/permutations"

# downloading null hetmats
# NOTE(review): a .hetmat is normally a directory of files rather than a single
# file - confirm that these URLs resolve to something HetMat can open once saved.
for i in permutation_ids:
    url = base_url.format(i)
    download_file(url, permutations_dir)

# load graphs
for i in permutation_ids:
    null_graph = hetmatpy.hetmat.HetMat(f"{permutations_dir}/{i:03d}.hetmat")
    null_graphs.append(null_graph)
    print(f"\nNull graph {i} stats:")

    # HetMat does not expose node_ids / edge_matrices attributes; node and edge
    # data are reached through the metagraph, one metanode / metaedge at a time.
    metagraph = null_graph.metagraph
    num_nodes = sum(null_graph.count_nodes(metanode) for metanode in metagraph.get_nodes())

    # Edge total = nonzero adjacency entries summed over metaedges (inverse
    # metaedges are skipped so each edge type is visited once).
    # NOTE(review): for a metaedge joining a node type to itself the adjacency
    # matrix may be symmetric, which would count those edges twice - confirm.
    num_edges = 0
    for metaedge in metagraph.get_edges(exclude_inverts=True):
        _row_ids, _col_ids, adjacency = null_graph.metaedge_to_adjacency_matrix(metaedge)
        num_edges += len(adjacency.nonzero()[0])

    print(f"Number of nodes: {num_nodes}")
    print(f"Number of edges: {num_edges}")

Downloaded https://github.com/greenelab/connectivity-search-analyses/raw/main/data/hetionet-v1.0.hetmat/permutations/001.hetmat to ../hetionet-v1.0.hetmat/permutations/001.hetmat
Downloaded https://github.com/greenelab/connectivity-search-analyses/raw/main/data/hetionet-v1.0.hetmat/permutations/002.hetmat to ../hetionet-v1.0.hetmat/permutations/002.hetmat
Downloaded https://github.com/greenelab/connectivity-search-analyses/raw/main/data/hetionet-v1.0.hetmat/permutations/002.hetmat to ../hetionet-v1.0.hetmat/permutations/002.hetmat
Downloaded https://github.com/greenelab/connectivity-search-analyses/raw/main/data/hetionet-v1.0.hetmat/permutations/003.hetmat to ../hetionet-v1.0.hetmat/permutations/003.hetmat

Null graph 1 stats:
Downloaded https://github.com/greenelab/connectivity-search-analyses/raw/main/data/hetionet-v1.0.hetmat/permutations/003.hetmat to ../hetionet-v1.0.hetmat/permutations/003.hetmat

Null graph 1 stats:


AttributeError: 'HetMat' object has no attribute 'node_df'

In [None]:
# Hetionet v1.0 as bz2-compressed JSON; the URL embeds a commit hash, so the
# same file is fetched on every run
url = (
    'https://github.com/dhimmel/hetionet/raw/'
    '76550e6c93fbe92124edc71725e8c7dd4ca8b1f5'
    '/hetnet/json/hetionet-v1.0.json.bz2'
)
# Read the graph straight from the URL with hetnetpy
graph = hetnetpy.readwrite.read_graph(url)

In [None]:
# Ask the graph for its node and edge iterables up front
nodes = graph.get_nodes()
edges = graph.get_edges()

# Count nodes by walking the iterable once
num_nodes = sum(1 for _node in nodes)
print(f"Total number of nodes: {num_nodes}")

# Count edges the same way
num_edges = sum(1 for _edge in edges)
print(f"Total number of edges: {num_edges}")

Total number of nodes: 47031
Total number of edges: 2250197


In [None]:
# Mirror the hetionet graph as a NetworkX MultiGraph, which keeps parallel
# edges between the same pair of nodes as separate edges
G_nx = nx.MultiGraph()

# Nodes are keyed by hetionet identifier and carry their type, name and data
G_nx.add_nodes_from(
    (
        hnode.identifier,
        {
            'node_type': hnode.metanode.identifier,
            'name': hnode.name,
            'data': hnode.data,
        },
    )
    for hnode in graph.get_nodes()
)

# Every hetionet edge becomes one multigraph edge labelled with its metaedge kind
G_nx.add_edges_from(
    (
        hedge.source.identifier,
        hedge.target.identifier,
        {'edge_type': hedge.metaedge.kind, 'data': hedge.data},
    )
    for hedge in graph.get_edges()
)

node_total = G_nx.number_of_nodes()
edge_total = G_nx.number_of_edges()
print(f"Created NetworkX graph with {node_total} nodes and {edge_total} edges")

Created NetworkX graph with 47031 nodes and 2250197 edges


In [None]:
# Gather the identifiers of every node whose 'node_type' attribute is 'Disease'
# (nodes without that attribute yield None and are skipped)
disease_nodes = [
    node_id
    for node_id, kind in G_nx.nodes(data='node_type')
    if kind == 'Disease'
]
print(f"Number of disease nodes: {len(disease_nodes)}")

# Show at most the first five identifiers as a sanity check
print("Sample disease nodes:")
for n in disease_nodes[:5]:
    print(n)

Number of disease nodes: 137
Sample disease nodes:
DOID:14227
DOID:9352
DOID:8778
DOID:5612
DOID:363


In [None]:
# Tally nodes per type in one pass over G_nx, rather than rescanning every
# node once for each type
node_type_counts = {}
for _node_id, attrs in G_nx.nodes(data=True):
    kind = attrs['node_type']
    node_type_counts[kind] = node_type_counts.get(kind, 0) + 1

# Get unique node types from G_nx
node_types = set(node_type_counts)

# Print count of nodes for each type
print("\nNode count by type:")
for node_type in sorted(node_types):
    count = node_type_counts[node_type]
    print(f"- {node_type}: {count} nodes")


Node count by type:
- Anatomy: 402 nodes
- Biological Process: 11381 nodes
- Cellular Component: 1391 nodes
- Compound: 1552 nodes
- Disease: 137 nodes
- Gene: 20945 nodes
- Molecular Function: 2884 nodes
- Pathway: 1822 nodes
- Pharmacologic Class: 345 nodes
- Side Effect: 5734 nodes
- Symptom: 438 nodes


In [None]:
# Collect the distinct 'edge_type' labels found on the edges of G_nx
edge_types = {attrs['edge_type'] for _src, _dst, attrs in G_nx.edges(data=True)}

# List them alphabetically, one per line
print("Unique edge types in the graph:")
for edge_type in sorted(edge_types):
    print(f"- {edge_type}")

Unique edge types in the graph:
- associates
- binds
- causes
- covaries
- downregulates
- expresses
- includes
- interacts
- localizes
- palliates
- participates
- presents
- regulates
- resembles
- treats
- upregulates
