In [1]:
import numpy as np
import torch 
import pandas as pd
import torch_geometric.transforms as T
import os 
import json
from NetworkAnalysis import Create_heterogeneous_graph


# For pan-cancer dataset

In [2]:
files = os.listdir('./config/experiments')
files = [file for file in files if 'C4' not in file and 'gnn_gnn' in file]
print(files)

['gnn_gnn_expression_marker_genes_cgp.json', 'gnn_gnn_expression_hvg_cgp.json', 'gnn_gnn_cnv_cgp.json', 'gnn_gnn_MOSA_cgp.json']


In [3]:
for file in files:
    # Load configuration from the JSON file
    path = './config/experiments/' + file
    with open(path, 'r') as config_file:
        config = json.load(config_file)

    # Extract parameters with new structure
    graph_params = config["graph_parameters"]

    graph_creator = Create_heterogeneous_graph(
        BASE_PATH=graph_params["base_path"],
        cancer_type=graph_params["cancer_type"],
        cell_feature=graph_params["cell_feat_name"],
        gene_feature=graph_params["gene_feat_name"],
        metapaths=graph_params["metapaths"]
    )
    heterodata_obj = graph_creator.run_pipeline()


Loading data...
Continuing with Gene1 and Gene2 as columns for the nodes
14034 Nodes and 278974 interactions
Filtering data for network membership...
Filtering for informative genes...
Creating dependency network using informative genes...
Processing gene features...
Processing cell features...
Processing features for 1160 cells, feature type: expression_marker_genes
['ACH-000047', 'ACH-000246', 'ACH-000426', 'ACH-001039', 'ACH-001065'] ...
Applied StandardScaler normalization to cell features (mean=[1.54251523 6.21231691 0.90165888 0.59538907 0.79820183]..., var=[1.42580704 3.77323214 2.47712332 1.44162096 1.68399769]...)
Final network will have 13398 genes and 1090 cell lines
Creating mutation network...
Loading mutation data from ./Data/Depmap/OmicsSomaticMutationsMatrixDamaging.csv (damaging) and ./Data/Depmap/OmicsSomaticMutationsMatrixHotspot.csv (hotspot)
Created mutation network with 53728 edges from damaging and hotspot data.
Adding metapaths to the graph...
Adding the followi

  out = torch.matmul(sparse_input, other)


HeteroData saved to ./Data/multigraphs/heteroData_gene_cell_All_cgp_expression_marker_genes_META2.pt
Loading data...
Continuing with Gene1 and Gene2 as columns for the nodes
14034 Nodes and 278974 interactions
Filtering data for network membership...
Filtering for informative genes...
Creating dependency network using informative genes...
Processing gene features...
Processing cell features...
Processing features for 1160 cells, feature type: expression_hvg
['ACH-000047', 'ACH-000246', 'ACH-000426', 'ACH-001039', 'ACH-001065'] ...
Applied StandardScaler normalization to cell features (mean=[6.03427455 2.20304538 5.76257489 6.0985331  0.6996399 ]..., var=[ 7.41294633  4.26842229 13.83660647 13.40056616  3.73837251]...)
Final network will have 13398 genes and 1090 cell lines
Creating mutation network...
Loading mutation data from ./Data/Depmap/OmicsSomaticMutationsMatrixDamaging.csv (damaging) and ./Data/Depmap/OmicsSomaticMutationsMatrixHotspot.csv (hotspot)
Created mutation network wit

# For Neuroblastoma

In [4]:
files = os.listdir('./config/experiments')
files = [file for file in files if 'gnn_gnn' in file]
print(files)

['gnn_gnn_expression_marker_genes_cgp.json', 'gnn_gnn_expression_marker_genes_C4.json', 'gnn_gnn_cnv_C4.json', 'gnn_gnn_expression_hvg_cgp.json', 'gnn_gnn_cnv_cgp.json', 'gnn_gnn_MOSA_cgp.json', 'gnn_gnn_expression_hvg_C4.json', 'gnn_gnn_MOSA_C4.json']


In [5]:
for file in files:
    # Load configuration from the JSON file
    path = './config/experiments/' + file
    with open(path, 'r') as config_file:
        config = json.load(config_file)

    # Extract parameters with new structure
    graph_params = config["graph_parameters"]

    graph_creator = Create_heterogeneous_graph(
        BASE_PATH=graph_params["base_path"],
        cancer_type="Neuroblastoma",
        cell_feature=graph_params["cell_feat_name"],
        gene_feature=graph_params["gene_feat_name"],
        metapaths=graph_params["metapaths"]
    )
    heterodata_obj = graph_creator.run_pipeline()

Loading data...
Continuing with Gene1 and Gene2 as columns for the nodes
14034 Nodes and 278974 interactions
Filtering data for network membership...
Filtering for informative genes...
Creating dependency network using informative genes...
Processing gene features...
Processing cell features...
Processing features for 39 cells, feature type: expression_marker_genes
['ACH-002083', 'ACH-002261', 'ACH-002278', 'ACH-002280', 'ACH-002282'] ...
Applied StandardScaler normalization to cell features (mean=[1.9776399  6.99115479 0.37844806 0.11597532 0.15120327]..., var=[0.35065828 0.38319137 0.38251511 0.02051516 0.05406979]...)
Final network will have 13398 genes and 32 cell lines
Creating mutation network...
Loading mutation data from ./Data/Depmap/OmicsSomaticMutationsMatrixDamaging.csv (damaging) and ./Data/Depmap/OmicsSomaticMutationsMatrixHotspot.csv (hotspot)
Created mutation network with 588 edges from damaging and hotspot data.
Adding metapaths to the graph...
Adding the following met

# For lung-cancer

In [6]:
files = os.listdir('./config/experiments')
files = [file for file in files if 'C4' not in file and 'gnn_gnn' in file]
print(files)

['gnn_gnn_expression_marker_genes_cgp.json', 'gnn_gnn_expression_hvg_cgp.json', 'gnn_gnn_cnv_cgp.json', 'gnn_gnn_MOSA_cgp.json']


In [7]:
for file in files:
    # Load configuration from the JSON file
    path = './config/experiments/' + file
    with open(path, 'r') as config_file:
        config = json.load(config_file)

    # Extract parameters with new structure
    graph_params = config["graph_parameters"]

    graph_creator = Create_heterogeneous_graph(
        BASE_PATH=graph_params["base_path"],
        cancer_type="Non-Small Cell Lung Cancer",
        cell_feature=graph_params["cell_feat_name"],
        gene_feature=graph_params["gene_feat_name"],
        metapaths=graph_params["metapaths"]
    )
    heterodata_obj = graph_creator.run_pipeline()


Loading data...
Continuing with Gene1 and Gene2 as columns for the nodes
14034 Nodes and 278974 interactions
Filtering data for network membership...
Filtering for informative genes...
Creating dependency network using informative genes...
Processing gene features...
Processing cell features...
Processing features for 98 cells, feature type: expression_marker_genes
['ACH-001137', 'ACH-001233', 'ACH-002156'] 
Applied StandardScaler normalization to cell features (mean=[1.08233429 6.74757681 0.83438839 0.57644046 0.67755345]..., var=[0.3696076  0.60742336 1.51528629 1.0750473  0.62718758]...)
Final network will have 13398 genes and 95 cell lines
Creating mutation network...
Loading mutation data from ./Data/Depmap/OmicsSomaticMutationsMatrixDamaging.csv (damaging) and ./Data/Depmap/OmicsSomaticMutationsMatrixHotspot.csv (hotspot)
Created mutation network with 4976 edges from damaging and hotspot data.
Adding metapaths to the graph...
Adding the following metapaths: [[('cell', 'has_mutati

# Load graphs

In [11]:
graphs = os.listdir('./Data/multigraphs/')
for graph in graphs:
    path = './Data/multigraphs/' + graph
    het_graph = torch.load(path)
    print(graph)
    print(het_graph)


heteroData_gene_cell_Neuroblastoma_cgp_expression_marker_genes_META2.pt
HeteroData(
  metapath_dict={ (cell, metapath_0, cell)=[2] },
  gene={
    node_id=[13398],
    names=[13398],
    x=[13398, 3438],
  },
  cell={
    node_id=[32],
    names=[32],
    x=[32, 3000],
  },
  (gene, interacts_with, gene)={ edge_index=[2, 263122] },
  (gene, dependency_of, cell)={
    edge_index=[2, 29438],
    edge_label=[29438],
  },
  (cell, has_mutation_in, gene)={
    edge_index=[2, 588],
    edge_attr=[588],
  },
  (gene, rev_interacts_with, gene)={ edge_index=[2, 263122] },
  (cell, rev_dependency_of, gene)={
    edge_index=[2, 29438],
    edge_label=[29438],
  },
  (gene, rev_has_mutation_in, cell)={
    edge_index=[2, 588],
    edge_attr=[588],
  },
  (cell, metapath_0, cell)={
    edge_index=[2, 238],
    edge_weight=[238],
  }
)
heteroData_gene_cell_Neuroblastoma_cgp_expression_hvg_META2.pt
HeteroData(
  metapath_dict={ (cell, metapath_0, cell)=[2] },
  gene={
    node_id=[13398],
    names=[