In [12]:
import os
import sys

# Add the root of the project to sys.path.
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))
from graph_visualization import plot_subsystem_graph
from meta_pert_dataset import MetaPertDataset
from metabolic_utils import print_dataset_metabolic_info, print_subsystems_stats
from models import init_model_transmet

from utils.filesystem import get_git_root

### Load metabolic model

The same metabolic model object is reused by every dataset loaded below.

In [13]:
# Load the RECON2 metabolic model once. The resulting object is passed as
# `metabolic_model=` to every MetaPertDataset constructed later in this
# notebook, so the model initialization logged below only happens here.
recon2_mat_model = init_model_transmet("RECON2_mat")

Initializing metabolic model 'RECON2_mat' with the following parameters:
  species: homo_sapiens
  media: default-media
  isoform_summing: remove-summing
  exchange_limit: 1.0
Metabolic model initialized successfully.
Removing empty gene associations...
Empty gene associations removed.
Converting gene symbols to Ensembl IDs...
Gene symbols converted to Ensembl IDs.
Metabolic model loading complete.


In [14]:
# Show the model's text summary (name plus subsystem, reaction and gene counts).
print(recon2_mat_model)

MetabolicModelTransmet object
    name: RECON2_mat
    Total number of subsystems: 100
    Total number of reactions: 10211
    Total number of associated genes: 1404


In [None]:
# Presumably prints per-subsystem statistics of the loaded model — TODO confirm
# against metabolic_utils.print_subsystems_stats.
# NOTE(review): this cell has no execution count and no saved output; re-run
# the notebook top to bottom before sharing it.
print_subsystems_stats(recon2_mat_model)

## Metabolic analyses of the datasets

### Norman dataset

In [16]:
# Load the preprocessed Norman dataset from <git root>/datasets, reusing the
# already-initialized RECON2 metabolic model rather than loading it again.
norman_meta_ds = MetaPertDataset(
    name="norman",
    variant="preprocessed",
    dir_path=os.path.join(get_git_root(), "datasets"),
    metabolic_model=recon2_mat_model,
)
# Alternative: pass model_name="RECON2_mat" to have MetaPertDataset load the
# metabolic model itself.

Loading: /mnt/md0/data/gdufort/transmet/datasets/norman/preprocessed/adata.h5ad
Using provided metabolic model for dataset 'norman' with variant 'preprocessed'.
Number of unique perturbed genes: 105


In [17]:
# Normalize the dataset with the "CPM" scheme.
# NOTE(review): the trailing underscore in `normalize_` suggests the dataset is
# modified in place, which would make re-running this cell non-idempotent —
# confirm.
norman_meta_ds.normalize_(type="CPM")
# Adds information about the 5000 most variable genes to the dataset.
norman_meta_ds.most_variable_genes(n_top_genes=5000)

# Print the metabolic report for this dataset, including the 20 subsystems that
# capture the most variance.
print_dataset_metabolic_info(norman_meta_ds, top_n_subsystems=20)

Metabolic Model Information Report
Number of metabolic genes in the dataset: 1357
Number of metabolic genes in the dataset that are perturbed: 5
Number of metabolic genes among the most variable genes: 253
Number of metabolic genes among the most variable genes that are perturbed: 2
Percentage of variance captured by highly variable genes among all genes: 22.08%
Percentage of variance captured by metabolic genes among highly variable genes: 5.06%
Number of subsystems with metabolic genes in the dataset: 99/100
Number of subsystems with metabolic genes in the most variable genes: 83/100

Top 20 Subsystems by Variance Captured:
----------------------------------------
Subsystem: Transport, extracellular, Variance: 1798.04, Percentage of metabolic variance: 31.22%, Genes in Most Variable: 109/621, Perturbed Genes: 3/621
Subsystem: Transport, mitochondrial, Variance: 831.12, Percentage of metabolic variance: 14.43%, Genes in Most Variable: 49/227, Perturbed Genes: 2/227
Subsystem: Transpor

In [None]:
# Load COMPASS results from "reactions.tsv" into the Norman dataset.
# NOTE(review): the directory this file name is resolved against is not visible
# here — confirm where the file must live.
norman_meta_ds.load_compass_results("reactions.tsv")
# Presumably derives per-reaction statistics from the loaded COMPASS results
# (likely what the reaction-activation options of plot_subsystem_graph use) —
# TODO confirm.
norman_meta_ds.calculate_reaction_stats()

Plot an example metabolic graph associated with a subsystem. Highly variable genes are colored yellow; the rest are gray. Reactions with no associated highly variable genes, and their connections, are colored gray. Metabolites with no associated highly variable reactions are colored gray.

In [None]:
# Plot the "Glycolysis/gluconeogenesis" subsystem graph for the Norman dataset.
# NOTE(review): the markdown above describes highly variable genes drawn in
# yellow, but every differentiate_* option below is False — confirm whether
# differentiate_highly_variable_genes_flag was meant to be True here.
# NOTE(review): the "Sphingolipid metabolism" plot cells in this notebook pass
# the same options without the `_flag` suffix; only one spelling can match the
# signature of plot_subsystem_graph (unless it accepts **kwargs) — confirm.
plot_subsystem_graph(
    "Glycolysis/gluconeogenesis",
    norman_meta_ds,
    plot_by_connected_component=True,
    omit_single_direction_metabolites_flag=False,
    differentiate_highly_variable_genes_flag=False,
    differentiate_perturbed_genes_flag=False,
    plot_reaction_graph_flag=False,
    differentiate_reaction_activation_mean_flag=False,
    differentiate_reaction_activation_variance_flag=False,
)

### Dixit dataset

In [None]:
# Load the preprocessed Dixit dataset from <git root>/datasets, reusing the
# shared RECON2 metabolic model.
dixit_meta_ds = MetaPertDataset(
    name="dixit",
    variant="preprocessed",
    dir_path=os.path.join(get_git_root(), "datasets"),
    metabolic_model=recon2_mat_model,
)

# Normalize the dataset with the "CPM" scheme.
dixit_meta_ds.normalize_(type="CPM")
# Adds information about the 5000 most variable genes to the dataset.
dixit_meta_ds.most_variable_genes(n_top_genes=5000)
# Print the dataset metabolic information, including the top 20 subsystems.
print_dataset_metabolic_info(dixit_meta_ds, top_n_subsystems=20)

In [None]:
# Plot the "Sphingolipid metabolism" subsystem graph for the Dixit dataset.
# NOTE(review): these keyword names lack the `_flag` suffix used by the Norman
# and Replogle K562 plot cells in this notebook (e.g. `plot_reaction_graph_flag`).
# Both spellings cannot match one signature unless plot_subsystem_graph accepts
# **kwargs — confirm against graph_visualization.plot_subsystem_graph.
plot_subsystem_graph(
    "Sphingolipid metabolism",
    dixit_meta_ds,
    base_figsize=(5, 5),
    omit_single_direction_metabolites=True,
    differentiate_highly_variable_genes=True,
    plot_reaction_graph=True,
)


### Adamson dataset

In [None]:
# Load the preprocessed Adamson dataset from <git root>/datasets, reusing the
# shared RECON2 metabolic model.
adamson_meta_ds = MetaPertDataset(
    name="adamson",
    variant="preprocessed",
    dir_path=os.path.join(get_git_root(), "datasets"),
    metabolic_model=recon2_mat_model,
)

# Normalize the dataset with the "CPM" scheme.
adamson_meta_ds.normalize_(type="CPM")
# Adds information about the 5000 most variable genes to the dataset.
adamson_meta_ds.most_variable_genes(n_top_genes=5000)
# Print the dataset metabolic information, including the top 20 subsystems.
print_dataset_metabolic_info(adamson_meta_ds, top_n_subsystems=20)

In [None]:
# Plot the "Sphingolipid metabolism" subsystem graph for the Adamson dataset.
# NOTE(review): these keyword names lack the `_flag` suffix used by the Norman
# and Replogle K562 plot cells in this notebook (e.g.
# `differentiate_highly_variable_genes_flag`). Both spellings cannot match one
# signature unless plot_subsystem_graph accepts **kwargs — confirm.
plot_subsystem_graph(
    "Sphingolipid metabolism",
    adamson_meta_ds,
    base_figsize=(5, 5),
    omit_single_direction_metabolites=True,
    differentiate_highly_variable_genes=True,
)


### Replogle RPE1

In [None]:
# Load the Replogle RPE1 (essential) dataset with the RECON2 metabolic model
replogle_rpe1_meta_ds = MetaPertDataset(
    name="replogle_rpe1_essential",
    variant="preprocessed",
    dir_path=os.path.join(get_git_root(), "datasets"),
    metabolic_model=recon2_mat_model,
)

# Normalize the dataset with the "CPM" scheme.
replogle_rpe1_meta_ds.normalize_(type="CPM")
# Adds information about the 5000 most variable genes to the dataset.
replogle_rpe1_meta_ds.most_variable_genes(n_top_genes=5000)
# Print the dataset metabolic information, including the top 20 subsystems.
print_dataset_metabolic_info(replogle_rpe1_meta_ds, top_n_subsystems=20)


In [None]:
# Plot the "Sphingolipid metabolism" subsystem graph for the Replogle RPE1
# dataset.
# NOTE(review): these keyword names lack the `_flag` suffix used by the Norman
# and Replogle K562 plot cells in this notebook (e.g.
# `differentiate_highly_variable_genes_flag`). Both spellings cannot match one
# signature unless plot_subsystem_graph accepts **kwargs — confirm.
plot_subsystem_graph(
    "Sphingolipid metabolism",
    replogle_rpe1_meta_ds,
    base_figsize=(5, 5),
    omit_single_direction_metabolites=True,
    differentiate_highly_variable_genes=True,
)


### Replogle K562

In [None]:
# Load the Replogle K562 (essential) dataset with the RECON2 metabolic model
replogle_k562_meta_ds = MetaPertDataset(
    name="replogle_k562_essential",
    variant="preprocessed",
    dir_path=os.path.join(get_git_root(), "datasets"),
    metabolic_model=recon2_mat_model,
)

# Normalize the dataset with the "CPM" scheme.
replogle_k562_meta_ds.normalize_(type="CPM")
# Adds information about the 5000 most variable genes to the dataset.
replogle_k562_meta_ds.most_variable_genes(n_top_genes=5000)
# Print the dataset metabolic information. This cell reports the top 30
# subsystems, whereas the other dataset cells in this notebook report 20.
print_dataset_metabolic_info(replogle_k562_meta_ds, top_n_subsystems=30)

In [None]:
# Plot the "Fatty acid synthesis" subsystem graph for the Replogle K562 dataset
# with the highly-variable-gene and perturbed-gene options enabled and the
# reaction-graph option disabled.
# NOTE(review): the "Sphingolipid metabolism" plot cells in this notebook pass
# similar options without the `_flag` suffix; only one spelling can match the
# signature of plot_subsystem_graph (unless it accepts **kwargs) — confirm.
plot_subsystem_graph(
    "Fatty acid synthesis",
    replogle_k562_meta_ds,
    plot_by_connected_component=True,
    differentiate_highly_variable_genes_flag=True,
    differentiate_perturbed_genes_flag=True,
    plot_reaction_graph_flag=False,
)
