In [4]:
import os
import sys

# Make the project-local modules imported below resolvable by putting the
# parent of the current working directory on sys.path. That parent is the
# project root only when the kernel runs from a direct subdirectory of it.
# The membership check keeps sys.path free of duplicate entries when this
# cell is re-run in the same kernel.
_project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
if _project_root not in sys.path:
    sys.path.append(_project_root)

from meta_pert_dataset import (
    MetaPertDataset,
    load_metabolic_model,
    plot_subsystem_graph,
    print_dataset_metabolic_info,
)
from models.metabolic_model_transmet import print_subsystems_stats

from utils.filesystem import get_git_root

### Load metabolic model

The metabolic model is loaded once here, and the same object is passed to every dataset below.

In [None]:
# Load the metabolic model identified by "RECON2_mat". The resulting object is
# passed as `metabolic_model` to every MetaPertDataset built in later cells.
recon2_mat_model = load_metabolic_model("RECON2_mat")

# Show the model's printed representation as a quick sanity check.
print(recon2_mat_model)

# Print subsystem statistics for the loaded model (the exact contents are
# defined by print_subsystems_stats in models.metabolic_model_transmet).
print_subsystems_stats(recon2_mat_model)

## Metabolic analyses of the datasets

### Norman dataset

In [None]:
# Load the "preprocessed" variant of the Norman dataset from
# <git root>/datasets, attaching the RECON2 metabolic model loaded earlier.
norman_meta_ds = MetaPertDataset(
    name="norman",
    variant="preprocessed",
    dir_path=os.path.join(get_git_root(), "datasets"),
    metabolic_model=recon2_mat_model,
)
# Alternatively, MetaPertDataset can load the model itself when given
# model_name="RECON2_mat".

# Normalize the dataset with type="CPM". The return value is unused, so this
# presumably modifies the dataset in place (trailing underscore) — confirm.
norman_meta_ds.normalize_(type="CPM")
# Adds information about the 5000 most variable genes to the dataset.
norman_meta_ds.most_variable_genes(n_top_genes=5000)

# Print the dataset's metabolic information, limited to the top 10 subsystems.
print_dataset_metabolic_info(norman_meta_ds, top_n_subsystems=10)

In [None]:
# Plot the graph of the "Chondroitin synthesis" subsystem for the Norman
# dataset. Requires `norman_meta_ds` from the previous cell.
# NOTE(review): judging by their names, the two boolean flags hide metabolites
# that appear in only one direction and visually separate highly variable
# genes — confirm against plot_subsystem_graph's documentation.
plot_subsystem_graph(
    "Chondroitin synthesis",
    norman_meta_ds,
    base_figsize=(5, 5),
    omit_single_direction_metabolites=True,
    differentiate_highly_variable_genes=True,
)

### Dixit dataset

In [None]:
# Load the "preprocessed" variant of the Dixit dataset from
# <git root>/datasets, attaching the RECON2 metabolic model loaded earlier.
dixit_meta_ds = MetaPertDataset(
    name="dixit",
    variant="preprocessed",
    dir_path=os.path.join(get_git_root(), "datasets"),
    metabolic_model=recon2_mat_model,
)

# Normalize the dataset with type="CPM". The return value is unused, so this
# presumably modifies the dataset in place (trailing underscore) — confirm.
dixit_meta_ds.normalize_(type="CPM")
# Adds information about the 5000 most variable genes to the dataset.
dixit_meta_ds.most_variable_genes(n_top_genes=5000)
# Print the dataset's metabolic information, limited to the top 20 subsystems.
print_dataset_metabolic_info(dixit_meta_ds, top_n_subsystems=20)

### Adamson dataset

In [None]:
# Load the "preprocessed" variant of the Adamson dataset from
# <git root>/datasets, attaching the RECON2 metabolic model loaded earlier.
adamson_meta_ds = MetaPertDataset(
    name="adamson",
    variant="preprocessed",
    dir_path=os.path.join(get_git_root(), "datasets"),
    metabolic_model=recon2_mat_model,
)

# Normalize the dataset with type="CPM". The return value is unused, so this
# presumably modifies the dataset in place (trailing underscore) — confirm.
adamson_meta_ds.normalize_(type="CPM")
# Adds information about the 5000 most variable genes to the dataset.
adamson_meta_ds.most_variable_genes(n_top_genes=5000)
# Print the dataset's metabolic information, limited to the top 20 subsystems.
print_dataset_metabolic_info(adamson_meta_ds, top_n_subsystems=20)

### Replogle RPE1 dataset

In [None]:
# Load the "preprocessed" variant of the Replogle RPE1 dataset
# (name="replogle_rpe1_essential") from <git root>/datasets, attaching the
# RECON2 metabolic model loaded earlier.
replogle_rpe1_meta_ds = MetaPertDataset(
    name="replogle_rpe1_essential",
    variant="preprocessed",
    dir_path=os.path.join(get_git_root(), "datasets"),
    metabolic_model=recon2_mat_model,
)

# Normalize the dataset with type="CPM". The return value is unused, so this
# presumably modifies the dataset in place (trailing underscore) — confirm.
replogle_rpe1_meta_ds.normalize_(type="CPM")
# Adds information about the 5000 most variable genes to the dataset.
replogle_rpe1_meta_ds.most_variable_genes(n_top_genes=5000)
# Print the dataset's metabolic information, limited to the top 20 subsystems.
print_dataset_metabolic_info(replogle_rpe1_meta_ds, top_n_subsystems=20)


### Replogle K562 dataset

In [None]:
# Load the "preprocessed" variant of the Replogle K562 dataset
# (name="replogle_k562_essential") from <git root>/datasets, attaching the
# RECON2 metabolic model loaded earlier.
replogle_k562_meta_ds = MetaPertDataset(
    name="replogle_k562_essential",
    variant="preprocessed",
    dir_path=os.path.join(get_git_root(), "datasets"),
    metabolic_model=recon2_mat_model,
)

# Normalize the dataset with type="CPM". The return value is unused, so this
# presumably modifies the dataset in place (trailing underscore) — confirm.
replogle_k562_meta_ds.normalize_(type="CPM")
# Adds information about the 5000 most variable genes to the dataset.
replogle_k562_meta_ds.most_variable_genes(n_top_genes=5000)
# Print the dataset's metabolic information, limited to the top 20 subsystems.
print_dataset_metabolic_info(replogle_k562_meta_ds, top_n_subsystems=20)