In [None]:
import os
# Restrict each numerical backend's thread pool to one thread. This stays
# ahead of the numpy/scanpy/celloracle imports below on purpose
# (presumably those runtimes read these variables when first loaded - confirm).
os.environ.update({
    'OPENBLAS_NUM_THREADS': '1',
    'MKL_NUM_THREADS': '1',
    'OMP_NUM_THREADS': '1',
    'NUMEXPR_NUM_THREADS': '1',
})

import sys
import gc
import psutil
import matplotlib.colors as colors
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
import celloracle as co

# Plot defaults for the whole notebook: figure size [6, 6], saved figures at 300 dpi.
plt.rcParams.update({"figure.figsize": [6, 6], "savefig.dpi": 300})

# Folder that receives the simulation figures; created here if it does not exist yet.
save_folder = "figures/simulation"
os.makedirs(save_folder, exist_ok=True)

In [None]:
# Memory monitoring function
def print_memory_usage():
    """Print a one-line memory summary for this process and the machine.

    Reports the current process's resident set size together with the
    system's available and total memory. Every figure is divided by
    1024**3 and printed with two decimals under a "GB" label.
    """
    bytes_per_gb = 1024**3

    # Resident memory of the running kernel process.
    rss_gb = psutil.Process(os.getpid()).memory_info().rss / bytes_per_gb

    # System-wide figures as reported by psutil.
    avail_gb = psutil.virtual_memory().available / bytes_per_gb
    machine_gb = psutil.virtual_memory().total / bytes_per_gb

    print(
        f"Current usage: {rss_gb:.2f} GB | "
        f"Available: {avail_gb:.2f} GB | "
        f"Total: {machine_gb:.2f} GB"
    )

# Baseline report before anything is loaded: memory summary plus the CPU count psutil reports.
print("System Resources:")
print_memory_usage()
print("CPU cores:", psutil.cpu_count())

In [None]:
# --- Oracle object -----------------------------------------------------------
# Memory is reported before and after the load so its footprint is visible.
print("Loading Oracle object...")
print_memory_usage()

oracle = co.load_hdf5("celloracle_results/ctr9_WT_KO.celloracle.oracle")
print(oracle)

print("\nAfter loading Oracle:")
print_memory_usage()

# --- Links object ------------------------------------------------------------
# links.links_dict is keyed by cluster; its keys are listed and counted below.
print("\nLoading Links object...")
links = co.load_hdf5("celloracle_results/ctr9_WT_KO_links.celloracle.links")
print("Clusters with GRNs:", list(links.links_dict))
print(f"Number of clusters: {len(links.links_dict)}")

print("\nAfter loading Links:")
print_memory_usage()

# Run a garbage-collection pass now that both objects are loaded.
gc.collect()

In [None]:
# Filter the Links object, reporting memory on either side of the call.
print("Filtering links...")
print_memory_usage()

# p=0.0001 is the more stringent p-value cutoff chosen for this run.
# NOTE(review): weight/threshold_number presumably rank edges by "coef_abs" and
# keep at most 500 of them - confirm against the CellOracle documentation.
links.filter_links(p=0.0001, weight="coef_abs", threshold_number=500)

print("\nAfter filtering:")
print_memory_usage()

# Hand the filtered Links object to the Oracle to build cluster-specific TF dictionaries.
print("\nGetting cluster-specific TF dictionaries...")
oracle.get_cluster_specific_TFdict_from_Links(links_object=links)

print("After getting TF dicts:")
print_memory_usage()

# Garbage-collection pass before the fitting step.
gc.collect()

In [None]:
# Fit GRN for simulation - a single call that processes all clusters at once.
print("Fitting GRN for simulation across all clusters...")
print(f"Clusters to process: {list(links.links_dict.keys())}")
print_memory_usage()

# alpha is set far above typical values on purpose (very high regularization).
# use_cluster_specific_TFdict=True makes the fit use the TF dictionaries
# obtained from the Links object rather than a shared one
# (NOTE(review): confirm exact semantics against the CellOracle API docs).
print("Fitting with very high regularization...")
oracle.fit_GRN_for_simulation(
    alpha=1000,  # Very high regularization
    use_cluster_specific_TFdict=True,
)

# The check mark is written as a Unicode escape (U+2713) so the message cannot
# be corrupted by a file re-encoding; the literal was stored as mojibake before.
print("\u2713 GRN fitting complete!")
print_memory_usage()