In [1]:
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc
import celloracle as co
from celloracle import motif_analysis as ma
from celloracle.utility import save_as_pickled_object
from genomepy import install_genome

  from pkg_resources import get_distribution, DistributionNotFound


In [2]:
# Set plotting parameters
plt.rcParams['figure.figsize'] = [6, 4.5]
plt.rcParams["savefig.dpi"] = 300

# Scanpy settings
sc.settings.verbosity = 3
sc.settings.set_figure_params(dpi=80, facecolor='white', frameon=False)

print(f"CellOracle version: {co.__version__}")
print(f"Scanpy version: {sc.__version__}")

CellOracle version: 0.20.0
Scanpy version: 1.10.1


In [3]:
adata = sc.read_h5ad("CTR9_snRNASeq/CTR9_snRNASeq_full.h5ad")
print(f"Loaded data: {adata.shape[0]} cells x {adata.shape[1]} genes")
print(adata)

Loaded data: 9869 cells x 33696 genes
AnnData object with n_obs × n_vars = 9869 × 33696
    obs: 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'sample', 'RNA_snn_res.0.5', 'seurat_clusters', 'RNA_snn_res.0.1', 'RNA_snn_res.1', 'RNA_snn_res.0.2', 'cluster_annot'


In [4]:
print("Sample distribution:")
print(adata.obs['sample'].value_counts())
print()

# Check unique values
print("Unique sample values:", adata.obs['sample'].unique().tolist())

# Check if both WT and KO are present
has_wt = adata.obs['sample'].str.contains('WT', case=False).any()
has_ko = adata.obs['sample'].str.contains('KO', case=False).any()

if has_wt and has_ko:
    print("\n⚠️  Data contains BOTH WT and KO samples.")
    print("   CellOracle paper recommends using WT-only data for simulation.")
    print("   Consider subsetting to WT cells before running Oracle.")
elif has_wt:
    print("\n✓ Data contains only WT samples - good for CellOracle.")
else:
    print("\n⚠️  Could not identify WT/KO status. Check sample labels manually.")

Sample distribution:
WT_DM    4981
KO_DM    4888
Name: sample, dtype: int64

Unique sample values: ['WT_DM', 'KO_DM']

⚠️  Data contains BOTH WT and KO samples.
   CellOracle paper recommends using WT-only data for simulation.
   Consider subsetting to WT cells before running Oracle.


In [5]:
# Visualize cell type and sample distribution
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# Cell type counts
adata.obs['cluster_annot'].value_counts().plot(kind='barh', ax=axes[0])
axes[0].set_xlabel('Number of cells')
axes[0].set_title('Cell Type Distribution')

# Sample distribution per cell type
pd.crosstab(adata.obs['cluster_annot'], adata.obs['sample']).plot(kind='barh', stacked=True, ax=axes[1])
axes[1].set_xlabel('Number of cells')
axes[1].set_title('Sample Distribution by Cell Type')
axes[1].legend(title='Sample')

plt.tight_layout()
plt.savefig('figures/cell_type_distribution.png', dpi=300, bbox_inches='tight')
plt.show()

In [6]:
if 'X_umap' in adata.obsm.keys():
    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
    
    sc.pl.umap(adata, color='cluster_annot', ax=axes[0], show=False, legend_loc='on data')
    sc.pl.umap(adata, color='sample', ax=axes[1], show=False)
    
    plt.tight_layout()
    plt.savefig('figures/umap_overview.png', dpi=300, bbox_inches='tight')
    plt.show()
else:
    print("No UMAP found - will compute later")

No UMAP found - will compute later


In [7]:
# Load TF info which was made from mouse cell atlas dataset.
base_GRN = co.data.load_mouse_scATAC_atlas_base_GRN()

# Check data
base_GRN.head()

Unnamed: 0,peak_id,gene_short_name,9430076c15rik,Ac002126.6,Ac012531.1,Ac226150.2,Afp,Ahr,Ahrr,Aire,...,Znf784,Znf8,Znf816,Znf85,Zscan10,Zscan16,Zscan22,Zscan26,Zscan31,Zscan4
0,chr10_100050979_100052296,4930430F08Rik,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,chr10_101006922_101007748,SNORA17,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,chr10_101144061_101145000,Mgat4c,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,chr10_10148873_10149183,9130014G24Rik,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,chr10_10149425_10149815,9130014G24Rik,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
# Instantiate Oracle object
oracle = co.Oracle()

In [9]:
print("Metadata columns:")
print(adata.obs.columns.tolist())
print("\nDimensional reduction: ")
print(list(adata.obsm.keys()))
print("\nSample distribution:")
print(adata.obs['sample'].value_counts())
print("\nCell type distribution:")
print(adata.obs['cluster_annot'].value_counts())

Metadata columns:
['orig.ident', 'nCount_RNA', 'nFeature_RNA', 'sample', 'RNA_snn_res.0.5', 'seurat_clusters', 'RNA_snn_res.0.1', 'RNA_snn_res.1', 'RNA_snn_res.0.2', 'cluster_annot']

Dimensional reduction: 
[]

Sample distribution:
WT_DM    4981
KO_DM    4888
Name: sample, dtype: int64

Cell type distribution:
Epi_Kit+Elf5+           1811
Adipocyte               1802
Tcells                  1333
BasalEpi_Acta2+Trp63    1066
Epi_Ctr9+                943
Fibroblasts              908
Bcells                   519
Endothelials             442
Myeloid_cells            404
Epi_proliferating        221
DCs                      162
Pericytes/SMC            142
SMC?                      79
Schwann?                  37
Name: cluster_annot, dtype: int64


In [10]:
# Load the updated h5ad file with raw counts layer
adata = sc.read_h5ad("CTR9_snRNASeq/CTR9_snRNASeq_with_raw.h5ad")
print(f"Full data: {adata.shape[0]} cells x {adata.shape[1]} genes")
print(adata.obs['sample'].value_counts())

Full data: 9869 cells x 33696 genes
WT_DM    4981
KO_DM    4888
Name: sample, dtype: int64


In [12]:
print(list(adata.obsm.keys()))

[]


In [13]:
# Save raw counts before preprocessing
adata.layers["raw_count"] = adata.X.copy()

# Preprocess for embedding computation
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata, n_top_genes=3000)
sc.tl.pca(adata)
sc.pp.neighbors(adata)
sc.tl.umap(adata)

# Restore raw counts in X (CellOracle requires this)
adata.X = adata.layers["raw_count"].copy()

oracle.import_anndata_as_raw_count(adata=adata,
                                   cluster_column_name="cluster_annot",
                                   embedding_name="X_umap")

normalizing counts per cell
    finished (0:00:00)
extracting highly variable genes
    finished (0:00:00)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
computing PCA
    with n_comps=50
    finished (0:00:00)
computing neighbors
    using 'X_pca' with n_pcs = 50
    finished: added to `.uns['neighbors']`
    `.obsp['distances']`, distances for each pair of neighbors
    `.obsp['connectivities']`, weighted adjacency matrix (0:00:15)
computing UMAP
    finished: added
    'X_umap', UMAP coordinates (adata.obsm) (0:00:07)
33696 genes were found in the adata. Note that Celloracle is intended to use around 1000-3000 genes, so the behavior with this number of genes may differ from what is expected.


In [14]:
oracle.import_TF_data(TF_info_matrix=base_GRN)

In [15]:
# !wget https://raw.githubusercontent.com/morris-lab/CellOracle/master/docs/demo_data/TF_data_in_Paul15.csv

In [16]:
# Load the TF and target gene information from Paul et al. (2015).
Paul_15_data = pd.read_csv("TF_data_in_Paul15.csv")
Paul_15_data


Unnamed: 0,TF,Target_genes
0,Cebpa,"Abcb1b, Acot1, C3, Cnpy3, Dhrs7, Dtx4, Edem2, ..."
1,Irf8,"Abcd1, Aif1, BC017643, Cbl, Ccdc109b, Ccl6, d6..."
2,Irf8,"1100001G20Rik, 4732418C07Rik, 9230105E10Rik, A..."
3,Klf1,"2010011I20Rik, 5730469M10Rik, Acsl6, Add2, Ank..."
4,Spi1,"0910001L09Rik, 2310014H01Rik, 4632428N05Rik, A..."


In [17]:
# Make dictionary: dictionary key is TF and dictionary value is list of target genes.
TF_to_TG_dictionary = {}

for TF, TGs in zip(Paul_15_data.TF, Paul_15_data.Target_genes):
    # convert target gene to list
    TG_list = TGs.replace(" ", "").split(",")
    # store target gene list in a dictionary
    TF_to_TG_dictionary[TF] = TG_list

# We invert the dictionary above using a utility function in celloracle.
TG_to_TF_dictionary = co.utility.inverse_dictionary(TF_to_TG_dictionary)

  0%|          | 0/178 [00:00<?, ?it/s]

In [18]:
# Add TF information 
oracle.addTFinfo_dictionary(TG_to_TF_dictionary)

In [19]:
# Perform PCA
oracle.perform_PCA()

# Select important PCs
plt.plot(np.cumsum(oracle.pca.explained_variance_ratio_)[:100])
n_comps = np.where(np.diff(np.diff(np.cumsum(oracle.pca.explained_variance_ratio_))>0.002))[0][0]
plt.axvline(n_comps, c="k")
plt.show()

In [20]:
print(n_comps)
n_comps = min(n_comps, 50)

12


In [21]:
n_cell = oracle.adata.shape[0]
print(f"cell number is :{n_cell}")



k = int(0.025*n_cell)
print(f"Auto-selected k is :{k}")

cell number is :9869
Auto-selected k is :246


In [22]:
oracle.knn_imputation(n_pca_dims=n_comps, k=k, balanced=True, b_sight=k*8,b_maxl=k*4, n_jobs=4)

In [23]:
# Save oracle object.
oracle.to_hdf5("ctr9_WT_KO.celloracle.oracle")

# Load file.
oracle = co.load_hdf5("ctr9_WT_KO.celloracle.oracle")

In [24]:
sc.pl.umap(oracle.adata, color="cluster_annot")

In [25]:
links = oracle.get_links(cluster_name_for_GRN_unit="cluster_annot", 
                         alpha=10, verbose_level=10)

  0%|          | 0/14 [00:00<?, ?it/s]

Inferring GRN for Adipocyte...


  0%|          | 0/16761 [00:00<?, ?it/s]

Inferring GRN for BasalEpi_Acta2+Trp63...


  0%|          | 0/16761 [00:00<?, ?it/s]

Inferring GRN for Bcells...


  0%|          | 0/16761 [00:00<?, ?it/s]

Inferring GRN for DCs...


  0%|          | 0/16761 [00:00<?, ?it/s]

Inferring GRN for Endothelials...


  0%|          | 0/16761 [00:00<?, ?it/s]

Inferring GRN for Epi_Ctr9+...


  0%|          | 0/16761 [00:00<?, ?it/s]

Inferring GRN for Epi_Kit+Elf5+...


  0%|          | 0/16761 [00:00<?, ?it/s]

Inferring GRN for Epi_proliferating...


  0%|          | 0/16761 [00:00<?, ?it/s]

Inferring GRN for Fibroblasts...


  0%|          | 0/16761 [00:00<?, ?it/s]

Inferring GRN for Myeloid_cells...


  0%|          | 0/16761 [00:00<?, ?it/s]

Inferring GRN for Pericytes/SMC...


  0%|          | 0/16761 [00:00<?, ?it/s]

Inferring GRN for SMC?...


  0%|          | 0/16761 [00:00<?, ?it/s]

Inferring GRN for Schwann?...


  0%|          | 0/16761 [00:00<?, ?it/s]

Inferring GRN for Tcells...


  0%|          | 0/16761 [00:00<?, ?it/s]

In [26]:
# Save the links object
links.to_hdf5(file_path="celloracle_results/ctr9_WT_KO_links.celloracle.links")

In [27]:
# Check which clusters have GRNs
print("Clusters with GRNs:")
print(list(links.links_dict.keys()))
print(f"\nTotal clusters: {len(links.links_dict)}")

# Check number of links in each cluster (before filtering)
print("\nNumber of regulatory links per cluster (unfiltered):")
for cluster in links.links_dict.keys():
    n_links = len(links.links_dict[cluster])
    n_tfs = links.links_dict[cluster]['source'].nunique()
    n_targets = links.links_dict[cluster]['target'].nunique()
    print(f"{cluster:20s}: {n_links:6d} links | {n_tfs:4d} TFs | {n_targets:4d} targets")

Clusters with GRNs:
['Adipocyte', 'BasalEpi_Acta2+Trp63', 'Bcells', 'DCs', 'Endothelials', 'Epi_Ctr9+', 'Epi_Kit+Elf5+', 'Epi_proliferating', 'Fibroblasts', 'Myeloid_cells', 'Pericytes/SMC', 'SMC?', 'Schwann?', 'Tcells']

Total clusters: 14

Number of regulatory links per cluster (unfiltered):
Adipocyte           : 4652568 links |  805 TFs | 16725 targets
BasalEpi_Acta2+Trp63: 4652568 links |  805 TFs | 16725 targets
Bcells              : 4652568 links |  805 TFs | 16725 targets
DCs                 : 4652568 links |  805 TFs | 16725 targets
Endothelials        : 4652568 links |  805 TFs | 16725 targets
Epi_Ctr9+           : 4652568 links |  805 TFs | 16725 targets
Epi_Kit+Elf5+       : 4652568 links |  805 TFs | 16725 targets
Epi_proliferating   : 4652568 links |  805 TFs | 16725 targets
Fibroblasts         : 4652568 links |  805 TFs | 16725 targets
Myeloid_cells       : 4652568 links |  805 TFs | 16725 targets
Pericytes/SMC       : 4652568 links |  805 TFs | 16725 targets
SMC?        

In [28]:
# Examine structure of GRN dataframe
cluster_example = list(links.links_dict.keys())[0]
print(f"\nExample GRN structure from '{cluster_example}':")
print(links.links_dict[cluster_example].head(10))
print(f"\nColumns: {links.links_dict[cluster_example].columns.tolist()}")


Example GRN structure from 'Adipocyte':
   source         target  coef_mean  coef_abs             p      -logp
0    Xbp1  0610005C13Rik   0.000084  0.000084  5.927990e-01   0.227093
1  Trim28  0610005C13Rik  -0.001670  0.001670  2.376240e-08   7.624110
2    Egr3  0610005C13Rik  -0.000669  0.000669  9.585183e-03   2.018400
3    Chd2  0610005C13Rik  -0.001152  0.001152  1.243600e-05   4.905319
4     Maz  0610005C13Rik  -0.003121  0.003121  1.806310e-11  10.743208
5   Nr2c1  0610005C13Rik  -0.002901  0.002901  2.734019e-09   8.563199
6    Rxrb  0610005C13Rik  -0.000438  0.000438  4.897071e-02   1.310064
7     Sp4  0610005C13Rik  -0.000622  0.000622  4.750593e-03   2.323252
8    Etv3  0610005C13Rik   0.000309  0.000309  2.660667e-01   0.575010
9    Gli2  0610005C13Rik  -0.000442  0.000442  1.439623e-02   1.841751

Columns: ['source', 'target', 'coef_mean', 'coef_abs', 'p', '-logp']


In [29]:
cluster_name = "Epi_Ctr9+"  
grn_df = links.links_dict[cluster_name]

print(grn_df.head(20))
print(f"\nShape: {grn_df.shape}")
print(f"\nColumns: {grn_df.columns.tolist()}")

     source         target  coef_mean  coef_abs             p      -logp
0      Xbp1  0610005C13Rik   0.000058  0.000058  2.173749e-01   0.662791
1    Trim28  0610005C13Rik  -0.000620  0.000620  1.468192e-05   4.833217
2      Egr3  0610005C13Rik  -0.001017  0.001017  6.455308e-09   8.190083
3      Chd2  0610005C13Rik  -0.000026  0.000026  5.166649e-01   0.286791
4       Maz  0610005C13Rik  -0.000033  0.000033  7.537126e-01   0.122794
5     Nr2c1  0610005C13Rik  -0.000190  0.000190  4.258274e-03   2.370766
6      Rxrb  0610005C13Rik  -0.000332  0.000332  8.341846e-04   3.078738
7       Sp4  0610005C13Rik   0.000052  0.000052  5.334824e-01   0.272880
8      Etv3  0610005C13Rik   0.000892  0.000892  3.178249e-12  11.497812
9      Gli2  0610005C13Rik   0.000173  0.000173  1.149432e-04   3.939517
10     Rela  0610005C13Rik  -0.000187  0.000187  3.936711e-02   1.404866
11     Tfeb  0610005C13Rik   0.000143  0.000143  4.959003e-02   1.304606
12    Foxn3  0610005C13Rik   0.000222  0.000222  3.

In [30]:
# Save GRN for specific cluster as CSV
# cluster_name = "Epi_Ctr9+"
# links.links_dict[cluster_name].to_csv(f"grn_results/GRN_{cluster_name}.csv", index=False)

# save all clusters
# for cluster in links.links_dict.keys():
#     links.links_dict[cluster].to_csv(f"grn_results/GRN_{cluster}.csv", index=False)

In [31]:
# Filter links based on p-value and coefficient threshold
links.filter_links(p=0.001,          # P-value threshold
                   weight="coef_abs", # Use absolute coefficient
                   threshold_number=2000)  # Keep top 2000 links per cluster

In [32]:
# Check number of links after filtering
print("\nNumber of regulatory links per cluster (after filtering):")
for cluster in links.links_dict.keys():
    n_links = len(links.links_dict[cluster])
    n_tfs = links.links_dict[cluster]['source'].nunique()
    n_targets = links.links_dict[cluster]['target'].nunique()
    print(f"{cluster:20s}: {n_links:6d} links | {n_tfs:4d} TFs | {n_targets:4d} targets")


Number of regulatory links per cluster (after filtering):
Adipocyte           : 4652568 links |  805 TFs | 16725 targets
BasalEpi_Acta2+Trp63: 4652568 links |  805 TFs | 16725 targets
Bcells              : 4652568 links |  805 TFs | 16725 targets
DCs                 : 4652568 links |  805 TFs | 16725 targets
Endothelials        : 4652568 links |  805 TFs | 16725 targets
Epi_Ctr9+           : 4652568 links |  805 TFs | 16725 targets
Epi_Kit+Elf5+       : 4652568 links |  805 TFs | 16725 targets
Epi_proliferating   : 4652568 links |  805 TFs | 16725 targets
Fibroblasts         : 4652568 links |  805 TFs | 16725 targets
Myeloid_cells       : 4652568 links |  805 TFs | 16725 targets
Pericytes/SMC       : 4652568 links |  805 TFs | 16725 targets
SMC?                : 4652568 links |  805 TFs | 16725 targets
Schwann?            : 4652568 links |  805 TFs | 16725 targets
Tcells              : 4652568 links |  805 TFs | 16725 targets


In [33]:
# Calculate network scores (degree centrality, betweenness, etc.)
print("Calculating network scores...")
links.get_network_score()
print("✓ Network scores calculated")

# Display score types
print("\nScore metrics available:")
print(links.merged_score.columns.tolist())

# Show example scores
print("\nExample network scores:")
print(links.merged_score.head(20))

Calculating network scores...
✓ Network scores calculated

Score metrics available:
['degree_all', 'degree_centrality_all', 'degree_in', 'degree_centrality_in', 'degree_out', 'degree_centrality_out', 'betweenness_centrality', 'eigenvector_centrality', 'cluster']

Example network scores:
               degree_all  degree_centrality_all  degree_in  \
Smarcc2                15               0.020862          0   
Fabp4                  18               0.025035         18   
Ebf1                  140               0.194715         48   
Plin1                  17               0.023644         17   
Rreb1                  38               0.052851          0   
Rgs7                    5               0.006954          5   
Rxra                   14               0.019471          0   
Neat1                  10               0.013908         10   
Stat3                   9               0.012517          0   
Maml2                  15               0.020862         15   
Gata3              

In [34]:
# Plot top genes by degree centrality for each cluster
N_TOP_GENES = 20

# Check if network scores exist
if not hasattr(links, 'merged_score') or links.merged_score is None:
    print("⚠ Error: Network scores not calculated yet!")
    print("Please run: links.get_network_score()")
else:
    fig, axes = plt.subplots(3, 4, figsize=(20, 15))
    axes = axes.flatten()
    
    for idx, cluster in enumerate(sorted(links.links_dict.keys())):
        print(cluster)
        if idx < len(axes):
            # Get scores for this cluster
            cluster_scores = links.merged_score.loc[
                links.merged_score['cluster'] == cluster,
                'degree_centrality_all'
            ].sort_values(ascending=False).head(N_TOP_GENES)
            
            # Plot
            cluster_scores.plot(kind='barh', ax=axes[idx], color='steelblue')
            axes[idx].set_title(f'{cluster}', fontsize=10, fontweight='bold')
            axes[idx].set_xlabel('Degree Centrality', fontsize=8)
            axes[idx].invert_yaxis()
            axes[idx].tick_params(labelsize=7)
    
    # Hide unused subplots
    for idx in range(len(links.links_dict.keys()), len(axes)):
        axes[idx].axis('off')
    
    plt.tight_layout()
    plt.savefig('figures/network_analysis/top_genes_per_cluster.png', dpi=300, bbox_inches='tight')
    plt.show()
    print(f"✓ Saved: figures/network_analysis/top_genes_per_cluster.png")


Adipocyte
BasalEpi_Acta2+Trp63
Bcells
DCs
Endothelials
Epi_Ctr9+
Epi_Kit+Elf5+
Epi_proliferating
Fibroblasts
Myeloid_cells
Pericytes/SMC
SMC?
Schwann?
Tcells
✓ Saved: figures/network_analysis/top_genes_per_cluster.png


In [35]:
# Create heatmap of network scores across clusters
N_GENES_HEATMAP = 50
if not hasattr(links, 'merged_score') or links.merged_score is None:
    print("⚠ Error: Network scores not calculated yet!")
    print("Please run: links.get_network_score()")
else:
    # Get top genes per cluster
    top_genes = set()
    for cluster in links.links_dict.keys():
        cluster_mask = links.merged_score['cluster'] == cluster
        cluster_scores = links.merged_score.loc[cluster_mask, 'degree_centrality_all'].sort_values(ascending=False)
        top_genes.update(cluster_scores.head(N_GENES_HEATMAP).index)

    print(f"Creating heatmap with {len(top_genes)} unique genes...")

    # Create pivot table for heatmap
    score_df = links.merged_score[['cluster', 'degree_centrality_all']].copy()
    score_df = score_df[score_df.index.isin(top_genes)]
    pivot_table = score_df.pivot(columns='cluster', values='degree_centrality_all')

    # Plot heatmap
    plt.figure(figsize=(14, 20))
    sns.heatmap(pivot_table, cmap='viridis', cbar_kws={'label': 'Degree Centrality'},
                linewidths=0.5, linecolor='gray')
    plt.title(f'Network Degree Centrality\nTop {N_GENES_HEATMAP} Genes per Cluster',
              fontsize=14, fontweight='bold')
    plt.xlabel('Cluster', fontsize=12)
    plt.ylabel('Gene', fontsize=12)
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.savefig('figures/network_analysis/network_score_heatmap.png', dpi=300, bbox_inches='tight')
    plt.show()
    print("✓ Saved: figures/network_analysis/network_score_heatmap.png")

Creating heatmap with 252 unique genes...
✓ Saved: figures/network_analysis/network_score_heatmap.png


In [36]:
# Select two clusters to compare (adjust these to your clusters of interest)
CLUSTER1 = "Epi_Ctr9+" 
CLUSTER2 = "Epi_Kit+Elf5+"

# Check if clusters exist
available_clusters = list(links.links_dict.keys())
if CLUSTER1 in available_clusters and CLUSTER2 in available_clusters:
    fig, ax = plt.subplots(figsize=(12, 10))
    
    links.plot_score_comparison_2D(
        value="degree_centrality_all",
        cluster1=CLUSTER1,
        cluster2=CLUSTER2,
        save=None
    )
    
    plt.tight_layout()
    plt.savefig(f'figures/network_analysis/score_comparison_{CLUSTER1}_vs_{CLUSTER2}.png', 
                dpi=300, bbox_inches='tight')
    plt.show()
    print(f"✓ Saved comparison plot")
else:
    print(f"⚠ Cluster not found. Available clusters: {available_clusters}")
    print("Please modify CLUSTER1 and CLUSTER2 in the cell above.")

✓ Saved comparison plot
