In [1]:
import scanpy as sc
import starfysh as sf

# Load the Visium mouse-brain AnnData object from disk (path is relative to the notebook).
adata = sc.read_h5ad(filename="../data/visium_mouse_brain_heat.h5ad")


In [None]:
import pandas as pd
import numpy as np

# Build gene_sig: a binary (gene x cell type) marker-membership matrix.
cm = pd.read_csv("../data/cell_markers/normalized_cellmarkers2.csv")

# Clean the marker table BEFORE deriving anything from it, so that marker_dict
# and gene_sig always use exactly the same gene symbols. Stripping only the
# gene_sig index afterwards would leave unstripped symbols in marker_dict and
# could create duplicate index labels in gene_sig.
marker_table = (
    cm.dropna(subset=["cell_type", "marker_gene"])
      .assign(marker_gene=lambda df: df["marker_gene"].astype(str).str.strip())
      .drop_duplicates(subset=["cell_type", "marker_gene"])
)

# cell type -> list of its (cleaned, de-duplicated) marker genes
marker_dict = (
    marker_table.groupby("cell_type")["marker_gene"]
      .apply(list)
      .to_dict()
)

# Sorted, unique gene symbols across all cell types (row labels of gene_sig)
all_genes = sorted(set(marker_table["marker_gene"]))

gene_sig = pd.DataFrame(
    0,
    index=all_genes,
    columns=list(marker_dict),
    dtype=int,
)
for ct, genes in marker_dict.items():
    gene_sig.loc[genes, ct] = 1

# Tidy the gene_ids annotation column. NOTE: this does not influence the
# overlap below, which is computed against adata.var_names, not gene_ids.
adata.var["gene_ids"] = adata.var["gene_ids"].astype(str).str.strip()

# Overlap between marker genes and the genes present in adata (by var name)
common_genes = gene_sig.index.intersection(adata.var_names)
print("Number of shared genes:", len(common_genes))

# Subset both objects to shared genes
adata_sub   = adata[:, common_genes].copy()
gene_sig_sub = gene_sig.loc[common_genes].copy()


Number of shared genes: 2725


In [None]:
import anndata as ad

suffix = "_heat3"   # suffix used in heat_diffuse_genes

# adata.obs columns carrying heat-diffused values, and the gene name behind
# each one (the column name with the suffix removed)
heat_cols = [col for col in adata.obs.columns if col.endswith(suffix)]
heat_genes = pd.Index([col[: -len(suffix)] for col in heat_cols])

# Marker genes that also have a heat-diffused column
common_genes_heat = gene_sig.index.intersection(heat_genes)
print("Heat-diffused shared genes:", len(common_genes_heat))  # should be ~2725

# Matrix of heat-diffused values: one row per obs entry, one column per shared gene
heat_col_names = [f"{gene}{suffix}" for gene in common_genes_heat]
heat_mat = adata.obs[heat_col_names].to_numpy()

# Wrap the matrix in an AnnData that reuses a copy of the original obs table
# and uses the shared marker genes as var
adata_heat = ad.AnnData(
    X=heat_mat,
    obs=adata.obs.copy(),
    var=pd.DataFrame(index=common_genes_heat),
)

# gene_sig rows restricted to the same genes, in the same order as adata_heat.var
gene_sig_heat = gene_sig.loc[common_genes_heat].copy()


Heat-diffused shared genes: 2725


In [4]:
# Genes that have heat-diffused values, as a set for fast membership tests
available = {*common_genes_heat}

# For every cell type keep only the markers that are available, preserving
# their original order; cell types are kept even if their list becomes empty
marker_dict_heat = {
    cell_type: list(filter(available.__contains__, markers))
    for cell_type, markers in marker_dict.items()
}

# Optional sanity check: number of cell types and the first (cell type, markers) pair
print("Cell types:", len(marker_dict_heat))
print("Example:", next(iter(marker_dict_heat.items())))


Cell types: 40
Example: ('Activated neural stem cell', ['Csf3', 'Fbxo2', 'Fxyd1', 'Gfap', 'Id3', 'Thbs4'])


In [5]:
from starfysh import AA, utils, plot_utils, post_analysis
import starfysh as sf_model


  from .autonotebook import tqdm as notebook_tqdm


ModuleNotFoundError: No module named 'large_image'