# Set Up

In [1]:
import gseapy as gp
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt
import seaborn as sns
import decoupler
import squidpy
import warnings
import re
import os

# Root output directory for every figure written by this notebook.
# NOTE(review): absolute, machine-specific path — consider making it configurable.
xenium_dir = "/home/workspace/EXP-00971/figures/xenium_mapping"

# Figures for the shared "core program" gene set get their own subfolder.
core_program_dir = os.path.join(xenium_dir, "core_program_genes")
os.makedirs(core_program_dir, exist_ok=True)

# `base_dir` previously repeated the `xenium_dir` literal. Alias it instead so
# the two paths cannot drift apart, while keeping the name available to any
# other cell that refers to it.
base_dir = xenium_dir

# One output folder per NDMM cluster: ndmm0 ... ndmm8. The plotting cell
# indexes this list by cluster position, so the count must cover every
# entry of `ndmm_clusters`.
N_NDMM_DIRS = 9
ndmm_dirs = []
for i in range(N_NDMM_DIRS):
    ndmm_dir = f"{base_dir}/ndmm{i}"
    ndmm_dirs.append(ndmm_dir)
    os.makedirs(ndmm_dir, exist_ok=True)

  from .autonotebook import tqdm as notebook_tqdm
  from pkg_resources import DistributionNotFound, get_distribution


In [2]:
# Location of the sample's `outs` folder (presumably the Space Ranger output;
# it is not read in this cell).
vis_data = "/home/workspace/EXP-00971/TIS05393-001-010-EXP-00971-SB005/outs"

# Load the pre-filtered AnnData object. The path is relative, so it is
# resolved against the kernel's current working directory.
adata = sc.read_h5ad(filename="filtered_adata.h5ad")

# Setting spatial coordinates

In [3]:
# Coordinates stored under obsm['spatial']; column 0 is used as x and
# column 1 as y.
spatial_coords = adata.obsm['spatial']
x_coords, y_coords = spatial_coords[:, 0], spatial_coords[:, 1]

# Extent of the coordinates along each axis.
x_min, x_max = x_coords.min(), x_coords.max()
y_min, y_max = y_coords.min(), y_coords.max()
x_range = x_max - x_min
y_range = y_max - y_min

# Crop window handed to sc.pl.spatial(crop_coord=...) by the plotting cells:
# skip the first 20% of the x extent and the first 40% of the y extent,
# keeping everything up to the maximum on both axes.
crop_coord = (
    x_min + 0.2 * x_range,
    x_max,
    y_min + 0.4 * y_range,
    y_max,
)

# Xenium cluster gene list

In [4]:
# Genes shared across most clusters (the "core program").
# PURPL is listed for completeness but is not present in the Visium data, so
# any cell using this list should first filter it against `adata.var_names`.
core_program_genes = [
    "PRDM5", "RBFOX2", "TNS3", "DST", "MAGI2", "CNTN5", "EPHA6", "BTBD3",
    "CRYBG3", "SPEF2", "NDNF", "DKK1", "HGF", "ATP10B", "MGAT4C", "CADPS2", "PURPL"
]

# Marker genes for each NDMM cluster, keyed by cluster name.
# Gene symbols are matched case-sensitively against `adata.var_names`, so they
# must use the exact HGNC spelling (e.g. "C11orf1", not "C11Orf1").
# NOTE(review): "SEPT10" may appear as "SEPTIN10" depending on the annotation
# release used for the reference — confirm against adata.var_names.
ndmm_clusters = {
    "NDMM_0": ["PAM", "PSAT1", "DNAJC1", "RMDN3", "SCAMP5", "SEPT10", "ACVR1C"],
    "NDMM_1": ["NOLC1", "NOL8", "DROSHA", "GEMIN6", "CTU1", "SUGP2", "CAMSAP2",
               "FKBP15", "AKAP1", "MAN2A1", "PPAT", "PPRC1", "BDH1"],
    "NDMM_2": ["BFSP2", "BTD", "C11orf1", "C2", "SMIM4", "SVOP"],
    "NDMM_3": ["TET1", "DUSP6", "PAK1", "PLCG2"],
    "NDMM_4": ["IFIT1", "IFIT3", "HERC5", "TNFSF10"],
    "NDMM_5": ["CST7", "GNLY", "MINDY2", "EMB", "SCRN1"],
    "NDMM_6": ["SETD2", "KAT7", "HDAC9", "SIRT1", "CEBPG", "CREB5", "TFDP2", "ELF2"],
    "NDMM_7": ["ARHGEF28", "ARHGAP23", "RAPH1", "DAAM2", "DOCK3", "DOCK4",
               "COL1A2", "LAMA2", "THSD7A", "THSD7B", "RELN", "CAMKK1",
               "PRKG1", "PLCB1", "DIO2", "ERBB4", "LGR4", "PTPRM", "PTPRZ1"],
    "NDMM_8": ["BRCA1", "CENPU", "TOP2A", "MKI67"]
}

# Core program genes

In [8]:
sc.set_figure_params(figsize=(10, 10))

# Only genes that exist in this dataset can be scored or plotted; the core
# list contains at least one gene (PURPL) noted as absent from the data.
available_genes = [g for g in core_program_genes if g in adata.var_names]

# Keyword arguments shared by every spatial plot in this cell, so the
# signature plot and the per-gene plots cannot drift apart.
spatial_plot_kwargs = dict(
    img_key="hires",
    vmax="p99",
    alpha_img=0.7,
    size=0.8,
    color_map="RdBu",
    show=False,          # keep the figure open so plt.savefig can write it
    crop_coord=crop_coord,
    colorbar_loc="bottom",
)

# Score on the filtered list (the same convention as the NDMM cluster cell)
# instead of the raw list, which includes genes missing from var_names.
# The result is stored in adata.obs['core_program_score'].
sc.tl.score_genes(
    adata,
    gene_list=available_genes,
    score_name='core_program_score'
)

# Signature score map.
sc.pl.spatial(
    adata,
    color="core_program_score",
    title='core program gene signature score',
    **spatial_plot_kwargs
)
plt.savefig(f'{core_program_dir}/core_program_gene_sig_score.png', dpi=300, bbox_inches='tight')
plt.close()

# One expression map per available core-program gene.
for gene in available_genes:
    sc.pl.spatial(
        adata,
        color=gene,
        **spatial_plot_kwargs
    )
    plt.savefig(f'{core_program_dir}/{gene}_TIS05393-001-010.pdf', dpi=300, bbox_inches='tight')
    # Close each figure so the loop does not accumulate open figures.
    plt.close()



# NDMM clusters

In [7]:
# Keyword arguments shared by every spatial plot in this cell, so the
# signature plots and the per-gene plots cannot drift apart.
spatial_plot_kwargs = dict(
    img_key="hires",
    vmax="p99",
    alpha_img=0.7,
    size=0.8,
    color_map="RdBu",
    show=False,          # keep the figure open so plt.savefig can write it
    crop_coord=crop_coord,
    colorbar_loc="bottom",
)

# For each NDMM cluster: score its marker genes, save the signature map, then
# save one expression map per gene. Output goes to ndmm_dirs[i], i.e. the
# folder whose index matches the cluster's position in `ndmm_clusters`.
for i, (cluster, genes) in enumerate(ndmm_clusters.items()):
    # Restrict to genes that exist in this dataset.
    available_genes = [g for g in genes if g in adata.var_names]

    # score_genes raises when none of the genes are present; skip just this
    # cluster (with a visible warning) rather than aborting the remaining ones.
    if not available_genes:
        warnings.warn(
            f"{cluster}: none of its genes are in adata.var_names; skipping."
        )
        continue

    # The score is stored in adata.obs under the cluster's name.
    sc.tl.score_genes(
        adata,
        gene_list=available_genes,
        score_name=cluster
    )

    # Plot signature score
    sc.pl.spatial(
        adata,
        color=cluster,
        title=f'{cluster} gene signature score',
        **spatial_plot_kwargs
    )
    plt.savefig(f'{ndmm_dirs[i]}/{cluster}_signature.pdf', dpi=300, bbox_inches='tight')
    plt.close()

    # Plot each available marker gene of this cluster.
    for gene in available_genes:
        sc.pl.spatial(
            adata,
            color=gene,
            title=gene,
            **spatial_plot_kwargs
        )
        plt.savefig(f'{ndmm_dirs[i]}/{gene}_TIS05393-001-010.pdf', dpi=300, bbox_inches='tight')
        # Close each figure so the loop does not accumulate open figures.
        plt.close()