In [None]:
import scanpy as sc
import squidpy as sq
import anndata
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from collections import Counter
from matplotlib_scalebar.scalebar import ScaleBar
import os

import cell2location
import scvi

# Resolution (dots per inch) for figures written to disk.
DPI = 300
# Base font size handed to seaborn below (42 looks like an earlier setting).
FONTSIZE = 20  # 42
# scanpy plotting defaults: 60 dpi for on-screen figures, DPI for saved ones,
# transparent background, vector-friendly output.
sc.settings.set_figure_params(
    scanpy=True, dpi=60, transparent=True, vector_friendly=True, dpi_save=DPI
)
from matplotlib import rcParams

# Font type 42 (TrueType) for text in PDF output.
rcParams["pdf.fonttype"] = 42

import seaborn as sns
from pathlib import Path

# seaborn "ticks" theme with project font size and dpi settings.
# NOTE(review): these rc values are applied after set_figure_params, so
# figure.dpi=100 presumably supersedes the dpi=60 requested above - confirm.
custom_params = {"font.size": FONTSIZE, "figure.dpi": 100, "savefig.dpi": DPI}
sns.set_theme(style="ticks", rc=custom_params)

# QC utils functions - a package
from vistools import utils
from vistools import spatial_plots

### Score gene signatures to see where they localise across spatial regions

In [None]:
# Sample identifier; used below as the last component of the output directory.
SAMPLE_NAME = "CRC09_LM_VISIUM"

In [None]:
# DIR2SAVE: per-sample cell2location directory (created if it does not exist).
DIR2SAVE = Path(
    f"/data/BCI-CRC/nasrine/data/CRC/spatial/CRC_LM_VISIUM/CRC_LM_VISIUM_04_08_09_11/cell2location/{SAMPLE_NAME}"
)
DIR2SAVE.mkdir(parents=True, exist_ok=True)

In [None]:
# Directory of the cell2location mapping run, nested under DIR2SAVE;
# created on demand together with any missing parents.
run_name = DIR2SAVE / "cell2location_map-no_cycling_TME/"
run_name.mkdir(exist_ok=True, parents=True)

In [None]:
# Figure output directory for the gene-signature plots of this run.
FIG2SAVE = run_name / "figures/gene_signatures/"
FIG2SAVE.mkdir(exist_ok=True, parents=True)
# Point scanpy's global figure directory at it so every plot called with
# `save=` is written there.
sc.settings.figdir = FIG2SAVE

In [None]:
# Read the spatial AnnData stored in the run directory.
adata_file = f"{run_name}/sp.h5ad"
adata_vis = sc.read_h5ad(adata_file)
# Load the cell2location model saved in the same directory, attached to adata_vis.
# NOTE(review): `mod` is not referenced again in the cells visible here.
mod = cell2location.models.Cell2location.load(f"{run_name}", adata_vis)

In [None]:
### gene expression
# Keep the matrix as loaded in its own layer before X is overwritten below.
adata_vis.layers["raw"] = adata_vis.X.copy()  # preserve counts

# normalize + log1p
# Scale every observation to a total of 10,000, then apply log(1 + x);
# a snapshot of X is stored as a layer after each step.
sc.pp.normalize_total(adata_vis, target_sum=1e4, inplace=True)
adata_vis.layers["normalised"] = adata_vis.X.copy()
sc.pp.log1p(adata_vis)

adata_vis.layers["log1p"] = adata_vis.X.copy()

# .raw now holds the log-normalised matrix (not counts); the score_genes calls
# in later cells read it through use_raw=True.
# NOTE(review): this cell transforms X in place, so re-running it without
# reloading adata_vis would normalise and log-transform a second time.
adata_vis.raw = adata_vis  # keep normalised log1p

In [None]:
# Sanity check: dimensions of the loaded object as (n_obs, n_vars).
adata_vis.shape

In [None]:
# (signature name, pandas reader, file) for every curated gene set.
# Later cells look the genes up as gene_sets[name][name], so each table is
# expected to expose a column named like its key.
_GENE_SET_SOURCES = [
    (
        "HALLMARK_HYPOXIA",
        pd.read_table,
        "/data/BCI-CRC/SO/genesets/HALLMARK_HYPOXIA.v7.5.1.grp",
    ),
    (
        "HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION",
        pd.read_table,
        "/data/BCI-CRC/SO/genesets/HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION.v7.5.1.grp",
    ),
    (
        "COAD_pEMT",
        pd.read_csv,
        "/data/BCI-CRC/SO/genesets/COAD_pEMT_genes_TylerTirosh2021_tumourSpecific.csv",
    ),
    (
        "HALLMARK_INTERFERON_ALPHA_RESPONSE",
        pd.read_table,
        "/data/BCI-CRC/SO/genesets/HALLMARK_INTERFERON_ALPHA_RESPONSE_M5911.txt",
    ),
    (
        "HALLMARK_WNT_BETA_CATENIN_SIGNALING",
        pd.read_table,
        "/data/BCI-CRC/SO/genesets/HALLMARK_WNT_BETA_CATENIN_SIGNALING.v7.5.1.grp",
    ),
    (
        "HALLMARK_INTERFERON_GAMMA_RESPONSE",
        pd.read_table,
        "/data/BCI-CRC/SO/genesets/HALLMARK_INTERFERON_GAMMA_RESPONSE_M5913.txt",
    ),
]

# Read the files in the listed order; one DataFrame per signature name.
gene_sets = {
    set_name: read_file(file_path)
    for set_name, read_file, file_path in _GENE_SET_SOURCES
}

In [None]:
# Rename the "COAD" column to the dictionary key so this table can be indexed
# as gene_sets[name][name] like the others.
gene_sets["COAD_pEMT"] = gene_sets["COAD_pEMT"].rename(
    columns={"COAD": "COAD_pEMT"}
)

In [None]:
# Score every curated gene set on each slide and save one spatial map per
# (sample, gene set) pair. The colour scale runs from the 25th to the 99.2th
# percentile of the plotted values.
#
# Series.unique() keeps first-appearance order, so slides are processed (and
# figures emitted) in the same order on every run; iterating a set of strings
# is not stable across kernel restarts.
sample_ids = adata_vis.obs["Sample"].unique()

for i, gene_table in gene_sets.items():
    # Genes are read from the column named like the dictionary key.
    signature_genes = gene_table[i]

    for samp in sample_ids:
        adata_tmp = utils.select_slide(adata_vis, s=samp, batch_key="Sample")
        # use_raw=True: the score is computed from adata_tmp.raw.
        sc.tl.score_genes(
            adata_tmp,
            gene_list=signature_genes,
            score_name=i,
            use_raw=True,
        )
        sc.pl.spatial(
            adata_tmp,
            color=i,
            size=1.3,
            img_key="hires",
            vmin="p25.0",
            vmax="p99.2",
            cmap="YlGnBu",
            alpha_img=0.3,
            save=f"{samp}_{i}_vmin.pdf",
        )

In [None]:
# Same per-slide gene-set scoring, but with the colour scale anchored at 0
# (upper limit: 99.2th percentile); figures are saved with a "_vmin0" suffix.
# An absolute lower bound of 0.2 was tried previously.
#
# Series.unique() keeps first-appearance order, so slides are processed (and
# figures emitted) in the same order on every run; iterating a set of strings
# is not stable across kernel restarts.
sample_ids = adata_vis.obs["Sample"].unique()

for i, gene_table in gene_sets.items():
    # Genes are read from the column named like the dictionary key.
    signature_genes = gene_table[i]

    for samp in sample_ids:
        adata_tmp = utils.select_slide(adata_vis, s=samp, batch_key="Sample")
        # use_raw=True: the score is computed from adata_tmp.raw.
        sc.tl.score_genes(
            adata_tmp,
            gene_list=signature_genes,
            score_name=i,
            use_raw=True,
        )
        sc.pl.spatial(
            adata_tmp,
            color=i,
            size=1.3,
            img_key="hires",
            vmin=0,
            vmax="p99.2",
            cmap="YlGnBu",
            alpha_img=0.3,
            save=f"{samp}_{i}_vmin0.pdf",
        )

### Try signatures from CellCharter (Marco Varrone, Giovanni Ciriello)

In [None]:
# CellCharter signature table: first row is the header, one column per signature.
cellcharter_sign = pd.read_excel(
    "/data/BCI-CRC/nasrine/data/gene_sets/MarcoVarrone_GiovanniCiriello_CellCharter_media-4.xlsx",
    sheet_name="Sheet1",
    header=0,
)

# One gene Series per signature with the empty (NaN) cells removed.
cellcharter_sign_dict = {
    signature: cellcharter_sign[signature].dropna()
    for signature in cellcharter_sign.columns
}

In [None]:
# Score every CellCharter signature on each slide and save one spatial map per
# (sample, signature) pair, colour-scaled from the 25th to the 99.2th
# percentile. "RdPu" was considered as an alternative colour map.
#
# Series.unique() keeps first-appearance order, so slides are processed (and
# figures emitted) in the same order on every run; iterating a set of strings
# is not stable across kernel restarts.
sample_ids = adata_vis.obs["Sample"].unique()

for i, signature_genes in cellcharter_sign_dict.items():

    for samp in sample_ids:
        adata_tmp = utils.select_slide(adata_vis, s=samp, batch_key="Sample")
        # use_raw=True: the score is computed from adata_tmp.raw.
        sc.tl.score_genes(
            adata_tmp,
            gene_list=signature_genes,
            score_name=i,
            use_raw=True,
        )
        sc.pl.spatial(
            adata_tmp,
            color=i,
            size=1.3,
            img_key="hires",
            vmin="p25.0",
            vmax="p99.2",
            cmap="YlGnBu",
            alpha_img=0.3,
            save=f"{samp}_{i}.pdf",
        )

### Load signatures from Gavish et al. (2023), "Hallmarks of transcriptional intratumour heterogeneity across a thousand tumours"

In [None]:
# Supplementary workbook of the Gavish 2023 paper; only the "Cancer MPs" sheet
# is read.
_gavish_workbook = "/data/BCI-CRC/nasrine/data/gene_sets/gavishHallmarksTranscriptio2023_41586_2023_6130_MOESM6_ESM.xlsx"
gavish_sign = pd.read_excel(_gavish_workbook, sheet_name="Cancer MPs")

# Preview the first five rows.
gavish_sign.head()

In [None]:
# List the column names; the scoring cell below selects columns by these exact strings.
gavish_sign.columns

In [None]:
# Columns of the Gavish "Cancer MPs" sheet to score (hypoxia, EMT and
# interferon/MHC-II programs). The strings are used verbatim as column keys;
# note the trailing space in 'MP14 EMT-III '.
gavish_programs = [
    'MP6 Hypoxia', 'MP12 EMT-I', 'MP13 EMT-II', 'MP14 EMT-III ', 'MP15 EMT IV',
    'MP17 Interferon/MHC-II (I)', 'MP18 Interferon/MHC-II (II)',
]

# Series.unique() keeps first-appearance order, so slides are processed (and
# figures emitted) in the same order on every run; iterating a set of strings
# is not stable across kernel restarts.
sample_ids = adata_vis.obs["Sample"].unique()

for i in gavish_programs:

    # Text before the first "/" serves as plot title and file-name part, which
    # keeps "/" out of the saved file name.
    my_title = i.split("/")[0]
    # Drop empty cells, as done for the CellCharter signatures.
    signature_genes = gavish_sign[i].dropna()

    for samp in sample_ids:
        adata_tmp = utils.select_slide(adata_vis, s=samp, batch_key="Sample")
        # use_raw=True: the score is computed from adata_tmp.raw.
        sc.tl.score_genes(
            adata_tmp,
            gene_list=signature_genes,
            score_name=i,
            use_raw=True,
        )
        # Colour scale: 25th to 99.2th percentile ("RdPu" and vmin=0.2 were
        # tried as alternatives).
        sc.pl.spatial(
            adata_tmp,
            color=i,
            size=1.3,
            img_key="hires",
            vmin="p25.0",
            vmax="p99.2",
            title=my_title,
            cmap="YlGnBu",
            alpha_img=0.3,
            save=f"Gavish2023_{samp}_{my_title}.pdf",
        )