In [None]:
from pathlib import Path
import scanpy as sc
import cell2location
import matplotlib.pyplot as plt

# Resolution (dots per inch) used when figures are saved to disk.
DPI = 300
# Base font size handed to seaborn below; the trailing "42" looks like an
# alternative value that was tried — TODO confirm.
FONTSIZE = 20  # 42
# scanpy figure defaults: 100 dpi for on-screen rendering, DPI for saved files.
sc.settings.set_figure_params(
    scanpy=True, dpi=100, transparent=True, vector_friendly=True, dpi_save=DPI
)
from matplotlib import rcParams

# Font type 42 makes matplotlib embed TrueType fonts in PDF output.
rcParams["pdf.fonttype"] = 42

import pandas as pd

from vistools import utils

import seaborn as sns
# rc overrides for seaborn; set_theme runs after the scanpy call above, so
# these values are the ones applied last.
custom_params = {"font.size": FONTSIZE, "figure.dpi": 100, "savefig.dpi": DPI}
sns.set_theme(style="ticks", rc=custom_params)


In [None]:
# Sample / run identifier; interpolated into the cell2location and
# cell2loc_spatialde2 paths used by this notebook.
SAMPLE_NAME = "concat-no_normal"
# Smoothness value of the segmentation under analysis; the trailing values are
# presumably alternatives that were explored — TODO confirm.
optimal_s = 0.1  # 1 1.2 1.5 2

In [None]:
# Only "concat" samples have a gene-expression loading path in this notebook,
# and every later cell needs `adata_genexp`. Fail here with a clear message
# instead of hitting a NameError several cells further down.
if "concat" not in SAMPLE_NAME:
    raise ValueError(
        f"Unsupported SAMPLE_NAME {SAMPLE_NAME!r}: gene expression is only "
        "loaded for samples whose name contains 'concat'."
    )

# load gene expression
DIR2GENEXP = Path(f"/data/BCI-CRC/nasrine/data/CRC/spatial/public/Visium_Ozato_2023/cell2location/{SAMPLE_NAME}/cell2location_map-no_cycling_TME/sp.h5ad")
adata_genexp = sc.read_h5ad(DIR2GENEXP)

# load joint analysis of microenvironments
MICROENV_DIR = Path(
    f"/data/BCI-CRC/nasrine/data/CRC/spatial/public/Visium_Ozato_2023/cell2loc_spatialde2/{SAMPLE_NAME}"
)
# The segmentation file is selected by `optimal_s` rather than a hard-coded
# value; with optimal_s = 0.1 this resolves to the same file name as before.
# NOTE(review): assumes files for other smoothness values are named with the
# plain str() of the number — confirm.
adata_microenv = sc.read_h5ad(
    MICROENV_DIR.joinpath(f"sp_segmentation_smoothness{optimal_s}.h5ad")
)

In [None]:
# Root folder for everything this notebook writes for the current sample.
DIR2SAVE = Path(
    f"/data/BCI-CRC/nasrine/data/CRC/spatial/public/Visium_Ozato_2023/cell2loc_spatialde2/{SAMPLE_NAME}/microenvs_geneexp/gene_signatures/"
)

# Figures go into a sub-folder, created (with parents) if missing, and scanpy
# is pointed at it for its own figure output.
FIG2SAVE = DIR2SAVE / "figures/"
FIG2SAVE.mkdir(parents=True, exist_ok=True)
sc.settings.figdir = FIG2SAVE

In [None]:
# Keep only the microenvironment spots that are also present in adata_genexp.
shared_spots = adata_microenv.obs_names.isin(adata_genexp.obs_names)
adata_microenv = adata_microenv[shared_spots].copy()

In [None]:
# Attach the microenvironment assignment to the gene-expression object.
# The labels are aligned on the spot index; spots without a label get NaN,
# exactly like the previous left merge. Assigning a single column (instead of
# merging data frames) keeps this cell idempotent: re-running it overwrites
# the column rather than producing segmentation_labels_x / _y duplicates.
adata_genexp.obs["segmentation_labels"] = (
    adata_microenv.obs["segmentation_labels"].reindex(adata_genexp.obs.index)
)

In [None]:
# Persist the object (with microenvironment labels) before X is normalised in
# the following cell. The smoothness in the file name comes from `optimal_s`
# instead of a hard-coded literal; with optimal_s = 0.1 the name is unchanged.
adata_genexp.write(
    DIR2SAVE.joinpath(f"sp_segmentation_smoothness{optimal_s}_raw_gene_counts.h5ad")
)

In [None]:
### log normalise
# NOTE: this cell transforms adata_genexp.X in place, so it must run exactly
# once per kernel session — a second run would normalise already
# log-transformed values and overwrite the "raw" layer with them.

# keep raw: snapshot of X before any transformation
adata_genexp.layers["raw"] = adata_genexp.X.copy()  # preserve counts

# normalize + log1p: scale with target_sum=1e4, snapshot, then log1p-transform
sc.pp.normalize_total(adata_genexp, target_sum=1e4, inplace=True)
adata_genexp.layers["normalised"] = adata_genexp.X.copy()
sc.pp.log1p(adata_genexp)

# snapshot of the log1p-transformed matrix
adata_genexp.layers["log1p"] = adata_genexp.X.copy()

# .raw is set AFTER the transformations, so despite its name it holds the
# normalised log1p values; the scoring cells below read it via use_raw=True.
adata_genexp.raw = adata_genexp  # keep normalised log1p

In [None]:
# Gene signature table read from the "Cancer MPs" sheet (Gavish 2023
# supplementary file, per the file name).
GAVISH_XLSX = "/data/BCI-CRC/nasrine/data/gene_sets/gavishHallmarksTranscriptio2023_41586_2023_6130_MOESM6_ESM.xlsx"
gavish_sign = pd.read_excel(GAVISH_XLSX, sheet_name="Cancer MPs")
gavish_sign.head()

In [None]:
# Score each selected Gavish meta-program per spot. Every name is used both
# as the column to read from gavish_sign and as the obs column that receives
# the score. The trailing space in 'MP14 EMT-III ' is kept as-is — presumably
# it matches the spreadsheet header; verify before "fixing" it.
for signature in ['MP6 Hypoxia', 'MP12 EMT-I', 'MP13 EMT-II', 'MP14 EMT-III ', 'MP15 EMT IV',
                  'MP17 Interferon/MHC-II (I)', 'MP18 Interferon/MHC-II (II)',]:
    # Drop empty spreadsheet cells so only actual gene symbols reach scanpy.
    signature_genes = gavish_sign[signature].dropna().tolist()

    # use_raw=True scores on adata_genexp.raw.
    sc.tl.score_genes(adata_genexp, gene_list=signature_genes, score_name=signature,
                      use_raw=True)

In [None]:
# Additional gene signatures, keyed by the name they will be scored under.
geneset_dict_sam = dict()

### EpiHR and TME_HR: columns of the same CSV; columns may differ in length,
### so missing entries are dropped before building each list.
geneSet = pd.read_csv('/data/BCI-CRC/Elise/genesets/Canellas_2022_gene_sig.csv')
geneset_dict_sam['EpiHR'] = geneSet['EpiHR'].dropna().tolist()
geneset_dict_sam['TME_HR'] = geneSet['TME_HR'].dropna().tolist()


### CRIS scores from Isella 2017
CRIS = pd.read_csv('/data/BCI-CRC/SO/genesets/Isella2017_CRIS_genes.csv')
# First column (CRISA) is currently disabled; only the second column is scored.
#geneset_dict_sam['CRISA'] = [i for i in list(CRIS.iloc[:,0]) if str(i) != 'nan']
geneset_dict_sam['CRISB'] = CRIS.iloc[:, 1].dropna().tolist()

In [None]:
# Score every extra signature per spot; the dict key becomes the obs column
# name. use_raw=True scores on adata_genexp.raw.
for signature, signature_genes in geneset_dict_sam.items():
    sc.tl.score_genes(adata_genexp, gene_list=signature_genes, score_name=signature,
                      use_raw=True)

In [None]:
# Display the AnnData summary, e.g. to check that the score columns were added to obs.
adata_genexp

In [None]:
# Signature score columns to export (names as written by the scoring cells).
columns2keep = ['MP6 Hypoxia', 'MP12 EMT-I', 'MP13 EMT-II', 'MP14 EMT-III ', 'MP15 EMT IV', 'MP17 Interferon/MHC-II (I)', 'MP18 Interferon/MHC-II (II)', 'EpiHR', 'TME_HR', 'CRISB']
# Independent copy of the score columns, indexed by spot. Selecting the
# columns directly keeps index, column names and per-column dtypes without a
# round trip through a NumPy array.
df = adata_genexp.obs.loc[:, columns2keep].copy()
df

In [None]:
# Add the microenvironment label of each spot. The column is aligned on the
# spot index (same result as the previous left merge) but re-running the cell
# overwrites it instead of creating segmentation_labels_x / _y duplicates.
df = df.assign(segmentation_labels=adata_genexp.obs["segmentation_labels"])

In [None]:
# Sample ID = the part of each spot name before the first underscore.
spot_names = df.index.to_series()
df['Sample'] = spot_names.str.split("_").str[0]

In [None]:
# Shorten the two interferon column names (drops the "/MHC-II (…)" suffix).
interferon_labels = {
    'MP17 Interferon/MHC-II (I)': 'MP17 Interferon',
    'MP18 Interferon/MHC-II (II)': 'MP18 Interferon',
}
df = df.rename(columns=interferon_labels)

In [None]:
# Export the per-spot scores. NOTE: the file is tab-separated even though the
# extension is .csv, so it has to be read back with sep='\t'.
scores_path = DIR2SAVE.joinpath('cell2loc_gene_signature_scores_microenvs.csv')
df.to_csv(scores_path, sep='\t', header=True, index=True)

In [None]:
# Display the output directory so the location of the written files is visible.
DIR2SAVE