In [None]:
## Notebook env: cell2loc_env (cell2loc_env kernel)
## This notebook runs cell2location to *spatially* deconvolve cells using an annotated reference scRNAseq dataset

In [1]:
import sys
import scanpy as sc
import anndata
import pandas as pd
import numpy as np
import os
import gc


import matplotlib as mpl
from matplotlib import rcParams
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.io import mmread

# suppress all warnings globally (scanpy prints a lot of them)
import warnings
warnings.filterwarnings('ignore')
rcParams['pdf.fonttype'] = 42 # enables correct plotting of text for PDFs

In [2]:
# Input / output locations used throughout the notebook.
# All folder paths end with a trailing '/' so file names can be appended directly.
sp_data_folder = '/home/ssobti/projects/farnaz_spatial/data/monkey_data/unpacked_data/'
nb_mtx_path = '/home/ssobti/projects/farnaz_spatial/output_data/monkey/UMI_normalized_seurat_slices/'
results_folder = '/home/ssobti/projects/farnaz_spatial/output_data/monkey/cell2location/old_cell2location_multi_ref_annot/'

# Name of the regression-model run directory and its full path
# (<results_folder>regression_model/<run name>/).
regression_model_output = 'RegressionGeneBackgroundCoverageTorch_22covariates_291834cells_13082genes'
# The empty last component makes os.path.join keep the trailing '/'.
reg_path = os.path.join(results_folder, 'regression_model', regression_model_output, '')

In [4]:
## Load the snRNAseq reference (raw counts) stored alongside the regression model
snrna_h5ad_path = reg_path + 'sc.h5ad'
adata_snrna_raw = sc.read(snrna_h5ad_path)

In [5]:
## Convert mouse gene symbols in the reference to human gene symbols.
## Genes with no human homolog in the BioMart table keep their original name.

import gseapy as gp
from gseapy import Biomart
bm = Biomart()
# Query the *mouse* Ensembl dataset; the human homologs are requested as
# 'hsapiens_homolog_*' attributes of each mouse gene.
m2h = bm.query(dataset='mmusculus_gene_ensembl',
               attributes=['ensembl_gene_id','external_gene_name',
                           'hsapiens_homolog_ensembl_gene',
                           'hsapiens_homolog_associated_gene_name'])


# Build the mouse-symbol -> human-symbol lookup.
# Rows missing either symbol are dropped; if a mouse symbol occurs in several
# rows, the last row wins (later dict entries overwrite earlier ones).
m2h_pairs = m2h[["external_gene_name", "hsapiens_homolog_associated_gene_name"]].dropna()
m2h_dict = dict(zip(m2h_pairs["external_gene_name"],
                    m2h_pairs["hsapiens_homolog_associated_gene_name"]))

# Translate every reference gene, preserving order; unmapped genes pass through unchanged.
# NOTE(review): different mouse symbols can map to the same human symbol, so the
# output may contain duplicate names -- confirm downstream code tolerates this.
m2h_genes_input = adata_snrna_raw.var['features']
m2h_genes_output = [m2h_dict.get(gene, gene) for gene in m2h_genes_input]

In [8]:
# Single-column table (default column name 0) holding the converted gene names, in reference order
df = pd.DataFrame(data=m2h_genes_output)

In [11]:
# Save the converted gene names next to the other cell2location results (no index column)
df.to_csv(f'{results_folder}m2h_genes.csv', index=False)