In [None]:
import os

import anndata
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import seaborn as sns
from scipy.sparse import csr_matrix

In [None]:
# Load the GO-term membership table; the CSV's unnamed first column becomes the row index.
# Downstream cells treat rows as gene names and columns as GO terms, with 1 marking membership.
go_sets_csv = ".../Atlas/MN/go_sets_human.csv"
go_term_gene_matrix = pd.read_csv(go_sets_csv, index_col="Unnamed: 0")

In [None]:
# Global scanpy configuration: logging verbosity 3, 8x8 figures on a white background.
sc.settings.verbosity = 3
sc.set_figure_params(figsize=(8, 8), facecolor="white")

# Record the package versions used for this run.
sc.logging.print_versions()

In [None]:
# Switch the working directory to the sample folder.
# NOTE(review): presumably so that later relative paths resolve against it — confirm it is still needed.
os.chdir(".../Atlas/10X_data/mouse_spatial/")

# Read the Visium count matrix (and images, since load_images=True) into an AnnData object.
adata = sc.read_visium(
    ".../Atlas/10X_data/mouse_spatial/spatial",
    genome=None,
    count_file='Visium_FFPE_Mouse_Kidney_filtered_feature_bc_matrix.h5',
    library_id=None,
    load_images=True,
    source_image_path="/home/kloetzer/Atlas/10X_data/mouse_spatial/spatial",
)

# Gene symbols can repeat; make them unique before they are used as identifiers.
adata.var_names_make_unique()

# Flag genes whose name starts with "mt-" and compute QC metrics, including the
# per-spot mitochondrial percentage (pct_counts_mt) used for filtering later on.
adata.var["mt"] = adata.var_names.str.startswith("mt-")
sc.pp.calculate_qc_metrics(adata, qc_vars=["mt"], inplace=True)

In [None]:
# Display the AnnData summary (dimensions and the annotation fields populated so far).
adata

In [None]:
# QC overview: total counts and detected genes per spot, each shown over the full range
# and zoomed into the lower range (< 20000 counts, < 5000 genes).
# sns.histplot replaces the deprecated sns.distplot; with kde=False both draw a count histogram.
fig, axs = plt.subplots(1, 4, figsize=(15, 4))

total_counts_per_spot = adata.obs["total_counts"]
genes_per_spot = adata.obs["n_genes_by_counts"]

sns.histplot(total_counts_per_spot, kde=False, ax=axs[0])
sns.histplot(total_counts_per_spot[total_counts_per_spot < 20000], kde=False, bins=40, ax=axs[1])
sns.histplot(genes_per_spot, kde=False, bins=60, ax=axs[2])
sns.histplot(genes_per_spot[genes_per_spot < 5000], kde=False, bins=60, ax=axs[3])

In [None]:
# Distribution of the mitochondrial count percentage per spot.
# sns.distplot is deprecated; a density-normalised histplot with a KDE overlay is its replacement.
sns.histplot(adata.obs["pct_counts_mt"], kde=True, stat="density")

In [None]:
# Keep spots whose total counts lie between 10000 and 55000.
sc.pp.filter_cells(adata, min_counts=10000)
sc.pp.filter_cells(adata, max_counts=55000)

# Keep spots with less than 20% mitochondrial counts.
# Boolean indexing returns a view of the original object; .copy() materialises it so that the
# in-place gene filter below operates on a real AnnData instead of triggering an implicit copy.
adata = adata[adata.obs["pct_counts_mt"] < 20].copy()
print(f"#cells after MT filter: {adata.n_obs}")

# Drop genes detected in fewer than 5 of the remaining spots.
sc.pp.filter_genes(adata, min_cells=5)

In [None]:
# Read the table with one-to-one orthologous human and mouse gene names
orthologs_table = pd.read_csv('/home/kloetzer/Atlas/EnsemblGeneLists/Genelist_V2_subset.csv')

# Create a dictionary mapping mouse gene names to human gene names
orthologs_dict = dict(zip(orthologs_table['Mouse.gene.name'], orthologs_table['Gene.name']))

# Mouse genes that have an entry in the ortholog table
list_subset = orthologs_table['Mouse.gene.name'].tolist()

# Restrict the data to those genes. Column indexing returns a view; .copy() materialises it
# so that renaming var_names below modifies a real AnnData rather than a view.
adata = adata[:, adata.var_names.isin(list_subset)].copy()

# Change the gene names in the AnnData object to the corresponding human gene names.
# Every remaining gene is a key of orthologs_dict, so the lookup itself cannot miss.
# NOTE(review): assumes the table really is one-to-one with no missing human names;
# otherwise var_names could end up duplicated or NaN — confirm against the table.
adata.var_names = adata.var_names.map(orthologs_dict)

In [None]:
# We increase the number of highly variable genes (HVGs) to 3000.

In [None]:
# Normalise total counts per spot, then log1p-transform; both calls modify adata in place.
sc.pp.normalize_total(adata, inplace=True)
sc.pp.log1p(adata)

# Keep only the 3000 most highly variable genes (Seurat flavour); subset=True drops the rest.
sc.pp.highly_variable_genes(
    adata,
    flavor="seurat",
    n_top_genes=3000,
    subset=True,
)

In [None]:
# Restrict both the expression data and the GO-term table to the genes they share.
genes_to_keep = adata.var_names.intersection(go_term_gene_matrix.index)
adata_filtered = adata[:, genes_to_keep]
go_term_gene_matrix = go_term_gene_matrix.loc[genes_to_keep]

# Gene sets are selected by column position below, so the rows of the GO table must line up
# one-to-one with the columns of adata_filtered. Duplicate gene names would break that silently.
if not go_term_gene_matrix.index.equals(adata_filtered.var_names):
    raise ValueError(
        "GO-term table rows and AnnData genes are not aligned (duplicate gene names?)"
    )

# Keep only gene sets with more than 10 member genes among the shared genes.
go_term_gene_matrix = go_term_gene_matrix.loc[:, (go_term_gene_matrix == 1).sum() > 10]

# Fetch the expression matrix once instead of re-slicing the AnnData view for every GO term.
expression = adata_filtered.X

# GO term -> per-spot mean expression over the member genes of that term.
mean_expression_dict = {}

for go_term in go_term_gene_matrix.columns:
    # Column positions of the genes annotated to this GO term (membership is encoded as 1).
    member_columns = np.flatnonzero((go_term_gene_matrix[go_term] == 1).to_numpy())

    # Mean over the member genes for every spot; np.asarray(...).flatten() yields a 1-D vector
    # whether X is a dense array or a sparse matrix.
    mean_expression_dict[go_term] = np.asarray(
        expression[:, member_columns].mean(axis=1)
    ).flatten()

# Spots x GO terms table of mean expression values.
mean_expression_df = pd.DataFrame(mean_expression_dict, index=adata.obs_names)

In [None]:
import anndata
from scipy.sparse import csr_matrix

In [None]:
# Wrap the spots x GO-terms score table in a CSR sparse matrix.
mean_expression_values = mean_expression_df.to_numpy()
mean_expression_sparse = csr_matrix(mean_expression_values)

In [None]:
# Create a new AnnData object whose features are GO terms: X holds the per-spot mean expression
# of each gene set, while obs, obsm and uns are carried over from adata.
# NOTE(review): presumably obsm/uns are what give sc.pl.spatial its coordinates and images — confirm.
adata_new = anndata.AnnData(
    X=mean_expression_sparse,
    # Copy so that columns added to adata_new.obs later (e.g. cluster labels) stay out of adata.obs.
    obs=adata.obs.copy(),
    var=pd.DataFrame(index=mean_expression_df.columns),
    obsm=adata.obsm,
    # Shallow copy: AnnData may keep a reference to the dict it is given, in which case entries
    # written to adata_new.uns by later steps would also appear in adata.uns.
    uns=adata.uns.copy(),
)


In [None]:
# Dimensionality reduction and clustering on the GO-term score matrix (adata_new), not on genes.
# The calls run in this order: PCA, neighbourhood graph, UMAP embedding, Leiden clustering.
# NOTE(review): all four rely on scanpy's default parameters (including random seeds) — confirm
# that is intended for reproducibility.
sc.pp.pca(adata_new)
sc.pp.neighbors(adata_new)
sc.tl.umap(adata_new)
# Leiden cluster labels are stored under the "clusters" key.
sc.tl.leiden(adata_new, key_added="clusters")

In [None]:
# Figure defaults for the plots below: 200 dpi, no axis frame, 4x4 figures.
# All options are set in a single call because every set_figure_params call resets the arguments
# it is not given to their defaults (dpi=80, frameon=True), so a sequence of calls undoes
# earlier settings. This one call also sets rcParams["figure.dpi"] and rcParams["figure.figsize"].
sc.set_figure_params(dpi=200, frameon=False, figsize=(4, 4))

In [None]:
# GO-term scores to display on the tissue image, one figure per term, in this order.
go_terms_to_plot = (
    "GO.0071470.cellular.response.to.osmotic.stress.BP",
    "GO.0030104.water.homeostasis.BP",
    "GO.0006885.regulation.of.pH.BP",
)

for go_term_to_plot in go_terms_to_plot:
    # Overlay the score on the "hires" image with the "rocket" colormap, no frame, empty title.
    sc.pl.spatial(
        adata_new,
        img_key="hires",
        color=go_term_to_plot,
        cmap="rocket",
        frameon=False,
        title="",
    )
