In [None]:
# Notebook setup: Python-side imports plus the rpy2 bridge used by the %%R cells below.
import gc
import scanpy as sc
import muon as mu
import seaborn as sns
import os
import pandas as pd
from matplotlib import pyplot as plt

# R_HOME is set before anndata2ri / rpy2 are imported; presumably so that rpy2
# binds to this conda environment's R installation — keep this ordering.
# NOTE(review): absolute, machine-specific path; needs adjusting on other systems.
os.environ['R_HOME'] = '/gpfs/bwfor/work/ws/hd_fu399-conda/conda/envs/python_R/lib/R/'
import anndata2ri
import logging

import rpy2.rinterface_lib.callbacks as rcb
import rpy2.robjects as ro

# Raise the threshold of rpy2's callback logger to ERROR.
rcb.logger.setLevel(logging.ERROR)
# Activate the global converters (pandas and AnnData objects) that are relied on
# when Python objects are handed to R with `%%R -i`.
ro.pandas2ri.activate()
anndata2ri.activate()

# Load the IPython extension that provides the %%R cell magic used below.
%load_ext rpy2.ipython

In [None]:
%%R
# Attach the R packages needed by the R cells of this notebook, in this order.
invisible(lapply(
    c("SingleCellExperiment", "Seurat", "UCell", "ggplot2"),
    library,
    character.only = TRUE
))

In [None]:
# Read the "symbol" column of the up- and down-regulated gene tables as plain lists.
fisher_genes_up, fisher_genes_down = (
    pd.read_csv(csv_path, index_col=0)["symbol"].tolist()
    for csv_path in ("../../../fisher_up_genes.csv", "../../../fisher_dn_genes.csv")
)

In [None]:
%%R -i fisher_genes_up

# `-i` copies the Python list into the R session; the later %%R cells use
# `fisher_genes_up` without importing it again. Echo it here as a sanity check.
fisher_genes_up

In [None]:
### integrated by scanvi
# Score the fisher_up gene set with scanpy on neutrophils, using the
# "scANVI_normalized" layer, and write the per-cell scores (in .obs) to CSV.

dataset = sc.read("../../int_data/dataset_integrated_cell_type.h5ad")

# Harmonised metadata columns used by the plots and by the R cell that follows.
dataset.obs["condition"] = dataset.obs["PHENOTYPE"]
# Go through a plain list before fillna — presumably so that "Unknown" can be
# introduced even if the column was categorical — then make it categorical again.
dataset.obs["cell_type"] = dataset.obs["cell_type"].to_list()
dataset.obs["cell_type"] = dataset.obs["cell_type"].fillna("Unknown")
dataset.obs["cell_type"] = dataset.obs["cell_type"].astype("category")
dataset.obs["organ"] = dataset.obs["Tissue"]

dataset.obs["cell_type_organ"] = [
    f"{cell_type}_{organ}"
    for cell_type, organ in zip(dataset.obs["cell_type"], dataset.obs["organ"])
]

dataset = dataset[dataset.obs["cell_type"] == "Neutrophils", :].copy()

# Assign a COPY of the layer. `dataset.X = layer` stores the very same array
# object, and sc.pp.log1p transforms .X in place, so without the copy the
# "scANVI_normalized" layer itself would be log-transformed before the next
# %%R cell reads it.
# NOTE(review): that R cell does not call NormalizeData, so with the layer left
# untouched Seurat/UCell now see the values without log1p — confirm this is intended.
dataset.X = dataset.layers["scANVI_normalized"].copy()
#sc.pp.normalize_total(dataset, target_sum = 1e4)
sc.pp.log1p(dataset)
sc.tl.score_genes(dataset, fisher_genes_up, ctrl_size=100, score_name = "fisher_up")
sc.pp.neighbors(dataset, use_rep = "X_scANVI")
sc.tl.umap(dataset)
sc.pl.umap(dataset, color = ["condition", "organ", "batch"], wspace = 0.6)

# .obs now carries the "fisher_up" score column added by score_genes.
dataset.obs.to_csv("gene_set_scores_integrated_scanpy_scanpy.csv")

# Drop pairwise graphs and unstructured annotations before the object is passed
# to R (presumably to keep the transfer small).
del dataset.obsp
del dataset.uns
gc.collect()

# Point .X back at the untransformed layer for the hand-over to R.
dataset.X = dataset.layers["scANVI_normalized"]

In [None]:
%%R -i dataset

# Seurat AddModuleScore and UCell scoring of fisher_up on the "scANVI_normalized"
# layer of the neutrophil subset; the scores end up in the meta.data written to CSV.

# Rebuild a minimal SingleCellExperiment holding only the layer of interest as "counts".
sce <- SingleCellExperiment(
    list(counts = dataset@assays@data[["scANVI_normalized"]]),
    colData = dataset@colData
)

row.names(sce) <- row.names(dataset)

data <- as.Seurat(sce, data = NULL)

# NormalizeData is deliberately left out for this layer.
#data <- NormalizeData(data)
data <- ScaleData(data, features = row.names(data))
data <- FindVariableFeatures(data)
data <- RunPCA(data, verbose = FALSE)
data <- FindNeighbors(data, verbose = FALSE)
data <- FindClusters(data, verbose = FALSE)
data <- RunUMAP(data, dims = 1:20, verbose = FALSE)

# The resulting columns are read back later as "Seurat_fisher_up1" and
# "UCell_fisher_up_UCell".
data <- AddModuleScore(data, list(fisher_genes_up), name = "Seurat_fisher_up", ctrl = 100)
data <- AddModuleScore_UCell(data, features = list("UCell_fisher_up" = fisher_genes_up))

write.csv(data@meta.data, "gene_set_scores_integrated_scanpy_R.csv")

# A ggplot object is only drawn when printed, and %%R auto-prints just the value
# of the cell's last expression, so the plot has to be printed explicitly.
print(DimPlot(data, group.by = "organ"))

# Free memory without echoing the gc() summary table into the cell output.
invisible(gc())

In [None]:
# Score the fisher_up gene set with scanpy on neutrophils, starting from the
# "soupX_counts" layer (library-size normalised + log1p), and write .obs to CSV.
dataset = sc.read("../../int_data/dataset_integrated_cell_type.h5ad")

# Harmonised metadata columns used by the plots and by the R cell that follows.
dataset.obs["condition"] = dataset.obs["PHENOTYPE"]
# Go through a plain list before fillna — presumably so that "Unknown" can be
# introduced even if the column was categorical — then make it categorical again.
dataset.obs["cell_type"] = dataset.obs["cell_type"].to_list()
dataset.obs["cell_type"] = dataset.obs["cell_type"].fillna("Unknown")
dataset.obs["cell_type"] = dataset.obs["cell_type"].astype("category")
dataset.obs["organ"] = dataset.obs["Tissue"]

dataset.obs["cell_type_organ"] = [
    f"{cell_type}_{organ}"
    for cell_type, organ in zip(dataset.obs["cell_type"], dataset.obs["organ"])
]

dataset = dataset[dataset.obs["cell_type"] == "Neutrophils", :].copy()
# Assign a COPY of the layer. `dataset.X = layer` stores the very same array
# object, and normalize_total / log1p operate on .X in place; without the copy
# the "soupX_counts" layer would already be normalised and logged when the next
# %%R cell reads it as counts and runs NormalizeData on it again.
dataset.X = dataset.layers["soupX_counts"].copy()
sc.pp.normalize_total(dataset, target_sum = 1e4)
sc.pp.log1p(dataset)
sc.pp.pca(dataset)
sc.tl.score_genes(dataset, fisher_genes_up, ctrl_size=100, score_name = "fisher_up")
sc.pp.neighbors(dataset)
sc.tl.umap(dataset)
sc.pl.umap(dataset, color = ["condition", "organ", "batch"], wspace = 0.6)

# .obs now carries the "fisher_up" score column added by score_genes.
dataset.obs.to_csv("gene_set_scores_raw_scanpy.csv")

# Drop pairwise graphs and unstructured annotations before the object is passed
# to R (presumably to keep the transfer small).
del dataset.obsp
del dataset.uns
gc.collect()

# Point .X back at the untouched counts layer for the hand-over to R.
dataset.X = dataset.layers["soupX_counts"]

In [None]:
%%R -i dataset

# Seurat AddModuleScore and UCell scoring of fisher_up starting from the
# "soupX_counts" layer of the neutrophil subset; scores are written with meta.data.

# Rebuild a minimal SingleCellExperiment holding only the layer of interest as "counts".
sce <- SingleCellExperiment(
    list(counts = dataset@assays@data[["soupX_counts"]]),
    colData = dataset@colData
)

row.names(sce) <- row.names(dataset)

data <- as.Seurat(sce, data = NULL)

data <- NormalizeData(data)
data <- ScaleData(data, features = row.names(data))
data <- FindVariableFeatures(data)
data <- RunPCA(data, verbose = FALSE)
data <- FindNeighbors(data, verbose = FALSE)
data <- FindClusters(data, verbose = FALSE)
data <- RunUMAP(data, dims = 1:20, verbose = FALSE)

# The resulting columns are read back later as "Seurat_fisher_up1" and
# "UCell_fisher_up_UCell".
data <- AddModuleScore(data, list(fisher_genes_up), name = "Seurat_fisher_up", ctrl = 100)
data <- AddModuleScore_UCell(data, features = list("UCell_fisher_up" = fisher_genes_up))

write.csv(data@meta.data, "gene_set_scores_raw_R.csv")

# A ggplot object is only drawn when printed, and %%R auto-prints just the value
# of the cell's last expression, so the plot has to be printed explicitly.
print(DimPlot(data, group.by = "organ"))

# Free memory without echoing the gc() summary table into the cell output.
invisible(gc())

In [None]:
# Score the fisher_up gene set with scanpy on neutrophils, starting from the
# "soupX_counts" layer after normalisation, log1p AND per-gene scaling
# (values clipped at 10), and write .obs to CSV.
dataset = sc.read("../../int_data/dataset_integrated_cell_type.h5ad")

# Harmonised metadata columns used by the plots and by the R cell that follows.
dataset.obs["condition"] = dataset.obs["PHENOTYPE"]
# Go through a plain list before fillna — presumably so that "Unknown" can be
# introduced even if the column was categorical — then make it categorical again.
dataset.obs["cell_type"] = dataset.obs["cell_type"].to_list()
dataset.obs["cell_type"] = dataset.obs["cell_type"].fillna("Unknown")
dataset.obs["cell_type"] = dataset.obs["cell_type"].astype("category")
dataset.obs["organ"] = dataset.obs["Tissue"]

dataset.obs["cell_type_organ"] = [
    f"{cell_type}_{organ}"
    for cell_type, organ in zip(dataset.obs["cell_type"], dataset.obs["organ"])
]

dataset = dataset[dataset.obs["cell_type"] == "Neutrophils", :].copy()
# Assign a COPY of the layer. `dataset.X = layer` stores the very same array
# object, and the preprocessing below operates on .X in place; without the copy
# the "soupX_counts" layer would no longer hold counts when the next %%R cell
# reads it and runs NormalizeData on it.
dataset.X = dataset.layers["soupX_counts"].copy()
sc.pp.normalize_total(dataset, target_sum = 1e4)
sc.pp.log1p(dataset)
sc.pp.scale(dataset, max_value = 10)
sc.pp.pca(dataset)
sc.tl.score_genes(dataset, fisher_genes_up, ctrl_size=100, score_name = "fisher_up")
sc.pp.neighbors(dataset)
sc.tl.umap(dataset)
sc.pl.umap(dataset, color = ["condition", "organ", "batch"], wspace = 0.6)

# .obs now carries the "fisher_up" score column added by score_genes.
dataset.obs.to_csv("gene_set_scores_scaled_scanpy.csv")

# Drop pairwise graphs and unstructured annotations before the object is passed
# to R (presumably to keep the transfer small).
del dataset.obsp
del dataset.uns
gc.collect()

# Point .X back at the untouched counts layer for the hand-over to R.
dataset.X = dataset.layers["soupX_counts"]

In [None]:
%%R -i dataset

# Seurat AddModuleScore and UCell scoring of fisher_up starting from the
# "soupX_counts" layer; scores are written with meta.data to the "scaled" CSV.
# NOTE(review): apart from the output file name, this cell runs the same steps as
# the earlier raw-counts R cell — confirm whether a scaled-input variant was intended.

# Rebuild a minimal SingleCellExperiment holding only the layer of interest as "counts".
sce <- SingleCellExperiment(
    list(counts = dataset@assays@data[["soupX_counts"]]),
    colData = dataset@colData
)

row.names(sce) <- row.names(dataset)

data <- as.Seurat(sce, data = NULL)

data <- NormalizeData(data)
data <- ScaleData(data, features = row.names(data))
data <- FindVariableFeatures(data)
data <- RunPCA(data, verbose = FALSE)
data <- FindNeighbors(data, verbose = FALSE)
data <- FindClusters(data, verbose = FALSE)
data <- RunUMAP(data, dims = 1:20, verbose = FALSE)

# The resulting columns are read back later as "Seurat_fisher_up1" and
# "UCell_fisher_up_UCell".
data <- AddModuleScore(data, list(fisher_genes_up), name = "Seurat_fisher_up", ctrl = 100)
data <- AddModuleScore_UCell(data, features = list("UCell_fisher_up" = fisher_genes_up))

write.csv(data@meta.data, "gene_set_scores_scaled_R.csv")

# A ggplot object is only drawn when printed, and %%R auto-prints just the value
# of the cell's last expression, so the plot has to be printed explicitly.
print(DimPlot(data, group.by = "organ"))

# Free memory without echoing the gc() summary table into the cell output.
invisible(gc())

In [None]:
# Reload every score table written by the cells above; the first CSV column
# (the .obs / meta.data row names) becomes the index.
(
    scanpy_integrated,
    scanpy_integrated_r,
    scanpy_raw,
    scanpy_raw_r,
    scanpy_scaled,
    scanpy_scaled_r,
) = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        "gene_set_scores_integrated_scanpy_scanpy.csv",
        "gene_set_scores_integrated_scanpy_R.csv",
        "gene_set_scores_raw_scanpy.csv",
        "gene_set_scores_raw_R.csv",
        "gene_set_scores_scaled_scanpy.csv",
        "gene_set_scores_scaled_R.csv",
    )
)

# One column per (scoring method, input representation), plus two metadata
# columns, all aligned on the index of the integrated scanpy table.
df = pd.DataFrame(
    {
        label: table[column]
        for label, table, column in (
            ("scanpy_int_scanpy", scanpy_integrated, "fisher_up"),
            ("seurat_int_scanpy", scanpy_integrated_r, "Seurat_fisher_up1"),
            ("ucell_int_scanpy", scanpy_integrated_r, "UCell_fisher_up_UCell"),
            ("sc_raw", scanpy_raw, "fisher_up"),
            ("sc_scaled", scanpy_scaled, "fisher_up"),
            ("seurat_raw", scanpy_raw_r, "Seurat_fisher_up1"),
            ("seurat_scaled", scanpy_scaled_r, "Seurat_fisher_up1"),
            ("ucell_raw", scanpy_raw_r, "UCell_fisher_up_UCell"),
            ("ucell_scaled", scanpy_scaled_r, "UCell_fisher_up_UCell"),
            ("organ", scanpy_integrated, "organ"),
            ("condition", scanpy_integrated, "condition"),
        )
    },
    index=scanpy_integrated.index,
)



In [None]:
# Pairwise Pearson correlation (the pandas default) between the score columns.
# `df` also carries the "organ" and "condition" metadata columns (presumably
# string labels); DataFrame.corr() raises on non-numeric columns in pandas >= 2.0,
# so keep only the numeric columns before correlating.
score_corr = df.select_dtypes(include="number").corr()

ax = sns.heatmap(score_corr, vmin = -1, vmax = 1)
ax.set_title("Pearson correlation of fisher_up gene-set scores")

plt.show()