In [None]:
import scanpy as sc
import muon as mu
import pandas as pd

import os
os.environ['R_HOME'] = '/gpfs/bwfor/work/ws/hd_fu399-conda/conda/envs/python_R/lib/R/'
import anndata2ri
import logging
from matplotlib import pyplot as plt
import seaborn as sns

import rpy2.rinterface_lib.callbacks as rcb
import rpy2.robjects as ro

# Raise the threshold of the rpy2 callback logger to ERROR so lower-level
# messages routed through it do not clutter the notebook output.
rcb.logger.setLevel(logging.ERROR)
# Switch on the pandas and AnnData converters for the rpy2 session.
# NOTE(review): `ro.pandas2ri` is reached as an attribute of `rpy2.robjects`
# without an explicit `import rpy2.robjects.pandas2ri` in this cell -
# presumably an earlier import has already loaded that submodule; confirm.
ro.pandas2ri.activate()
anndata2ri.activate()

%load_ext rpy2.ipython

In [None]:
%%R
library(Seurat)
library(scran)
library(BiocParallel)
library(sctransform)

In [None]:
# Read the AnnData file and use the "soupX_counts" layer as the working matrix.
# NOTE(review): no explicit copy is taken here, so X may alias the layer
# object - confirm before editing X in place.
dataset = sc.read("../../int_data/dataset_gcsf_qc.h5ad")
dataset.X = dataset.layers["soupX_counts"]



In [None]:
# Show the "stimulus" column of dataset.obs as this cell's output.
dataset.obs["stimulus"]

In [None]:
# Point X at the "soupX_counts" layer again so the gene filter below is
# evaluated on that matrix regardless of what X held before this cell.
dataset.X = dataset.layers["soupX_counts"]
# Filter genes with the min_cells = 5 threshold; called without copy/inplace
# overrides, so `dataset` itself is modified.
sc.pp.filter_genes(dataset, min_cells = 5)

In [None]:
# Proportional fitting (PF): normalize every cell to a total of 1e4.
# inplace = False makes scanpy return the result (read below via its "X"
# entry) instead of overwriting `dataset`.
proportional_fitting = sc.pp.normalize_total(dataset, target_sum = 1e4, inplace = False)
# log1p of the PF matrix, stored as the "log1pPF" layer.
dataset.layers["log1pPF_normalization"] = sc.pp.log1p(proportional_fitting["X"])
# Second PF pass, this time on the log1p layer. target_sum = None leaves the
# target to scanpy (documented default: the median of the per-cell totals).
# NOTE(review): this layer key is spelled "PFlog1PF_normalization" while the
# embedding key used for it later is "PFlog1pPF" - the two differ by one "p".
dataset.layers["PFlog1PF_normalization"] = sc.pp.normalize_total(dataset, target_sum = None, layer = "log1pPF_normalization", inplace = False)["X"]

In [None]:
from scipy.sparse import csr_matrix, issparse

In [None]:
# Coarse clustering on a throw-away copy: scran only needs groups of similar
# cells to pool within, so nothing computed on `adata_pp` is written back to
# `dataset`.
adata_pp = dataset.copy()
# normalize_total replaces the deprecated normalize_per_cell. Unlike the old
# function it never drops zero-count cells, so `adata_pp` keeps exactly the
# same cells (and order) as `dataset` and the size factors stay aligned.
sc.pp.normalize_total(adata_pp, target_sum=1e6)
sc.pp.log1p(adata_pp)
sc.pp.pca(adata_pp, n_comps=15)
sc.pp.neighbors(adata_pp)
sc.tl.leiden(adata_pp, key_added="groups")

# computeSumFactors expects a matrix of raw counts. `adata_pp.X` has been
# normalized and log-transformed above, so the count layer of `dataset` is
# handed to R instead (transposed so that rows are genes, as R expects).
data_mat = dataset.layers["soupX_counts"].T
input_groups = adata_pp.obs["groups"]

# convert to CSC if possible. See https://github.com/MarioniLab/scran/issues/70
if issparse(data_mat):
    # Fall back to COO when the number of stored values exceeds the 32-bit limit.
    if data_mat.nnz > 2**31 - 1:
        data_mat = data_mat.tocoo()
    else:
        data_mat = data_mat.tocsc()

In [None]:
%%R -i data_mat -i input_groups -o size_factors

# Wrap `data_mat` in a SingleCellExperiment (as its "counts" assay), estimate
# pooled size factors with scran and hand them back to Python as `size_factors`.
# Mind the parentheses: `clusters`, `min.mean` and `BPPARAM` are arguments of
# computeSumFactors(), not of SingleCellExperiment(), despite the indentation.
size_factors = sizeFactors(
    computeSumFactors(
        SingleCellExperiment(
            list(counts=data_mat)), 
            clusters = input_groups,
            min.mean = 0.1,
            BPPARAM = MulticoreParam()
    )
)

In [None]:
# Keep the scran size factors on the cells and reset X to the count layer.
dataset.obs["size_factors"] = size_factors
dataset.X = dataset.layers["soupX_counts"]

# scran can return non-positive (or missing) size factors for problematic
# cells; dividing by them would silently fill the layer with inf/NaN.
if not dataset.obs["size_factors"].gt(0).all():
    raise ValueError(
        "scran returned non-positive or missing size factors; "
        "re-check QC filtering before normalizing."
    )

# Scale each cell (row) by 1 / size factor while staying sparse. The previous
# `sparse / dense` division materialized the full cells x genes matrix as a
# dense array before it was converted back to CSR.
size_factor_values = dataset.obs["size_factors"].to_numpy(dtype=float)
scran = csr_matrix(csr_matrix(dataset.X).multiply(1.0 / size_factor_values[:, None]))
dataset.layers["scran_normalization"] = sc.pp.log1p(scran)

In [None]:
# Independent copy of the whole object; this is the variable passed to the
# %%R cell below via `-i rna_data`.
rna_data = dataset.copy()

In [None]:
%%R -i rna_data -o norm_x -o corrected_counts -o log_normalized

# `rna_data` arrives from Python via -i; build a Seurat object from it using
# its "X" assay as counts and no separate `data` slot.
dataset = rna_data
seurat_obj = as.Seurat(dataset, counts="X", data = NULL)
# Rename the assay from "originalexp" to "RNA".
seurat_obj = RenameAssays(seurat_obj, originalexp = "RNA")
# Run SCTransform with the glmGamPoi backend. return.only.var.genes = FALSE
# asks for scale.data on all genes rather than only the variable ones
# (per the Seurat documentation).
res = SCTransform(object=seurat_obj, method = "glmGamPoi", return.only.var.genes = FALSE)

print(res)

# Export the three matrices of the SCT assay back to Python (-o). They are
# transposed on the Python side before being stored, i.e. they are presumably
# genes x cells here - TODO confirm.
norm_x = res@assays$SCT@scale.data
corrected_counts = res@assays$SCT@counts
log_normalized = res@assays$SCT@data

In [None]:
# Store the three SCTransform outputs as layers. Each matrix coming back from
# R is transposed before it is assigned.
for layer_name, r_matrix in (
    ("scTransform_normalized", norm_x),
    ("scTransform_counts", corrected_counts),
    ("scTransform_log_normalized", log_normalized),
):
    dataset.layers[layer_name] = r_matrix.T

In [None]:
# Attach the "SingleR_label" column of the metadata table as obs["cell_type"].
# NOTE(review): the labels are assigned as a plain list, i.e. purely by row
# position - this assumes the CSV rows are in the same order as dataset.obs;
# confirm, or join on the cell barcode instead.
metadata = pd.read_csv("../02_cell_ident/dataset_gcsf_metadata.csv")
singler_labels = metadata["SingleR_label"].tolist()
dataset.obs["cell_type"] = singler_labels

In [None]:
# Persist the object, including every layer and obs column added so far.
dataset.write("../../int_data/dataset_gcsf_qc_normalized.h5ad")

In [None]:
# Load the normalized object from disk; the cells below can start from here
# without re-running the normalization steps.
dataset = sc.read("../../int_data/dataset_gcsf_qc_normalized.h5ad")

In [None]:
def dim_red(dataset, key):
    """Run PCA, neighbours and UMAP on ``dataset`` and keep copies tagged with ``key``.

    PCA is run with scanpy's defaults on whatever ``dataset.X`` currently
    holds, so the caller selects the input by setting ``dataset.X`` first.

    Results written to ``dataset`` (modified in place, nothing is returned):

    * ``obsm["X_pca_<key>"]``  - copy of the PCA coordinates
    * ``"<key>_neighbors"``    - neighbour graph built on that PCA copy
    * ``obsm["X_umap_<key>"]`` - copy of the UMAP coordinates

    The generic ``obsm["X_pca"]`` / ``obsm["X_umap"]`` entries are rewritten
    by every call, which is why keyed copies are stored.
    """
    pca_slot = f"X_pca_{key}"
    graph_slot = f"{key}_neighbors"
    umap_slot = f"X_umap_{key}"

    sc.pp.pca(dataset)
    dataset.obsm[pca_slot] = dataset.obsm["X_pca"].copy()

    sc.pp.neighbors(dataset, use_rep=pca_slot, key_added=graph_slot)

    sc.tl.umap(dataset, neighbors_key=graph_slot)
    dataset.obsm[umap_slot] = dataset.obsm["X_umap"].copy()

In [None]:
# Compute one PCA/UMAP embedding per normalization.
# Each pair is (layer holding the normalized matrix, key tagging the results).
for layer_name, embedding_key in (
    ("log1pPF_normalization", "log1pPF"),
    ("PFlog1PF_normalization", "PFlog1pPF"),
    ("scran_normalization", "scran"),
    ("scTransform_normalized", "sctransform"),
):
    # dim_red works on dataset.X, so swap the layer in before each call.
    dataset.X = dataset.layers[layer_name]
    dim_red(dataset, key=embedding_key)

In [None]:
color = "batch"

# One panel per normalization: (obsm basis, panel title).
# "PF" stands for proportional *fitting*; the titles previously read "Filtering".
# Single-line titles end in "\n" so all four panels reserve two title lines.
panels = [
    ("X_umap_log1pPF", "Log1p Proportional Fitting\n"),
    ("X_umap_PFlog1pPF", "Proportional Fitting\nLog1p Proportional Fitting"),
    ("X_umap_scran", "Scran Normalization\n"),
    ("X_umap_sctransform", "scTransform Normalization\n"),
]

fig, ax = plt.subplots(ncols = len(panels), nrows = 1, figsize = (12,3))

for panel_index, (basis, title) in enumerate(panels):
    axis = sc.pl.embedding(dataset, color = color, basis = basis, ax = ax[panel_index], show = False)
    axis.set_xlabel("UMAP_1")
    axis.set_ylabel("UMAP_2")
    axis.set_title(title)
    if panel_index == len(panels) - 1:
        # Only the right-most panel keeps a legend, placed outside the axes.
        axis.legend(title = "Batch", bbox_to_anchor = (1.1, 1))
    else:
        axis.legend().remove()

plt.tight_layout()
#plt.savefig(f"{outputDir}01_color_batch.pdf", dpi = 300)
plt.show()

In [None]:
# Start again from the saved object and keep only the cells labelled "Neutrophils".
dataset = sc.read("../../int_data/dataset_gcsf_qc_normalized.h5ad")
# Boolean indexing returns a view of the parent AnnData. The following cells
# assign to .X and .obsm, so take an explicit copy here instead of relying on
# the implicit view-to-copy conversion (and its ImplicitModificationWarning).
dataset = dataset[dataset.obs["cell_type"] == "Neutrophils"].copy()

In [None]:
# Recompute one PCA/UMAP embedding per normalization on the current `dataset`.
# Each pair is (layer holding the normalized matrix, key tagging the results).
for layer_name, embedding_key in (
    ("log1pPF_normalization", "log1pPF"),
    ("PFlog1PF_normalization", "PFlog1pPF"),
    ("scran_normalization", "scran"),
    ("scTransform_normalized", "sctransform"),
):
    # dim_red works on dataset.X, so swap the layer in before each call.
    dataset.X = dataset.layers[layer_name]
    dim_red(dataset, key=embedding_key)

In [None]:
color = "batch"

# One panel per normalization: (obsm basis, panel title).
# "PF" stands for proportional *fitting*; the titles previously read "Filtering".
# Single-line titles end in "\n" so all four panels reserve two title lines.
panels = [
    ("X_umap_log1pPF", "Log1p Proportional Fitting\n"),
    ("X_umap_PFlog1pPF", "Proportional Fitting\nLog1p Proportional Fitting"),
    ("X_umap_scran", "Scran Normalization\n"),
    ("X_umap_sctransform", "scTransform Normalization\n"),
]

fig, ax = plt.subplots(ncols = len(panels), nrows = 1, figsize = (12,3))

for panel_index, (basis, title) in enumerate(panels):
    axis = sc.pl.embedding(dataset, color = color, basis = basis, ax = ax[panel_index], show = False)
    axis.set_xlabel("UMAP_1")
    axis.set_ylabel("UMAP_2")
    axis.set_title(title)
    if panel_index == len(panels) - 1:
        # Only the right-most panel keeps a legend, placed outside the axes.
        axis.legend(title = "Batch", bbox_to_anchor = (1.1, 1))
    else:
        axis.legend().remove()

plt.tight_layout()
#plt.savefig(f"{outputDir}01_color_batch.pdf", dpi = 300)
plt.show()