In [None]:
import scanpy as sc
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
from statannotations.Annotator import Annotator, PValueFormat

import os

# R_HOME is set before the rpy2-backed imports below.
# NOTE(review): absolute, cluster-specific path — presumably it selects the R
# installation of the `python_R` conda environment; confirm before running elsewhere.
os.environ['R_HOME'] = '/gpfs/bwfor/work/ws/hd_fu399-conda/conda/envs/python_R/lib/R/'
import anndata2ri
import logging

import rpy2.rinterface_lib.callbacks as rcb
import rpy2.robjects as ro

# Only let ERROR-level records through the rpy2 callback logger.
rcb.logger.setLevel(logging.ERROR)
# Activate the pandas and AnnData converters (presumably what the %%R cells'
# -i/-o transfers rely on — confirm).
ro.pandas2ri.activate()
anndata2ri.activate()

# Load the rpy2 IPython extension; the %%R cells below depend on it.
%load_ext rpy2.ipython

# Seaborn "whitegrid" style with the axes grid switched off.
sns.set_style("whitegrid", {'axes.grid' : False})

In [None]:
%%R
# Attach lme4 in the embedded R session. The model cells further down call
# lmerTest::lmer explicitly.
library(lme4)

In [None]:
# Read the up- and down-regulated gene tables and keep their "symbol" columns
# as plain Python lists.
fisher_up_table = pd.read_csv("../../../fisher_up_genes.csv", index_col = 0)
fisher_dn_table = pd.read_csv("../../../fisher_dn_genes.csv", index_col = 0)

fisher_genes_up = fisher_up_table["symbol"].to_list()
fisher_genes_down = fisher_dn_table["symbol"].to_list()

In [None]:
%%R -i fisher_genes_up -o fisher_genes_up_mouse

# `fisher_genes_up` is pushed in from Python (-i); `fisher_genes_up_mouse` is
# pulled back out after the cell has run (-o).
gene_list <- fisher_genes_up
library(dplyr)
# Tab-separated mouse/human homology report, downloaded from the JAX server
# every time this cell runs (requires network access).
mouse_human_genes = read.csv("http://www.informatics.jax.org/downloads/reports/HOM_MouseHumanSequence.rpt",sep="\t")

# Translate human gene symbols into the mouse symbols that share a homology
# class with them.
#
# Arguments:
#   gene_list : vector (or list) of human gene symbols.
#   homology  : homology table with the columns `Symbol`,
#               `Common.Organism.Name` and `DB.Class.Key`. Defaults to the
#               global `mouse_human_genes`, so existing one-argument calls
#               keep working.
#
# Returns:
#   Character vector of mouse symbols, in input-gene order and, within a gene,
#   in table order. Duplicates are kept. Symbols without a human entry or
#   without a mouse partner contribute nothing; if nothing matches at all the
#   result is character(0) rather than NULL.
convert_human_to_mouse <- function(gene_list, homology = mouse_human_genes){

  organism  <- homology[["Common.Organism.Name"]]
  symbols   <- homology[["Symbol"]]
  class_ids <- homology[["DB.Class.Key"]]

  is_human <- organism == "human"
  is_mouse <- organism == "mouse, laboratory"

  mouse_per_gene <- lapply(gene_list, function(gene){
    # which() drops NA comparisons, mirroring dplyr::filter semantics.
    class_keys <- class_ids[which(is_human & symbols == gene)]
    if(length(class_keys) == 0){
      return(character(0))
    }
    # A symbol can belong to several homology classes, so match with %in%
    # instead of `==` (which would recycle the key vector against the column).
    as.character(symbols[which(is_mouse & class_ids %in% class_keys)])
  })

  output <- unlist(mouse_per_gene, use.names = FALSE)
  if(is.null(output)){
    output <- character(0)
  }

  return (output)
}

# Convert the up-regulated list; the result is exported to Python through the
# cell's `-o fisher_genes_up_mouse` flag.
fisher_genes_up_mouse <- convert_human_to_mouse(fisher_genes_up)

In [None]:
# Load the QC'd, normalized dataset and keep only the cells annotated as
# neutrophils.
dataset = sc.read("../../int_data/dataset_qc_normalized.h5ad")
# Subsetting an AnnData yields a view. Materialize it with .copy() so the
# `.obs[...] = ...` and `.X = ...` assignments in later cells operate on an
# independent object instead of triggering an implicit view-to-copy conversion.
dataset = dataset[dataset.obs["cell_type"] == "Neutrophils",:].copy()

In [None]:
# Display the distinct values of the "Run" column.
dataset.obs["Run"].unique()

In [None]:
# Display the number of cells per raw "Condition" label.
dataset.obs["Condition"].value_counts()

In [None]:
# Display the AnnData summary of the neutrophil subset.
dataset

In [None]:
# Short, identifier-friendly labels for the raw "Cell_type" (organ of origin)
# and "Condition" annotations. Values missing from a map become NaN.
organ_map = {
    'Bone marrow neutrophil': "bone_marrow",
    'Liver neutrophil': "liver",
    'Peripheral blood neutrophil': "peripheral_blood",
    'Peritoneal cavity neutrophil': "peritoneal_cavity",
    'Spleen neutrophil': "spleen",
}
condition_map = {
    'E. coli challenge (24h)': "Ecoli",
    'Steady state': "steady_state",
}

raw_condition = dataset.obs["Condition"]
raw_cell_type = dataset.obs["Cell_type"]
dataset.obs["condition"] = raw_condition.map(condition_map)
dataset.obs["organ"] = raw_cell_type.map(organ_map)

In [None]:
# Gene-set score tables read from ../05_gene_set_scores. The file names encode
# the input matrix (integrated / raw / scaled) and the `_scanpy` / `_R` suffix.
scanpy_integrated = pd.read_csv("../05_gene_set_scores/gene_set_scores_integrated_scanpy_scanpy.csv", index_col = 0)
scanpy_integrated_r = pd.read_csv("../05_gene_set_scores/gene_set_scores_integrated_scanpy_R.csv", index_col = 0)

scanpy_raw = pd.read_csv("../05_gene_set_scores/gene_set_scores_raw_scanpy.csv", index_col = 0)
scanpy_raw_r = pd.read_csv("../05_gene_set_scores/gene_set_scores_raw_R.csv", index_col = 0)

scanpy_scaled = pd.read_csv("../05_gene_set_scores/gene_set_scores_scaled_scanpy.csv", index_col = 0)
scanpy_scaled_r = pd.read_csv("../05_gene_set_scores/gene_set_scores_scaled_R.csv", index_col = 0)

seurat_integrated = pd.read_csv("../05_gene_set_scores/gene_set_scores_integrated_seurat_R.csv", index_col = 0)

#scgsea = pd.read_csv(f"../05_gene_set_scores/gcsf_metadata_scgsea.csv", index_col = 0)

# One column per score variant plus the metadata columns. Each Series is
# aligned to the index of `scanpy_integrated` when the frame is built.
score_columns = {
    "scanpy_int_scanpy": scanpy_integrated["fisher_up"],
    "seurat_int_scanpy": scanpy_integrated_r["Seurat_fisher_up1"],
    "ucell_int_scanpy": scanpy_integrated_r["UCell_fisher_up_UCell"],

    "sc_raw": scanpy_raw["fisher_up"],
    "sc_scaled": scanpy_scaled["fisher_up"],

    "seurat_raw": scanpy_raw_r["Seurat_fisher_up1"],
    "seurat_scaled": scanpy_scaled_r["Seurat_fisher_up1"],
    "ucell_raw": scanpy_raw_r["UCell_fisher_up_UCell"],
    "ucell_scaled": scanpy_scaled_r["UCell_fisher_up_UCell"],

    "seurat_int_seurat": seurat_integrated["Seurat_fisher_up1"],
    "ucell_int_seurat": seurat_integrated["UCell_fisher_up_UCell"],
    "organ": scanpy_integrated["organ"],
    "condition": scanpy_integrated["condition"],
    "batch" : scanpy_integrated["batch"],
    #"scGSEA": scgsea["scgsea"]
}
df = pd.DataFrame(index = scanpy_integrated.index, data = score_columns)

# The same table is written under both file names.
for out_path in ("gene_set_scores_total.csv", "gene_set_scores_total_xie.csv"):
    df.to_csv(out_path)

In [None]:
%%R -i df -o pvalue_ecoli

# Compare two mixed models of `sc_raw` (random intercepts for organ and
# batch), with and without the fixed `condition` term. anova() on the two
# fits supplies the `Pr(>Chisq)` value exported as `pvalue_ecoli`.
gene_set_scores <- df
gene_set_scores$batch <- factor(gene_set_scores$batch)
gene_set_scores$condition <- factor(gene_set_scores$condition, levels = c("steady_state", "Ecoli"))

# Rows whose condition is not one of the two levels are dropped.
keep_rows <- gene_set_scores$condition %in% c("steady_state", "Ecoli")
gene_set_scores <- gene_set_scores[keep_rows,]

lm0 <- lmerTest::lmer(sc_raw ~ (1|organ) + (1|batch),
                      data = gene_set_scores,
                      REML = FALSE)
lm1 <- lmerTest::lmer(sc_raw ~ condition + (1|organ) + (1|batch),
                      data = gene_set_scores,
                      REML = FALSE)

test <- anova(lm0, lm1)
pvalue_ecoli <- test[["Pr(>Chisq)"]][2]


In [None]:
# Boxplot of the raw scanpy module score per condition, annotated with the
# mixed-model p-value computed in the preceding %%R cell.
pairs = [
    ("steady_state", "Ecoli"),
]

p_values = [pvalue_ecoli[0]]

fig, ax = plt.subplots(ncols = 1, nrows = 1, figsize = (4,4))

# Shared by seaborn and statannotations. `order` lives here so both draw the
# categories in the same left-to-right order.
ax0_parameters = {
    'data':    df,
    'x':       'condition',
    'y':       'sc_raw',
    'ax': ax,
    'order': ["steady_state", "Ecoli"]
}

# whis = (0,100): whiskers span the full data range.
sns.boxplot(whis = (0,100),
            **ax0_parameters)

annotator = Annotator(pairs = pairs,
                      **ax0_parameters)
# `line_offset` is a numeric spacing option; the previous placeholder string
# "asd" was not a valid value, so the library default is used instead.
annotator.configure(loc = "inside")
annotator.set_pvalues(p_values)

annotator.annotate()

ax.set_title("Module Score")
ax.set_ylabel("score")
ax.set_xlabel("")
ax.set_xticklabels(ax.get_xticklabels(), rotation = 45, ha = "right")

plt.tight_layout()
plt.savefig("module_scores.pdf", dpi = 300, bbox_inches = "tight")
plt.show()

In [None]:
# Use the "log1pPF_normalization" layer as the active expression matrix.
# Note: X and the layer now refer to the same object.
dataset.X = dataset.layers["log1pPF_normalization"]

In [None]:
# Flag the 2000 most variable genes on a scratch copy, so the annotation
# columns the call adds do not end up on `dataset`, then subset the original
# object to the flagged genes.
hvg = dataset.copy()
sc.pp.highly_variable_genes(hvg, n_top_genes = 2000)
is_highly_variable = hvg.var["highly_variable"]
hvg = dataset[:, is_highly_variable].copy()

In [None]:
# Clip the raw module score to its 5th-95th percentile range (computed over
# all rows of `df`) before storing it. The assignment aligns on the cell index.
raw_scores = df["sc_raw"]
score_floor = np.quantile(raw_scores, 0.05)
score_ceiling = np.quantile(raw_scores, 0.95)
hvg.obs["module_score"] = raw_scores.clip(lower = score_floor, upper = score_ceiling)

hvg.X = hvg.layers["log1pPF_normalization"]

# PCA -> neighbor graph -> UMAP, all with scanpy defaults.
sc.pp.pca(hvg)
sc.pp.neighbors(hvg)
sc.tl.umap(hvg)

In [None]:
# Two stacked UMAP panels: experimental condition on top, clipped module score
# below (drawn without a colorbar).
sc.set_figure_params(vector_friendly=True, dpi_save=300)

fig, ax = plt.subplots(ncols = 1, nrows = 2, figsize = (4,5.5))
condition_ax, score_ax = ax[0], ax[1]

sc.pl.umap(hvg, color = "condition", ax = condition_ax, show = False)
sc.pl.umap(hvg,
           color = "module_score",
           cmap = "viridis",
           colorbar_loc=None,
           ax = score_ax,
           show = False)

condition_ax.set_title("Experimental Condition")
score_ax.set_title("Module Score\nCore Program")

plt.tight_layout()
plt.savefig("umap.pdf", dpi = 300, bbox_inches = "tight")
plt.show()

In [None]:
# Display the embeddings stored on `hvg`; "X_umap" is read from here in the
# next cell.
hvg.obsm

In [None]:
# Export the UMAP coordinates together with the clipped module score and the
# condition labels. Rows follow the cell order of `hvg`; the written index is
# the default 0..n-1 range.
umap_df = pd.DataFrame(hvg.obsm["X_umap"], columns = ["UMAP1", "UMAP2"])

# Same 5th-95th percentile clipping as the `module_score` column.
clipped_scores = df["sc_raw"].clip(lower = np.quantile(df["sc_raw"], 0.05),
                                   upper = np.quantile(df["sc_raw"], 0.95))
# Look the scores up by cell name instead of relying on `df` and `hvg` having
# identical row order and length; a purely positional copy would silently
# attach scores to the wrong cells if they ever differ.
umap_df["sc_raw"] = clipped_scores.reindex(hvg.obs_names).to_list()
umap_df["condition"] = hvg.obs["condition"].to_list()
umap_df["inflammation"] = umap_df["condition"].map({"steady_state": "control", "Ecoli": "inflamed"})

umap_df.to_csv("xie_umap.csv")

In [None]:
# Restrict the dataset to the genes present in the mouse-converted
# up-regulated list; .copy() makes the subset independent of `dataset`.
fisher_set = dataset[:, dataset.var_names.isin(fisher_genes_up_mouse)].copy()

In [None]:
from sklearn.preprocessing import StandardScaler


def _clip_to_global_quantiles(frame, lower_q = 0.1, upper_q = 0.9):
    """Clip every value of `frame` to the [lower_q, upper_q] quantile range.

    The quantiles are computed over all values of the frame at once, not per
    column.
    """
    return frame.clip(lower = np.quantile(frame, lower_q),
                      upper = np.quantile(frame, upper_q))


# Scale a *copy* of the normalized layer. Without the copy, X and the layer
# are the same object, and sc.pp.scale (which works in place on dense arrays)
# could overwrite "log1pPF_normalization", which the layers below and the
# later plots still read.
fisher_set.X = fisher_set.layers["log1pPF_normalization"].copy()
sc.pp.scale(fisher_set, max_value = 10)

# scanpy-scaled values, plus a version clipped to the global 10-90% range.
fisher_set.layers["scaled"] = fisher_set.X
fisher_set.layers["scaled_clipped"] = _clip_to_global_quantiles(
    fisher_set.to_df(layer = "scaled")
).values

# Normalized values clipped to the global 10-90% range.
fisher_set.layers["quantiled"] = _clip_to_global_quantiles(
    fisher_set.to_df(layer = "log1pPF_normalization")
).values

# sklearn standardization of the normalized values, plus a clipped version.
fisher_set.layers["standard_scaled"] = StandardScaler().fit_transform(
    fisher_set.to_df(layer = "log1pPF_normalization").values
)
fisher_set.layers["standard_scaled_clipped"] = _clip_to_global_quantiles(
    fisher_set.to_df(layer = "standard_scaled")
).values

# Genes ordered by total normalized expression, highest first.
# NOTE: `df` is rebound here and no longer holds the gene-set score table.
df = fisher_set.to_df(layer = "log1pPF_normalization")
genes_to_plot = df[df.sum().sort_values(ascending=False).index].columns.to_list()

In [None]:
# Per-cell heatmap of the "scaled" layer, cells grouped by condition and each
# gene rescaled via standard_scale = "var".
heatmap_options = dict(
    groupby = 'condition',
    layer = "scaled",
    standard_scale = "var",
    cmap = "YlOrBr",
    figsize = (4,4),
    show = False,
)
ax_dict = sc.pl.heatmap(fisher_set, genes_to_plot, **heatmap_options)

groupby_ax = ax_dict["groupby_ax"]
groupby_ax.set_ylabel("condition")

plt.savefig("single_cell_heatmap.pdf", dpi = 300, bbox_inches = "tight")
plt.show()

In [None]:
# Wilcoxon ranking between the two conditions on the normalized values.
# `goi` holds the 30 gene names with the smallest adjusted p-values in the
# result table of the "not_inflamed" group.
fisher_set.X = fisher_set.layers["log1pPF_normalization"]

inflammation_labels = {"steady_state": "not_inflamed", "Ecoli": "inflamed"}
fisher_set.obs["condition_2"] = fisher_set.obs["condition"].map(inflammation_labels)

sc.tl.rank_genes_groups(fisher_set, 'condition_2', method='wilcoxon', key_added = "wilcoxon")

ranked_genes = sc.get.rank_genes_groups_df(fisher_set, group = "not_inflamed", key = "wilcoxon")
top_ranked = ranked_genes.sort_values("pvals_adj").iloc[0:30]
goi = top_ranked["names"].to_list()

In [None]:
def evenly_distribute_genes_of_interest(labels, positions_idx, gene_list = None):
    """Reorder a gene list so that the labelled genes are spread out evenly.

    A labelled gene is placed at every `spacing`-th position, starting at
    position 0. The remaining genes fill the gaps in their original order.

    Parameters
    ----------
    labels
        Genes to spread out. Each must occur in `gene_list`.
    positions_idx
        Only its length is used: it determines the spacing
        (``len(gene_list) // len(positions_idx) + 1``). If empty, the number
        of labels is used instead.
    gene_list
        Genes to reorder. Defaults to ``fisher_set.var_names`` (notebook
        global), which keeps the original two-argument call working.

    Returns
    -------
    list
        A new list containing every gene of `gene_list` exactly as often as
        before. The inputs are not modified.

    Raises
    ------
    ValueError
        If a label is not present in `gene_list`.
    """
    if gene_list is None:
        gene_list = fisher_set.var_names.to_list()
    else:
        gene_list = list(gene_list)
    labels = list(labels)

    n_slots = len(positions_idx) or len(labels)
    if n_slots == 0:
        # Nothing to distribute; also avoids dividing by zero below.
        return gene_list

    spacing = len(gene_list) // n_slots + 1

    # Take the labelled genes out of the filler pool.
    for gene in labels:
        gene_list.remove(gene)

    new_gene_list = []
    label_counter = 0
    gene_counter = 0
    for i in range(len(gene_list) + len(labels)):
        labels_left = label_counter < len(labels)
        genes_left = gene_counter < len(gene_list)
        # Regular label slot, or the filler genes ran out: because spacing is
        # rounded up there can be fewer slots than labels, and the surplus
        # labels used to be dropped silently. Append them instead.
        if labels_left and (i % spacing == 0 or not genes_left):
            new_gene_list.append(labels[label_counter])
            label_counter += 1
        elif genes_left:
            new_gene_list.append(gene_list[gene_counter])
            gene_counter += 1
    return new_gene_list

# Column positions (in `fisher_set.var_names` order) of the genes of interest,
# their names, and the matching (i, i) position tuples.
positions_idx = [idx for idx, name in enumerate(fisher_set.var_names) if name in goi]
labels = [fisher_set.var_names[idx] for idx in positions_idx]
positions = [(idx, idx) for idx in positions_idx]

# Gene order for the matrix plot, with the genes of interest spread out.
x = evenly_distribute_genes_of_interest(labels, positions_idx)

In [None]:
# Mean "scaled" expression per condition for every gene in `x`; only the
# genes of interest get a tick label.
fig, ax = plt.subplots(ncols = 1, nrows = 1, figsize = (14,5))
ax_dict = sc.pl.matrixplot(fisher_set,
                           x,
                           groupby='condition',
                           layer = "scaled",
                           cmap = "bwr",
                           #standard_scale = "var",
                           swap_axes = False,
                           categories_order = ["steady_state", "Ecoli"],
                           ax = ax,
                           show = False,
                           colorbar_title = "Expr")

# Column i of the matrix spans [i, i + 1] on the x axis, so its centre is at
# i + 0.5. Ticks at the bare integer would sit on the left edge of the cell.
goi_columns = [(i, gene) for i, gene in enumerate(x) if gene in goi]
ax_dict["mainplot_ax"].set_xticks([i + 0.5 for i, _ in goi_columns])
ax_dict["mainplot_ax"].set_xticklabels([gene for _, gene in goi_columns], ha = "center", rotation = 45)
#ax_dict["color_legend_ax"].remove()

plt.savefig("heatmap_fisher.pdf", dpi = 300, bbox_inches = "tight")
plt.show()

In [None]:
# Per-cell "scaled" values of the gene set, with every column of
# `fisher_set.obs` appended.
raw_data = fisher_set.to_df(layer = "scaled")
raw_data[fisher_set.obs.columns] = fisher_set.obs

In [None]:
# Write the combined expression + metadata table to disk.
raw_data.to_csv("xie_raw_data_heatmap.csv")

In [None]:
# Dot plots of the gene set, split over four stacked panels with genes ordered
# by total normalized expression (highest first).
df = fisher_set.to_df(layer = "log1pPF_normalization")

genes_to_plot = df[df.sum().sort_values(ascending=False).index].columns.to_list()

genes_per_panel = 50
n_panels = 4

# Contiguous chunks: [0:50], [50:100], [100:150] and everything from 150 on.
# The previous hand-written slices started at 51/101/151 and therefore left
# out the genes at positions 50, 100 and 150.
panel_gene_lists = [
    genes_to_plot[i * genes_per_panel : None if i == n_panels - 1 else (i + 1) * genes_per_panel]
    for i in range(n_panels)
]
# The legends are kept only on the last panel that actually has genes.
last_filled_panel = max((i for i, genes in enumerate(panel_gene_lists) if genes), default = -1)

fig, ax = plt.subplots(ncols = 1, nrows = n_panels, figsize = (15,15))

for panel_idx, (panel_ax, panel_genes) in enumerate(zip(ax, panel_gene_lists)):
    if not panel_genes:
        # Fewer genes than panels: hide the unused axes instead of plotting nothing.
        panel_ax.set_visible(False)
        continue

    ax_dict = sc.pl.dotplot(fisher_set, panel_genes,
                  groupby='condition',
                  layer = "log1pPF_normalization",
                  dendrogram=False,
                  dot_max=0.3,
                  dot_min=0.01,
                  ax = panel_ax,
                  show = False,
                  standard_scale='var',
                  categories_order = ["steady_state", "Ecoli"],
                  cmap = "RdYlBu_r",
                  mean_only_expressed = False)

    if panel_idx != last_filled_panel:
        ax_dict["size_legend_ax"].remove()
        ax_dict["color_legend_ax"].remove()
    ax_dict["mainplot_ax"].set_xticklabels(ax_dict["mainplot_ax"].get_xticklabels(), ha = "right", rotation = 45)

plt.subplots_adjust(bottom = 0, top = 0.6)
#plt.tight_layout(pad=0)
plt.savefig("dotplot.pdf", dpi = 300, bbox_inches = "tight")
plt.show()

In [None]:
# Rebind `dataset` to the object that carries the velocity layer and redo the
# label mapping on it.
# NOTE(review): unlike the first load, no `cell_type == "Neutrophils"` filter
# is applied here — confirm the file is already restricted as intended.
dataset = sc.read("../../int_data/dataset_qc_with_velocity.h5ad")

organ_map = {
    'Bone marrow neutrophil': "bone_marrow",
    'Liver neutrophil': "liver",
    'Peripheral blood neutrophil': "peripheral_blood",
    'Peritoneal cavity neutrophil': "peritoneal_cavity",
    'Spleen neutrophil': "spleen",
}
condition_map = {
    'E. coli challenge (24h)': "Ecoli",
    'Steady state': "steady_state",
}
dataset.obs["condition"] = dataset.obs["Condition"].map(condition_map)
dataset.obs["organ"] = dataset.obs["Cell_type"].map(organ_map)

# Same gene-set restriction as before, now on the velocity dataset.
in_gene_set = dataset.var_names.isin(fisher_genes_up_mouse)
fisher_set = dataset[:, in_gene_set].copy()

In [None]:
# Drop the genes whose "velocity" layer is NaN for every cell.
velocity_set = fisher_set[:,~np.all(np.isnan(fisher_set.layers["velocity"]), axis=0)].copy()

In [None]:
# Clipped + standardized velocities are used only to decide the gene order
# (ascending mean in "steady_state"); the plot itself shows the unmodified
# "velocity" layer.
df = velocity_set.to_df(layer = "velocity").dropna(axis = 1)
velocity_floor = np.quantile(df, 0.1)
velocity_ceiling = np.quantile(df, 0.9)
df = df.clip(lower = velocity_floor, upper = velocity_ceiling)
df[df.columns] = StandardScaler().fit_transform(df.values)
df["condition"] = velocity_set.obs["condition"].to_list()

mean_by_condition = df.groupby("condition").mean()
genes_by_steady_state = mean_by_condition.sort_values(by = "steady_state", axis = 1).columns.to_list()

fig, ax = plt.subplots(ncols = 1, nrows = 1, figsize = (18,5))
ax_dict = sc.pl.matrixplot(velocity_set,
                           genes_by_steady_state,
                           groupby='condition',
                           layer = "velocity",
                           cmap = "RdYlBu_r",
                           #standard_scale = "var",
                           categories_order = ["steady_state", "Ecoli"],
                           ax = ax,
                           show = False,
                           colorbar_title = "Expr")

#ax_dict["mainplot_ax"].set_xticks([i for i, gene in enumerate(x) if gene in goi])
#ax_dict["mainplot_ax"].set_xticklabels([gene for gene in x if gene in goi], ha = "right", rotation = 45)
#ax_dict["color_legend_ax"].remove()

plt.savefig("heatmap_fisher_velocity.pdf", dpi = 300, bbox_inches = "tight")
plt.show()

In [None]:
# Mean clipped + standardized velocity per gene for every
# (condition, batch, organ) combination, reshaped to long format
# (one row per group and gene) and written to disk.
group_keys = ["condition", "batch", "organ"]

df = velocity_set.to_df(layer = "velocity").dropna(axis = 1)
velocity_floor = np.quantile(df, 0.1)
velocity_ceiling = np.quantile(df, 0.9)
df = df.clip(lower = velocity_floor, upper = velocity_ceiling)
df[df.columns] = StandardScaler().fit_transform(df.values)

for key in group_keys:
    df[key] = velocity_set.obs[key].to_list()

group_means = df.groupby(group_keys).mean().reset_index()
plotz = group_means.melt(id_vars = group_keys)
plotz.to_csv("velocity_grouped.csv")

In [None]:
%%R -i plotz -o pvalue_ecoli

# Same model comparison as for the module score, applied to the grouped
# velocity values: mixed models with random intercepts for organ and batch,
# with and without the fixed `condition` term. `pvalue_ecoli` is overwritten
# with the `Pr(>Chisq)` value from anova().
gene_set_scores <- plotz
gene_set_scores$batch <- factor(gene_set_scores$batch)
gene_set_scores$condition <- factor(gene_set_scores$condition, levels = c("steady_state", "Ecoli"))

# Rows whose condition is not one of the two levels are dropped.
keep_rows <- gene_set_scores$condition %in% c("steady_state", "Ecoli")
gene_set_scores <- gene_set_scores[keep_rows,]

lm0 <- lmerTest::lmer(value ~ (1|organ) + (1|batch),
                      data = gene_set_scores,
                      REML = FALSE)
lm1 <- lmerTest::lmer(value ~ condition + (1|organ) + (1|batch),
                      data = gene_set_scores,
                      REML = FALSE)

test <- anova(lm0, lm1)
pvalue_ecoli <- test[["Pr(>Chisq)"]][2]


In [None]:
# Boxplot of the grouped velocity values per condition, annotated with the
# mixed-model p-value computed in the preceding %%R cell.
sc.set_figure_params(vector_friendly=True, dpi_save=300)
sns.set_style({'axes.grid' : False})
pairs = [
    ("steady_state", "Ecoli")
]

p_values = [pvalue_ecoli[0]]

fig, ax = plt.subplots(ncols = 1, nrows = 1, figsize = (4,4))

# Shared by seaborn and statannotations so both use the same category order.
ax0_parameters = {
    'data':    plotz,
    'x':       'condition',
    'y':       'value',
    'ax': ax,
    'order': ["steady_state", "Ecoli"]
}

# whis = (0,100): whiskers span the full data range.
sns.boxplot(whis = (0,100),
            **ax0_parameters)

annotator = Annotator(pairs = pairs,
                      **ax0_parameters)
# `line_offset` is a numeric spacing option; the previous placeholder string
# "asd" was not a valid value, so the library default is used instead.
annotator.configure(loc = "inside")
annotator.set_pvalues(p_values)

annotator.annotate()

ax.set_title("Fisher Velocity")
ax.set_ylabel("velocity vector")
ax.set_xlabel("")
ax.set_xticklabels(ax.get_xticklabels(), rotation = 45, ha = "right")

plt.tight_layout()
plt.savefig("velocity_scores.pdf", dpi = 300, bbox_inches = "tight")
plt.show()