In [None]:
# Data handling, plotting and garbage-collection utilities.
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
import gc

import os

# R installation used by rpy2; assigned before the rpy2-based imports below.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
os.environ['R_HOME'] = '/gpfs/bwfor/work/ws/hd_fu399-conda/conda/envs/python_R/lib/R/'
import anndata2ri
import logging

import rpy2.rinterface_lib.callbacks as rcb
import rpy2.robjects as ro

# Only let R callback log records of level ERROR or above through.
rcb.logger.setLevel(logging.ERROR)
# Activate the pandas2ri and anndata2ri conversion layers for objects passed
# between Python and R.
ro.pandas2ri.activate()
anndata2ri.activate()

# Load the rpy2 IPython extension that provides the %%R cell magic used below.
%load_ext rpy2.ipython

import pymer4
from pymer4.models import Lmer, Lm

In [None]:
# Score tables, one CSV per data variant; the first CSV column becomes the index.
scanpy_integrated = pd.read_csv("gene_set_scores_integrated_scanpy_scanpy_ucb.csv", index_col = 0)
scanpy_integrated_r = pd.read_csv("gene_set_scores_integrated_scanpy_R_ucb.csv", index_col = 0)

scanpy_raw = pd.read_csv("gene_set_scores_raw_scanpy_ucb.csv", index_col = 0)
scanpy_raw_r = pd.read_csv("gene_set_scores_raw_R_ucb.csv", index_col = 0)

scanpy_scaled = pd.read_csv("gene_set_scores_scaled_scanpy_ucb.csv", index_col = 0)
scanpy_scaled_r = pd.read_csv("gene_set_scores_scaled_R_ucb.csv", index_col = 0)

seurat_integrated = pd.read_csv("gene_set_scores_integrated_seurat_R_ucb.csv", index_col = 0)

# Column name in `df` -> source series. Insertion order fixes the column order of `df`.
score_columns = {}

# Scores computed on the scanpy-integrated data.
score_columns["scanpy_int_scanpy"] = scanpy_integrated["fisher_up"]
score_columns["seurat_int_scanpy"] = scanpy_integrated_r["Seurat_fisher_up1"]
score_columns["ucell_int_scanpy"] = scanpy_integrated_r["UCell_fisher_up_UCell"]

# "fisher_up" scores on raw and scaled data.
score_columns["sc_raw"] = scanpy_raw["fisher_up"]
score_columns["sc_scaled"] = scanpy_scaled["fisher_up"]

# Seurat / UCell columns on raw and scaled data.
score_columns["seurat_raw"] = scanpy_raw_r["Seurat_fisher_up1"]
score_columns["seurat_scaled"] = scanpy_scaled_r["Seurat_fisher_up1"]
score_columns["ucell_raw"] = scanpy_raw_r["UCell_fisher_up_UCell"]
score_columns["ucell_scaled"] = scanpy_scaled_r["UCell_fisher_up_UCell"]

# Seurat / UCell columns on the Seurat-integrated data.
score_columns["seurat_int_seurat"] = seurat_integrated["Seurat_fisher_up1"]
score_columns["ucell_int_seurat"] = seurat_integrated["UCell_fisher_up_UCell"]

# Per-row annotations, taken from the scanpy-integrated table.
for annotation in ("organ", "condition", "batch"):
    score_columns[annotation] = scanpy_integrated[annotation]

# Every series is aligned to the index of the scanpy-integrated table.
df = pd.DataFrame(score_columns, index = scanpy_integrated.index)

In [None]:
# One violin panel per score column: x = organ, hue = condition in a fixed order.
condition_order = ["none", "IFN-beta", "IFN-gamma", "G-CSF"]
organ_panels = [
    ("seurat_raw", "raw data"),
    ("seurat_scaled", "scaled data"),
    ("seurat_int_scanpy", "scanpy integrated"),
    ("seurat_int_seurat", "seurat integrated"),
]

fig, ax = plt.subplots(ncols = 4, nrows = 1, figsize = (16,4))

for panel, (score_column, panel_title) in zip(ax, organ_panels):
    sns.violinplot(data = df, x = "organ", y = score_column, hue = "condition", hue_order = condition_order, ax = panel)
    # Slant the category labels so they do not overlap.
    panel.set_xticklabels(panel.get_xticklabels(), rotation = 45, ha = "right")
    panel.set_title(panel_title)

plt.tight_layout()
plt.show()

In [None]:
# One violin panel per score column: x = condition, no hue split.
condition_panels = [
    ("seurat_raw", "raw data"),
    ("seurat_scaled", "scaled data"),
    ("seurat_int_scanpy", "scanpy integrated"),
    ("seurat_int_seurat", "seurat integrated"),
]

fig, ax = plt.subplots(ncols = 4, nrows = 1, figsize = (16,4))

for panel, (score_column, panel_title) in zip(ax, condition_panels):
    sns.violinplot(data = df, x = "condition", y = score_column, ax = panel)
    # Slant the category labels so they do not overlap.
    panel.set_xticklabels(panel.get_xticklabels(), rotation = 45, ha = "right")
    panel.set_title(panel_title)

plt.tight_layout()
plt.show()

In [None]:
%%R -i df

library(lme4)

# Null model: intercept plus a random intercept per batch.
lm1 <- lmer(seurat_scaled ~ 1 + (1|batch), data = df)
# Full model: the null model plus a fixed effect of condition.
lm0 <- lmer(seurat_scaled ~ 1 + condition + (1|batch), data = df)

# Compare the two nested fits; the printed table is the cell output.
anova(lm0, lm1)

In [None]:
%%R -i df

library(lme4)

# Null model: intercept plus a random intercept per batch.
lm1 <- lmer(sc_scaled ~ 1 + (1|batch), data = df)
# Full model: the null model plus a fixed effect of condition.
lm0 <- lmer(sc_scaled ~ 1 + condition + (1|batch), data = df)

# Compare the two nested fits; the printed table is the cell output.
anova(lm0, lm1)

In [None]:
%%R -i df

library(lme4)

# Null model: intercept plus a random intercept per batch.
lm1 <- lmer(seurat_int_seurat ~ 1 + (1|batch), data = df)
# Full model: the null model plus a fixed effect of condition.
lm0 <- lmer(seurat_int_seurat ~ 1 + condition + (1|batch), data = df)

# Compare the two nested fits; the printed table is the cell output.
anova(lm0, lm1)

In [None]:
%%R -i df

library(lme4)

# Null model: intercept plus a random intercept per batch.
lm1 <- lmer(scanpy_int_scanpy ~ 1 + (1|batch), data = df)
# Full model: the null model plus a fixed effect of condition.
lm0 <- lmer(scanpy_int_scanpy ~ 1 + condition + (1|batch), data = df)

# Compare the two nested fits; the printed table is the cell output.
anova(lm0, lm1)

In [None]:
import scanpy as sc

# Read the integrated, cell-type annotated object from disk.
dataset = sc.read("../../int_data/dataset_integrated_cell_type.h5ad")

# Cast these obs columns to strings.
string_columns = ["AvgSpotLen", "Bases", "Bytes", "version"]
dataset.obs[string_columns] = dataset.obs[string_columns].astype("str")

# Keep only the cells labelled "Neutrophils", as an independent copy rather than a view.
is_neutrophil = dataset.obs["cell_type"] == "Neutrophils"
dataset = dataset[is_neutrophil, :].copy()

In [None]:
# Copy every column of `df` into dataset.obs under the same name; values are
# matched to cells by index label.
for column_name in df.columns:
    dataset.obs[column_name] = df[column_name]

In [None]:
# Gene symbols of the up- and down-regulated sets, as plain Python lists.
fisher_up_table = pd.read_csv("../../../fisher_up_genes.csv", index_col = 0)
fisher_dn_table = pd.read_csv("../../../fisher_dn_genes.csv", index_col = 0)

fisher_genes_up = fisher_up_table["symbol"].tolist()
fisher_genes_down = fisher_dn_table["symbol"].tolist()

In [None]:
%%R -i fisher_genes_up -o fisher_genes_up_mouse

gene_list <- fisher_genes_up
library(dplyr)
# Homology table downloaded from MGI; the code below uses its columns
# Symbol, Common.Organism.Name and DB.Class.Key.
mouse_human_genes = read.csv("http://www.informatics.jax.org/downloads/reports/HOM_MouseHumanSequence.rpt",sep="\t")

# Translate human gene symbols into mouse gene symbols.
#
# For every symbol in `gene_list`, the homology class key(s) of the matching
# human rows are looked up, and the symbols of all "mouse, laboratory" rows in
# those classes are appended to the result.
#
# Args:
#   gene_list: vector or list of human gene symbols.
# Returns:
#   Character vector of mouse symbols, in the order of `gene_list`; symbols
#   without a human entry contribute nothing. NULL if nothing was found.
convert_human_to_mouse <- function(gene_list){

  output = c()

  for(gene in gene_list){
    class_keys = (mouse_human_genes %>% filter(Symbol == gene & Common.Organism.Name=="human"))[['DB.Class.Key']]
    # length() works whatever type the key column was read as.
    if(length(class_keys) > 0){
      # %in% rather than ==: a symbol can map to several class keys, and ==
      # would silently recycle the key vector against the table rows.
      mouse_genes = (mouse_human_genes %>% filter(DB.Class.Key %in% class_keys & Common.Organism.Name=="mouse, laboratory"))[,"Symbol"]
      # as.character() keeps symbols as strings even if read.csv produced factors.
      output = c(output, as.character(mouse_genes))
    }
  }

  return (output)
}

fisher_genes_up_mouse <- convert_human_to_mouse(fisher_genes_up)

In [None]:
# Use the "soupX_counts" layer as the main matrix. Take a copy so that X does
# not share its buffer with the layer: in-place operations on X would otherwise
# also change the stored counts.
dataset.X = dataset.layers["soupX_counts"].copy()

In [None]:
%%R -i dataset

# Pull the "soupX_counts" assay out of the imported object and write it to disk.
counts <- dataset@assays@data[["soupX_counts"]]

saveRDS(object = counts, file = "count_matrix.rds")



In [None]:
%%R

# Print the dimensions of the extracted count matrix.
dim(counts)

In [None]:
%%R

# Label rows and columns of the count matrix with the names of `dataset`,
# then overwrite the saved file with the labelled matrix.
dimnames(counts) <- list(row.names(dataset), colnames(dataset))

saveRDS(object = counts, file = "count_matrix.rds")

In [None]:
# Write the per-cell annotation table (dataset.obs) to metadata.csv.
dataset.obs.to_csv("metadata.csv")

In [None]:
%%R

# Persist the converted gene symbols for use from R.
# The Python statement `dataset.obs.to_csv("metadata.csv")` that used to follow
# is not valid in an R cell and has been removed; metadata.csv is written by
# the Python cell directly before this one.
saveRDS(fisher_genes_up_mouse, "fisher_genes_up.rds")

In [None]:
# Preprocessing applied to `dataset` in this order: gene filter (min_cells = 5),
# total-count normalisation (target_sum = 1e4), log1p.
preprocessing_steps = (
    (sc.pp.filter_genes, {"min_cells": 5}),
    (sc.pp.normalize_total, {"target_sum": 1e4}),
    (sc.pp.log1p, {}),
)

for apply_step, step_options in preprocessing_steps:
    apply_step(dataset, **step_options)



In [None]:
# Display the summary of the preprocessed object.
dataset

In [None]:
# Convert the current main matrix of `dataset` into a DataFrame.
normalized_expression = dataset.to_df()

In [None]:
# Append every obs column whose name contains "seurat", plus the condition and
# batch annotations, to the expression table.
seurat_columns = [col for col in dataset.obs.columns if "seurat" in col]
extra_columns = seurat_columns + ["condition", "batch"]

normalized_expression[extra_columns] = dataset.obs[extra_columns]

In [None]:
# Display the expression table including the appended columns.
normalized_expression

In [None]:
# Write the expression table (with the appended columns) to expression.csv.
normalized_expression.to_csv("expression.csv")