In [None]:
library(Seurat)
library(stringr)
library(data.table)
library(tidyverse)
library(clusterProfiler)
library(org.Hs.eg.db)
library(ggplot2)
library(cowplot)
library(SingleR)
library(xlsx)
library(writexl)
library(dplyr)
library(ggrepel)
library(svglite)
library(Tempora)
library(ggalluvial)
library(RColorBrewer)
library(ggpubr)

library(Tempora)
library(Seurat)
library(RCurl)
library(tidyverse)
library(igraph)
library(ggraph)
library(graphlayouts)
library(ggforce)
library(scatterpie)
library(RColorBrewer)
library(igraph)
library(ggrepel)
library(stringr)
library(scales)

library(ggpubr)
library(patchwork)

In [None]:
# Render a plot object to <outname>.png, <outname>.pdf and <outname>.svg.
#
# Arguments:
#   plotobj     object that plot() can draw
#   outname     output path without file extension
#   fig.width   figure width in inches
#   fig.height  figure height in inches
#
# Returns plotobj unchanged, so the call can be the last expression of a
# notebook cell or be nested inside another call.
save_plot = function(plotobj,outname, fig.width, fig.height)
{
  print(paste(outname, fig.width, fig.height))

  # Open one graphics device, draw into it and always close it again, even if
  # drawing fails; otherwise a failed plot leaves a dangling open device.
  render_to = function(extension, open_device)
  {
    fname = paste(outname, extension, sep=".")
    print(paste("Saving to file", fname))

    open_device(fname)
    # registered only after the device was opened successfully
    on.exit(dev.off(), add = TRUE)

    plot(plotobj)
    invisible(fname)
  }

  render_to("png", function(fname) {
    png(filename=fname, width = fig.width, height = fig.height, units = 'in', res = 300)
  })

  render_to("pdf", function(fname) {
    pdf(file=fname, width = fig.width, height=fig.height)
  })

  render_to("svg", function(fname) {
    svglite::svglite(file = fname, width = fig.width, height = fig.height)
  })

  return(plotobj)
}


In [None]:
# Read the tab-separated UMI count table; the first line holds the column names.
indf = read.table("/mnt/t/rnaseq/kami_nasal_swab/all_umi_counts_day.tsv", header=TRUE, sep ="\t")

In [None]:
# Use the gene_name column as row names of the table ...
rownames(indf) = indf$gene_name

In [None]:
# ... and remove the column itself afterwards.
indf$gene_name = NULL

In [None]:
# Show the remaining column names.
colnames(indf)

In [None]:
# invalid sample
indf$days14_2200076r_1_e6 = NULL

In [None]:
# Check whether the removed column is still present.
"days14_2200076r_1_e6" %in% colnames(indf)

In [None]:
# Create a Seurat object from a count table and add QC percentages.
#
# Arguments:
#   matrix   count table passed on to CreateSeuratObject()
#   proj     project name; also prepended to every cell name
#   minUMIs  accepted for compatibility with existing callers, not used here
#   plots    if TRUE, show a nCount_RNA vs. percent.mt scatter plot
#
# Returns the Seurat object with the metadata columns percent.mt, percent.rpl,
# percent.rps and percent.rp added.
makeSeuratObj = function(matrix, proj, minUMIs, plots)
{
    obj = CreateSeuratObject(matrix, project=proj)
    print("Renaming Cells")
    obj <- RenameCells(obj, add.cell.id=proj)

    print(paste("Seurat obj project", obj@project.name))

    # One entry per QC feature set: label used in the log line, metadata column
    # to fill, and the regular expression applied to the feature names.
    qcSets = list(
        list(label="mt-Genes",  column="percent.mt",  pattern="^MT-"),
        list(label="Rpl-Genes", column="percent.rpl", pattern="^RPL"),
        list(label="Rps-Genes", column="percent.rps", pattern="^RPS"),
        list(label="Rp-Genes",  column="percent.rp",  pattern="^RPS|^RPL")
    )

    for (qcSet in qcSets)
    {
        # report how many features match and show the first few of them
        selGenes = grep(qcSet$pattern, rownames(obj), value=TRUE)
        print(paste(paste0("Got a total of ", qcSet$label, ":"), length(selGenes), paste0(head(selGenes), collapse=", ")))

        obj[[qcSet$column]] <- PercentageFeatureSet(obj, pattern = qcSet$pattern)
    }

    if (plots)
    {
      plot1 <- FeatureScatter(obj, feature1 = "nCount_RNA", feature2 = "percent.mt")
      show(plot1 + scale_x_continuous(n.breaks = 20) + scale_y_continuous(n.breaks = 20))
    }

    # mt content: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6072887/
    return(obj)
}

In [None]:
# Build the Seurat object for all samples under the project name "bulk".
# The third argument (1000) is accepted but not used by makeSeuratObj as
# defined in this file; the last argument requests the QC scatter plot.
obj.bulk = makeSeuratObj(indf, "bulk", 1000, TRUE)

In [None]:
dim(indf)

In [None]:
obj.bulk

In [None]:
# QC distributions before filtering.
VlnPlot(obj.bulk, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rp"), ncol = 4, pt.size = 0.001, group.by = "orig.ident")

In [None]:
# Keep only entries with more than 200 detected features and more than 1000 counts.
obj.bulk <- subset(obj.bulk, subset = nFeature_RNA > 200 & nCount_RNA > 1000)
obj.bulk

In [None]:
# Same QC distributions after filtering.
VlnPlot(obj.bulk, features = c("nFeature_RNA", "nCount_RNA", "percent.mt", "percent.rp"), ncol = 4, pt.size = 0.001, group.by = "orig.ident")

In [None]:
# Log-normalise, pick 2000 variable features and scale.
obj.bulk <- NormalizeData(obj.bulk, normalization.method = "LogNormalize", scale.factor = 10000)
obj.bulk <- FindVariableFeatures(obj.bulk, nfeatures = 2000)
obj.bulk <- ScaleData(obj.bulk, verbose = FALSE)


In [None]:
# PCA with 30 components.
obj.bulk <- RunPCA(obj.bulk, npcs = 30, verbose = FALSE)

In [None]:
# JackStraw scoring for the 30 principal components.
obj.bulk=JackStraw(obj.bulk, dims = 30)
obj.bulk=ScoreJackStraw(obj.bulk, dims = 1:30)

JackStrawPlot(obj.bulk, dims = 1:10)

In [None]:
JackStrawPlot(obj.bulk, dims = 1:20)

In [None]:
# UMAP, neighbour graph and clustering, all based on PCs 1-30.
obj.bulk <- RunUMAP(obj.bulk, reduction = "pca", dims = 1:30)
obj.bulk <- FindNeighbors(obj.bulk, reduction = "pca", dims = 1:30)
obj.bulk <- FindClusters(obj.bulk, resolution = 1)

In [None]:
# repr.plot.* only sets the size of the inline notebook figure.
options(repr.plot.width=8, repr.plot.height=8)

# UMAP coloured by orig.ident, written to png/pdf/svg by save_plot.
p=DimPlot(obj.bulk, reduction="umap", group.by="orig.ident")
save_plot(p, "/mnt/t/rnaseq/kami_nasal_swab/seurat/umap_ident", 12,12)

In [None]:
# Wide inline figure for the per-ident panels; reset to 8x8 afterwards.
options(repr.plot.width=24, repr.plot.height=8)
DimPlot(obj.bulk, reduction="umap", group.by="orig.ident", split.by="orig.ident")
options(repr.plot.width=8, repr.plot.height=8)


In [None]:
# Extended list of interferon-related gene symbols.
interferonGenesLG = c("MX1","MX2","MT2A","RSAD2","SIGLEC1","IFIT1","IFI44","IFI27","IFITM10","IFIT3","IFI6","IFIT1P1","IFIT2","IFITM9P","IFI30","IFITM3","IFI44L","IFIT1B","IFITM5","IFITM3P2","IFITM3P9","IFI27L2","IFIT5","IFITM1","IFIH1","IFI16","IFI35","IFI27L1","IRF6","IRF7","IRF9","IRF5","IRF4","IRF2BP2","IRF2BPL","IRF3","IRF1-AS1","ISG15","ISG20L2","ISG20","XAF1","LY6E","IFITM2","IFITM3P6","IFITM3P3","IFIT6P","IRF2BP1","IRF5P1","IRF2","IRF1","IRF8","IFITM3P7","IFITM4P","IFITM3P1","IFITM3P8")
# Short list of interferon genes; each symbol is listed once (the repeated
# IFI44L, IFIT1 and ISG15 entries were removed, order of first occurrence kept).
interferonGenes = c("MT2A", "ISG15", "LY6E", "IFIT1", "IFIT2", "IFIT3", "IFITM1", "IFITM3", "IFI44L", "IFI6", "MX1", "IFI27", "RSAD2", "SIGLEC1")

In [None]:
# For each of the 30 principal components print: component index, number of
# significant genes (p < 0.01), and how many of those are in the long and in
# the short interferon gene list.
for (i in seq_len(30)) {
  pcaIgenes <- PCASigGenes(obj.bulk, pcs.use = i, pval.cut = 0.01, max.per.pc = NULL)

  print(paste(
    i,
    length(pcaIgenes),
    length(intersect(pcaIgenes, interferonGenesLG)),
    length(intersect(pcaIgenes, interferonGenes))
  ))
}

In [None]:
# Persist the processed Seurat object.
saveRDS(obj.bulk, "/mnt/t/rnaseq/kami_nasal_swab/nasal_seurat_obj2.rds")

In [None]:
# Uncomment to restore the object saved above instead of recomputing it.
#obj.bulk = readRDS("/mnt/t/rnaseq/kami_nasal_swab/nasal_seurat_obj2.rds")

In [None]:
# Derive time-point annotations from the cell names and store them, together
# with display labels and colours, in the object's metadata.
cellnames = names(obj.bulk$orig.ident)

# second "_"-separated field of each cell name
mpoint = unlist(lapply(str_split(cellnames, "_"), function(x){return(x[2])}))

# raw identifiers in their intended order
mpoint_fact = factor(as.factor(mpoint), levels=c('daysctrl',"cknorm", "ckint", 'days6', 'days14','days95'))

# raw identifier -> display label; identifiers without an entry (e.g. "ckint")
# are left unchanged and therefore become NA in mpoint2_fact below
mpointLabels = c("days6"="Day6", "days14"="Day14", "days95"="Day95", "daysctrl"="Ctrl", "cknorm"="CK Norm.")
hasLabel = mpoint %in% names(mpointLabels)
mpoint[hasLabel] = unname(mpointLabels[mpoint[hasLabel]])

mpoint2_fact = factor(as.factor(mpoint), levels=c('CK Norm.', 'Ctrl', 'Day6','Day14', 'Day95'))

# display label -> colour. The vector is named so that manual ggplot scales
# match colours by label; an unnamed vector is matched by position and would
# shift all colours if one of the groups were absent from the data.
mpoint2Colors = c("CK Norm."="#E93A8D", "Ctrl"="#1F1F1F", "Day6"="#069038", "Day14"="#5DE223", "Day95"="#CCCFCF")

# per-cell colour; values without a colour entry keep their label
mpointcolors = as.vector(mpoint)
hasColor = mpointcolors %in% names(mpoint2Colors)
mpointcolors[hasColor] = unname(mpoint2Colors[mpointcolors[hasColor]])

obj.bulk$colors = mpointcolors

obj.bulk$mpoint = mpoint_fact
obj.bulk$mpoint2 = mpoint2_fact
obj.bulk$celltype = obj.bulk$orig.ident

# combined "<celltype> <time point label>" annotation
obj.bulk$ct_time = paste(obj.bulk$celltype, obj.bulk$mpoint2)

In [None]:
# Show the level order of the display-label factor.
levels(obj.bulk$mpoint2)

In [None]:
# Show the first few cell names.
head(names(obj.bulk$orig.ident))

In [None]:
# PC1 vs PC2 coloured by orig.ident, with fixed axis limits.
DimPlot(obj.bulk, reduction="pca", group.by="orig.ident", dims=c(1,2)) + xlim(c(-15,15))+ylim(c(-25,25))

In [None]:

options(repr.plot.width=8, repr.plot.height=8)

# PCA scatter (PC1 vs PC2) coloured by time point, with marginal density
# plots of PC1 (top) and PC2 (right).
obj.plot = obj.bulk
print(obj.plot)

# axis limits shared by the scatter plot and the marginal densities
xlimVec = c(-15,15)
ylimVec = c(-10, 20)

p=DimPlot(obj.plot, reduction="pca", group.by="mpoint2", dims=c(1,2)) + theme(legend.position="bottom")+labs(title=element_blank(),suptitle=element_blank())+ xlim(xlimVec)+ylim(ylimVec)
# NOTE(review): assumes the rows of p$data are in the same order as the cells
# of obj.plot - confirm
p$data$ct_time = obj.plot$ct_time
p$data$colors = obj.plot$colors
p = p+scale_color_manual(values = mpoint2Colors)

dens1 <- ggplot(p$data, aes(x = PC_1, fill = mpoint2))+scale_fill_manual(values = mpoint2Colors) + 
  geom_density(alpha = 0.4) + 
  theme_void() + xlim(xlimVec)+
  theme(legend.position = "none")

# flipped so that the PC2 density runs along the y axis of the scatter plot
dens2 <- ggplot(p$data, aes(x = PC_2, fill = mpoint2))+scale_fill_manual(values = mpoint2Colors) + 
  geom_density(alpha = 0.4) + 
  theme_void() + 
  theme(legend.position = "none") + 
  coord_flip() + xlim(ylimVec)

# 2x2 layout: top density | spacer / scatter | right density
mp = dens1 + plot_spacer() + p + dens2 + plot_layout(ncol = 2, nrow = 2, widths = c(4, 1), heights = c(1, 4))

# The former `if (is.null(title))` block was removed: no NULL `title` is
# assigned before this point and `selCelltype` is not defined in this script,
# so the branch could never do anything useful.

# empty title strip above the panel layout
title <- ggdraw() + draw_label("", fontface='bold')
p=cowplot::plot_grid(title, mp, ncol = 1, rel_heights = c(0.05, 1) )

save_plot(p, "/mnt/t/rnaseq/kami_nasal_swab/seurat/pca_mpoint_dist", 8, 8)


In [None]:
# UMAP scatter coloured by time point, with marginal density plots of
# UMAP_1 (top) and UMAP_2 (right).
obj.plot = obj.bulk
print(obj.plot)

# axis limits shared by the scatter plot and the marginal densities
xlimVec = c(-6,6)
ylimVec = c(-6, 6)

p=DimPlot(obj.plot, reduction="umap", group.by="mpoint2", dims=c(1,2)) + theme(legend.position="bottom")+labs(title=element_blank(),suptitle=element_blank())+ xlim(xlimVec)+ylim(ylimVec)
# NOTE(review): assumes the rows of p$data are in the same order as the cells
# of obj.plot - confirm
p$data$ct_time = obj.plot$ct_time
p$data$colors = obj.plot$colors
p = p+scale_color_manual(values = mpoint2Colors)

dens1 <- ggplot(p$data, aes(x = UMAP_1, fill = mpoint2))+scale_fill_manual(values = mpoint2Colors) + 
  geom_density(alpha = 0.4) + 
  theme_void() + xlim(xlimVec)+
  theme(legend.position = "none")

# flipped so that the UMAP_2 density runs along the y axis of the scatter plot
dens2 <- ggplot(p$data, aes(x = UMAP_2, fill = mpoint2))+scale_fill_manual(values = mpoint2Colors) + 
  geom_density(alpha = 0.4) + 
  theme_void() + 
  theme(legend.position = "none") + 
  coord_flip() + xlim(ylimVec)

# 2x2 layout: top density | spacer / scatter | right density
mp = dens1 + plot_spacer() + p + dens2 + plot_layout(ncol = 2, nrow = 2, widths = c(4, 1), heights = c(1, 4))

# The former `if (is.null(title))` block was removed: `title` is never NULL at
# this point and `selCelltype` is not defined in this script, so the branch
# could never do anything useful.

# empty title strip above the panel layout
title <- ggdraw() + draw_label("", fontface='bold')
p=cowplot::plot_grid(title, mp, ncol = 1, rel_heights = c(0.05, 1) )

save_plot(p, "/mnt/t/rnaseq/kami_nasal_swab/seurat/umap_mpoint_dist", 8, 8)


In [None]:
# Show percent.mt on the default embedding.
FeaturePlot(obj.bulk, "percent.mt")

In [None]:
# Print every percent.mt value above 15, one per line.
writeLines(paste(obj.bulk$percent.mt[obj.bulk$percent.mt > 15]))

In [None]:
# PCA coloured by display label (mpoint2), saved as png/pdf/svg.
p=DimPlot(obj.bulk, reduction="pca", group.by="mpoint2")
save_plot(p, "/mnt/t/rnaseq/kami_nasal_swab/seurat/pca_mpoint", 12, 12)

In [None]:
# UMAP coloured by raw time-point identifier (mpoint), saved as png/pdf/svg.
p=DimPlot(obj.bulk, reduction="umap", group.by="mpoint")
save_plot(p, "/mnt/t/rnaseq/kami_nasal_swab/seurat/umap_mpoint", 12, 12)

In [None]:
# Cell names per sample group, selected by the group's orig.ident value.
cells.daysctrl <- names(obj.bulk$orig.ident)[obj.bulk$orig.ident == "daysctrl"]
cells.cknorm <- names(obj.bulk$orig.ident)[obj.bulk$orig.ident == "cknorm"]
cells.days6 <- names(obj.bulk$orig.ident)[obj.bulk$orig.ident == "days6"]
cells.days14 <- names(obj.bulk$orig.ident)[obj.bulk$orig.ident == "days14"]
cells.days95 <- names(obj.bulk$orig.ident)[obj.bulk$orig.ident == "days95"]

# Print the size of each group, in the order in which they were defined.
invisible(lapply(
  list(cells.daysctrl, cells.cknorm, cells.days6, cells.days14, cells.days95),
  function(groupCells) print(length(groupCells))
))

In [None]:
# Summarise a numeric vector as a named vector holding its length, Tukey's
# five-number summary and its mean. Every name gets ".<suffix>" appended.
# For an empty vector all statistics are reported as 0.
makesummary <- function(a, suffix) {
  if (length(a) == 0) {
    hinges <- c(0, 0, 0, 0, 0)
    average <- 0
  } else {
    hinges <- fivenum(a)
    average <- mean(a)
  }

  stats <- c(
    num = length(a),
    min = hinges[1],
    lower_hinge = hinges[2],
    median = hinges[3],
    upper_hinge = hinges[4],
    max = hinges[5],
    mean = average
  )

  names(stats) <- paste(names(stats), suffix, sep = ".")

  return(stats)
}

# Per-gene expression summary for a set of cells.
#
# Arguments:
#   markerObj     Seurat object
#   markerCells   names of the cells to summarise
#   sampleSuffix  suffix appended to every statistic column name
#   assay         assay whose "data" slot is read
#
# Returns a data.table with one row per gene that has at least one stored
# matrix entry among the selected cells: the makesummary() statistics of those
# stored values, "anum.<suffix>" (number of selected cells) and "gene".
# NOTE(review): for a sparse assay the stored entries are presumably the
# non-zero values only, so "num"/"mean" etc. would describe expressing cells
# and genes without any entry would be absent from the result - confirm.
getExprData = function(markerObj, markerCells, sampleSuffix, assay="RNA")
{
  expTable = GetAssayData(object = subset(x=markerObj, cells=markerCells), slot = "data", assay=assay)
  allgenes = rownames(expTable)
  cellnames = colnames(expTable)

  # Triplet (row, column, value) representation. Coercing to the virtual class
  # "TsparseMatrix" replaces the direct coercion to "dgTMatrix", which is
  # deprecated in current versions of the Matrix package.
  expt.r = as(expTable, "TsparseMatrix")

  # @i / @j are 0-based, hence the + 1
  DT <- data.table(r = expt.r@i + 1, c = expt.r@j + 1, x = expt.r@x)

  # one summary row per matrix row (gene)
  res = DT[, as.list(makesummary(x, sampleSuffix)), by = r]
  res[[paste("anum", sampleSuffix, sep=".")]] = length(cellnames)
  res$gene = allgenes[res$r]

  # the row index is only needed to look up the gene name
  res = res[,r:=NULL]

  return(res)
}

# Combine per-cluster marker tables with expression summaries.
#
# Arguments:
#   scdata   Seurat object; its active identities define the clusters
#   markers  list of marker tables indexed by cluster id, genes as row names
#            (the shape returned by makeDEResults)
#   assay    assay used for the expression summaries
#
# For every cluster the marker table is joined with the expression summary of
# the cluster's cells (suffix "cluster") and of all remaining cells (suffix
# "bg"). Returns the row-bound tables of all clusters with a leading clusterID
# column, or NULL if there are no clusters.
getDEXpressionDF = function ( scdata, markers, assay="SCT" )
{

outDF = NULL
DefaultAssay(object=scdata) = assay

# identities are looked up once; they do not change inside the loop
cellIdents = Idents(scdata)
allCells = unlist(lapply(names(cellIdents), as.character))
clusterIDs = as.character(sort(unique(cellIdents)))

for (clusterID in clusterIDs){

    print(clusterID)

    # cells of this cluster vs. every other cell
    cellIdents.c = unlist(lapply(names(cellIdents[cellIdents == clusterID]), as.character))
    cellIdents.bg = setdiff(allCells, cellIdents.c)

    expvals = getExprData(scdata, cellIdents.c, "cluster", assay=assay)
    expvals.bg = getExprData(scdata, cellIdents.bg, "bg", assay=assay)

    modmarkers = markers[[clusterID]]
    modmarkers$gene = rownames(modmarkers)

    markerdf = as.data.frame(modmarkers)

    # keep every marker gene, attach the cluster summary where available;
    # without markers the plain cluster summary is kept as is
    if ((nrow(markerdf) > 0) && (nrow(expvals) > 0))
    {
      expvals = merge(markerdf, expvals, all.x=TRUE, by.x="gene", by.y = "gene")
    }

    if ((nrow(expvals) > 0) && (nrow(expvals.bg) > 0))
    {
      expvals = merge(expvals, expvals.bg, all.x=TRUE, by.x="gene", by.y = "gene")
    }

    expvals = as.data.frame(cbind(clusterID, expvals))

    # the first non-empty table starts the result, later ones are appended
    if (!is.data.frame(outDF) || nrow(outDF)==0)
    {
    outDF = expvals
    } else {
    outDF = as.data.frame(rbind(outDF, expvals))
    }

}

return(outDF)

}

# Run FindMarkers for every identity class of inobj, passing the cells of that
# class as ident.1, and return the result tables in a list indexed by class.
makeDEResults <- function(inobj, assay="SCT", test="wilcox") {
  identByCell <- Idents(inobj)
  retList <- list()

  for (clusterID in as.character(sort(unique(identByCell)))) {
    # names of the cells assigned to this class
    clusterCells <- unlist(lapply(names(identByCell[identByCell == clusterID]), as.character))

    print(paste("Processing cluster", clusterID, "with a total of", length(clusterCells), "cells"))

    retList[[clusterID]] <- FindMarkers(inobj, assay=assay, ident.1 = clusterCells, test.use=test)
  }

  return(retList)
}


# Differential expression between two explicit sets of cells.
#
# Arguments:
#   scdata              Seurat object
#   cellsID1, cellsID2  cell names of the two groups
#   suffix1, suffix2    group labels; used as column suffixes and in file names
#   prefix              file name prefix
#   test                value passed to FindMarkers as test.use
#   assay               assay used for testing and for the summaries
#   outfolder           directory receiving the result files
#   all                 if TRUE the logFC threshold is lowered from 0.25 to 0.01
#
# Writes <outfolder>/<prefix>.<suffix1>_<suffix2>.tsv and .xlsx and returns the
# marker table joined with the per-group expression summaries. Returns an
# empty data frame, without writing anything, if either group has fewer than
# three cells.
compareCells = function(scdata, cellsID1, cellsID2, suffix1, suffix2, prefix="cluster", test="t", assay="RNA", outfolder="./", all=FALSE)
{
    logfc.threshold = 0.25

    if (all==TRUE)
    {
    logfc.threshold = 0.01
    }

    # Report requested cells that are absent from the object. Nothing is
    # printed when all cells are present (the message used to appear always).
    for (requestedCells in list(cellsID1, cellsID2))
    {
      missingCells = requestedCells[!requestedCells %in% colnames(scdata)]
      if (length(missingCells) > 0)
      {
        print(paste("Missing cells", paste(missingCells, collapse=", ")))
      }
    }

    print(paste("Selected Cells 1", length(cellsID1)))
    print(paste("Selected Cells 2", length(cellsID2)))

    if ((length(cellsID1) < 3) || (length(cellsID2) < 3))
    {
      return(data.frame())
    }

    markers = FindMarkers(scdata, assay=assay, ident.1 = cellsID1, ident.2 = cellsID2, test.use=test, logfc.threshold=logfc.threshold)

    outvalues1 = getExprData(scdata, cellsID1, suffix1, assay=assay)
    outvalues2 = getExprData(scdata, cellsID2, suffix2, assay=assay)

    markers$gene = rownames(markers)
    joinedData = merge(markers, outvalues1, by="gene", all=TRUE)
    joinedData = merge(joinedData, outvalues2, by="gene", all=TRUE)

    # the outer joins also bring in genes without a test result; drop those
    joinedData = joinedData[!is.na(joinedData$p_val),]

    outfile = paste(outfolder, "/", prefix, ".", suffix1, "_", suffix2, ".tsv", sep="")
    message(outfile)
    write.table(joinedData, file=outfile, row.names = FALSE,  quote=FALSE, sep='\t')

    outfile = paste(outfolder, "/", prefix, ".", suffix1, "_", suffix2, ".xlsx", sep="")
    message(outfile)
    write.xlsx(joinedData, file=outfile, row.names = FALSE)

    return(joinedData)
}



In [None]:


# Expression summaries per sample group, indexed by group name.
geneExpr <- list()

# Cell names per group (selected by orig.ident) plus "All" for every cell.
allCellTypes <- c(
  sapply(
    c("cknorm", "daysctrl", "days6", "days14", "days95"),
    function(identName) names(obj.bulk$orig.ident[obj.bulk$orig.ident == identName]),
    simplify = FALSE, USE.NAMES = TRUE
  ),
  list("All" = names(obj.bulk$orig.ident))
)

for (cellSubsetName in names(allCellTypes)) {
  print(cellSubsetName)

  cellsubset <- allCellTypes[[cellSubsetName]]

  print(cellSubsetName)
  selCells <- cellsubset
  print(length(selCells))

  # summary columns are suffixed with the group name
  outvalues1 <- getExprData(obj.bulk, selCells, cellSubsetName, assay = "RNA")
  geneExpr[[cellSubsetName]] <- outvalues1
}



In [None]:
# Bin the mean expression of every gene per condition.
#
# Arguments:
#   deObj          list indexed by cell type; each element is a list of tables
#                  indexed by condition, each with a `gene` column and a
#                  `mean.<condition>` column
#   celltype       element of deObj to process
#   use.quantiles  ascending probabilities; the matching quantiles of all
#                  positive mean values (pooled over conditions) are the bin
#                  boundaries
#
# Returns one data frame (outer join over all conditions, by gene) with, per
# condition, the columns avg_log2FC.<condition> (holding the mean expression),
# p_val_adj.<condition> (always 0) and bins.<condition>. A bin is the number
# of boundaries that are <= the value, i.e. 0 below the first boundary up to
# length(use.quantiles) at or above the last one. Genes missing in a condition
# get bin 0 there.
prepareFuzzyDataExpr = function(deObj, celltype, use.quantiles=c(0.25, 0.75))
{

    deDFs = deObj[[celltype]]

    my_merge_outer <- function(df1, df2){                                # Create own merging function
      merge(df1, df2, by = "gene", all=TRUE)
    }

    # Bin the values of column col.fc of indf against `bounds`. Rows whose
    # value is missing, or whose col.sig entry is missing or above 0.05, get
    # bin 0.
    createBins = function(indf, col.fc, col.sig, bounds)
    {
      fc = as.numeric(indf[[col.fc]])
      sig = as.numeric(indf[[col.sig]])

      # findInterval counts the boundaries <= value. A value exactly equal to
      # the last boundary therefore lands in the top bin; the former row-wise
      # search returned the placeholder bin 10 for such values.
      bins = as.numeric(findInterval(fc, bounds))
      bins[is.na(fc) | is.na(sig) | sig > 0.05] = 0

      return(bins)
    }

    # pool the mean expression values of all conditions
    celltype.logfcs = unlist(lapply(names(deDFs), function(condtp) {
      as.vector(deDFs[[condtp]][[paste("mean", condtp, sep=".")]])
    }))

    print(head(celltype.logfcs))
    celltype.logfcs = celltype.logfcs[celltype.logfcs>0]

    quantileVec = as.numeric(quantile(abs(celltype.logfcs), probs = use.quantiles, na.rm=TRUE))
    # using expression values!#quantileVec = c(-rev(quantileVec), quantileVec)
    print(paste("Quantile Vector"))
    print(quantileVec)

    condtp.results = list()
    for (tp in names(deDFs))
    {
      fcCol = paste("avg_log2FC", tp, sep=".")
      sigCol = paste("p_val_adj", tp, sep=".")

      resDF = data.frame(gene=deDFs[[tp]]$gene)
      resDF[[fcCol]] = as.vector(deDFs[[tp]][[paste("mean", tp, sep=".")]])
      # no test was performed on expression values, so every gene passes the
      # significance check inside createBins
      resDF[[sigCol]] = 0
      resDF[[paste("bins", tp, sep=".")]] = createBins(resDF, fcCol, sigCol, quantileVec)

      condtp.results[[tp]] = resDF
    }

    celltype.results = Reduce(my_merge_outer, condtp.results)

    # genes absent from a condition have NA bins after the outer join
    for (binID in grep("^bins.", colnames(celltype.results), value = TRUE))
    {
      celltype.results[[binID]][is.na(celltype.results[[binID]])] = 0
    }

    return(celltype.results)

}

In [None]:
# Draw an alluvial plot of the bin patterns of all genes across conditions.
# Genes with the same pattern form one group (one flow in the plot).
#
# Arguments:
#   resultsDF     data frame with a `gene` column and `bins.*` columns
#   selState      only bins columns matching "\\.<selState>" are used
#   list.name     if not NULL, "<list.name>.txt" gets a header with the bins
#                 column names and one line per group: group id, bin labels,
#                 member genes
#   logged        if TRUE the group sizes are log()-transformed
#   filterThresh  groups need more than this many genes to be kept
#   use.levels    named list: bin id (as string) -> label; its order is the
#                 factor level order
#   map.labels    optional named list: bins column -> axis label; its order is
#                 the axis order
#   highlight     optional gene names; these genes are grouped separately,
#                 always kept and drawn in black
#   breaks        spacing of the y axis breaks
#   title         prefix of the plot title
#   filter.noreg  if TRUE, drop non-highlighted groups whose bins all carry
#                 the label of bin "0"
#   bins.name     if not NULL, the labelled bin table is written to
#                 "<bins.name>.ctr.txt" and "<bins.name>.ctr.xlsx"
#
# Returns the ggplot object.
makeFuzzyPlot = function( resultsDF, selState = "sympt", list.name=NULL, logged=T, filterThresh=0, use.levels=list("-2"="DOWN", "-1"="down", "0"="No Reg.", "1"="up", "2"="UP"),map.labels=NULL, highlight=NULL, breaks=500, title="", filter.noreg=F, bins.name=NULL)
{

statePattern = paste("\\.", selState, sep="")

# bins columns of the selected state, in sorted order
binCols = sort(grep(statePattern, grep("^bins.", colnames(resultsDF), value = TRUE), value = TRUE))

ctr = resultsDF[, c("gene", binCols), drop = FALSE]
rownames(ctr) = ctr$gene
inGenes = ctr$gene

ctr$gene = NULL

# Translate bin ids into labelled factors. Ids without an entry in use.levels
# become NA instead of breaking the conversion.
levelLabels = unlist(use.levels)

for (bin_col in binCols)
{
  ctr[[bin_col]] = factor(unname(levelLabels[as.character(ctr[[bin_col]])]), levels = unname(levelLabels))
}

if (logged)
{
  nfunc = function(x){return(log(x))}
  nfuncState = paste("n > ",filterThresh,"; log(n); ", sep="")

  yLabText = "Logged Gene Count"
} else {
  nfunc = function(x){return(x)}
  nfuncState = paste("n > ",filterThresh,"; ", sep="")

  yLabText = "Gene Count"
}

ctr[["highlighted"]] = 0

if (!is.null(bins.name))
{
  write.table(ctr, file=paste(bins.name, "ctr", "txt", sep="."))
  ctr2 = as.data.frame(ctr)
  ctr2$gene = rownames(ctr)
  write_xlsx(ctr2, paste(bins.name, "ctr", "xlsx", sep="."))
}

if (!is.null(highlight))
{
  # Flag by membership: assigning by row name would append an all-NA row for
  # every requested gene that is not part of the table.
  ctr$highlighted[rownames(ctr) %in% highlight] = 1
}

# one row per distinct (bin pattern, highlighted) combination with its size
ctrFeature = ctr %>%
  dplyr::group_by(!!!syms(c(binCols, "highlighted"))) %>%
  dplyr::summarise(n = nfunc(n())) %>%
  dplyr::ungroup()

if (is.null(highlight))
{
  ctrFeature = ctrFeature %>%
    dplyr::filter(n>nfunc(filterThresh)) %>%
    dplyr::arrange(desc(n))

  print(ctrFeature)
} else {
  # Highlighted groups are always kept and sorted last, largest group first
  # within each block. The former `desc(-highlighted, n)` passed two arguments
  # to desc(), which takes exactly one.
  ctrFeature = ctrFeature %>%
    dplyr::filter(highlighted==1 | n>nfunc(filterThresh)) %>%
    dplyr::arrange(highlighted, desc(n))
}

fstate = ""
if (filter.noreg)
{
  # label of bin "0" in the supplied levels; "Unexpr." if there is no bin "0"
  noRegLabel = if ("0" %in% names(use.levels)) use.levels[["0"]] else "Unexpr."

  # TRUE for groups in which every bins column carries that label
  allNoReg = Reduce(`&`, lapply(binCols, function(bin_col) {
    isNoReg = as.character(ctrFeature[[bin_col]]) == noRegLabel
    isNoReg[is.na(isNoReg)] = FALSE
    isNoReg
  }))

  ctrFeature = ctrFeature[!allNoReg | (ctrFeature$highlighted==1),]

  fstate = paste(" filtered all ", noRegLabel, sep="")
}

# seq_len keeps this well-defined when no group survived the filters
groupNames = paste("group", seq_len(nrow(ctrFeature)), sep="")
ctrFeature$group = factor(groupNames, levels=groupNames)

if (!is.null(list.name))
{
  # Genes are assigned to groups by comparing one key string per row, built
  # from the same columns that define the groups.
  keyCols = c(binCols, "highlighted")
  rowKey = function(df) {
    do.call(paste, c(lapply(keyCols, function(keyCol) as.character(df[[keyCol]])), sep="\r"))
  }
  geneKeys = rowKey(ctr)
  groupKeys = rowKey(ctrFeature)

  fname = paste(list.name, ".txt", sep="")
  print(fname)
  write(paste(binCols, collapse=", "), fname)

  for (i in seq_len(nrow(ctrFeature)))
  {
    grp = as.character(ctrFeature$group[i])
    grpLabels = vapply(binCols, function(bin_col) as.character(ctrFeature[[bin_col]][i]), character(1))
    grpGenes = inGenes[geneKeys == groupKeys[i]]

    # The group id is used as is; it used to be prefixed with "group" a second
    # time, so the lookup never matched and the bin labels were left out.
    write(paste(grp, paste(grpLabels, collapse=", "), paste(grpGenes, collapse=", ")), fname, append=TRUE)
  }
}

# axis1..axisN mapping and the matching x axis labels
if (!is.null(map.labels))
{
  axisCols = names(map.labels)
  use.labels = unname(unlist(map.labels))
} else {
  axisCols = binCols
  use.labels = binCols
}

aesMappings = list("y"="n")
for (i in seq_along(axisCols))
{
  aesMappings[[paste("axis", as.character(i), sep="")]] = axisCols[i]
}

# NOTE(review): create_aes is not defined in this part of the file; presumably
# it turns the name -> column list into a ggplot aesthetic mapping - confirm.
print(create_aes(aesMappings))
print(nrow(ctrFeature))

getPalette = colorRampPalette(brewer.pal(10, "Set3"))

# one colour per group; highlighted groups are drawn in black
colorValues = getPalette(nrow(ctrFeature))
colorValues[ctrFeature$highlighted == 1] = "#000000" #"#FF0000"

ctrFeature$highcolor = colorValues

geneCount = sum(ctrFeature$n)

if (!is.null(highlight))
{
  print(ctrFeature[ctrFeature$highlighted == 1,])
}

p=ggplot(as.data.frame(ctrFeature), create_aes(aesMappings)) +
  geom_alluvium(aes(fill = group, colour = group), width = 0, knot.pos = 0, reverse = FALSE, curve_type="quintic") +
  scale_fill_manual(values = colorValues)+
  scale_color_manual(values = colorValues)+
  guides(colour="none",fill="none") +
  geom_stratum(width = 1/16, reverse = FALSE) +
  geom_text(stat = "stratum", aes(label = after_stat(stratum)), reverse = FALSE, angle=90) +
  # one break per plotted axis, so breaks and labels always have equal length
  scale_x_continuous(breaks = seq_along(use.labels), labels = use.labels)+
  scale_y_continuous(limits = c(0, geneCount), breaks = seq(0, geneCount, by = breaks))+
  labs(
    title=paste(title, " Binned Expressions per TimePoint (", nfuncState, selState, fstate, ")", sep="")
  )+
  xlab("Timepoints")+
  ylab(yLabText)+
  theme(
    panel.background = element_blank(),
    axis.text.y = element_text(face="bold", size=14),
    axis.title.y = element_text(face="bold", size=24),
    axis.text.x = element_text(face="bold", size=14),
    axis.title.x = element_text(face="bold", size=24),
  )

return(p)
}


In [None]:
# prepareFuzzyDataExpr expects a list indexed by cell type; all per-group
# summaries are filed under the single entry "All".
use.geneexpr = list("All"=geneExpr)

In [None]:
# Bin the mean expression values using the 75%, 90% and 95% quantiles.
fuzzy.expr.all = prepareFuzzyDataExpr(use.geneexpr, "All", use.quantiles = c(0.75,0.9, 0.95))

In [None]:
# Alluvial plot over all genes; groups need more than 10 genes to be shown.
options(repr.plot.width=12, repr.plot.height=12)
p=makeFuzzyPlot(fuzzy.expr.all, "", logged=F, bins.name ="/mnt/t/rnaseq/kami_nasal_swab/seurat/fuzzy_nasal_all_genes", filterThresh = 10, use.levels=list("0"="Unexpr.", "1"="high", "2"="High", "3"="HIGH"), map.labels = list("bins.cknorm"="CK Normal","bins.daysctrl"="Ctrl", "bins.days6"="Early",   "bins.days14"="Intermed.", "bins.days95"="Late", "bins.All"="All"), breaks=1000)
#"bins.ckint"="CK Intensive"

save_plot(p, "/mnt/t/rnaseq/kami_nasal_swab/seurat/fuzzy_nasal_all_genes", 12, 12)

In [None]:
# Same plot with the interferon genes highlighted and with filter.noreg set.
options(repr.plot.width=12, repr.plot.height=12)
p=makeFuzzyPlot(fuzzy.expr.all, "", highlight = interferonGenes, bins.name ="/mnt/t/rnaseq/kami_nasal_swab/seurat/fuzzy_nasal_expressed_genes", logged=F, filter.noreg=T, filterThresh = 10, use.levels=list("0"="Unexpr.", "1"="high", "2"="High", "3"="HIGH"), map.labels = list("bins.cknorm"="CK Normal","bins.daysctrl"="Ctrl", "bins.days6"="Early",   "bins.days14"="Intermed.", "bins.days95"="Late", "bins.All"="All"), breaks=1000)

save_plot(p, "/mnt/t/rnaseq/kami_nasal_swab/seurat/fuzzy_nasal_expressed_genes", 12, 12)

In [None]:
# List the bins columns available in the binned table.
grep("^bins", colnames(fuzzy.expr.all), value=T)

In [None]:
# Restrict the binned table to the interferon genes.
fuzzy.expr.all.isg = fuzzy.expr.all[fuzzy.expr.all$gene %in% interferonGenes,]

# No size filter here (filterThresh = 0); list.name additionally requests the
# per-group gene list file.
options(repr.plot.width=15, repr.plot.height=8)
p=makeFuzzyPlot(fuzzy.expr.all.isg, "", logged=F, bins.name ="/mnt/t/rnaseq/kami_nasal_swab/seurat/fuzzy_nasal_isg_genes", list.name = "/mnt/t/rnaseq/kami_nasal_swab/seurat/fuzzy_nasal_isg_genes", filterThresh = 0, use.levels=list("0"="Unexpr.", "1"="high", "2"="High", "3"="HIGH"), map.labels = list("bins.cknorm"="CK Normal","bins.daysctrl"="Ctrl", "bins.days6"="Early",   "bins.days14"="Intermed.", "bins.days95"="Late", "bins.All"="All"), breaks=5)

save_plot(p, "/mnt/t/rnaseq/kami_nasal_swab/seurat/fuzzy_nasal_isg_genes", 12, 12)

In [None]:
# Dot plot of the (de-duplicated) interferon genes per time point; save_plot
# returns the plot, which is then drawn once more.
p=DotPlot(obj.bulk, features = unique(interferonGenes), group.by="mpoint2")+ggtitle("DotPlot of ISG genes")+coord_flip()
plot(save_plot(p, paste("/mnt/t/rnaseq/kami_nasal_swab/seurat/dotplot_isg_per_timepoint"), fig.width=12, fig.height=10))

In [None]:
# Save the complete workspace.
save.image("/mnt/t/rnaseq/kami_nasal_swab/seurat/nasal_fuzzy2.rdata")

In [None]:
# Number of entries per orig.ident value.
table(obj.bulk$orig.ident)

In [None]:
# Uncomment to restore a previously saved workspace.
# NOTE(review): this path (nasal_fuzzy.rdata) differs from the one written by
# save.image above (nasal_fuzzy2.rdata) - confirm which file is intended.
#load("/mnt/t/rnaseq/kami_nasal_swab/seurat/nasal_fuzzy.rdata")