In [None]:
library(ggplot2)
library(extrafont)
library(stringr)
library(heatmaply)
library(ggrepel)
library(reticulate)
library(gridExtra)
library(robustbase)

In [None]:
# Import the Python helper functions through reticulate
source_python("reticulate_functions.py")
# Register the additional fonts found in the ".fonts" folder
ttf_import(paths = ".fonts")
loadfonts()
# TRUE: faster compilation; FALSE: more precise results and all outputs
TEST_MODE <- FALSE

In [None]:
# Number of permutations drawn for each empirical p-value (reduced in test mode)
PERM_NB_ITER <- if (TEST_MODE) 20 else 2000

In [None]:
# Customize ggplot appearance

# Base theme of every figure: light theme, no minor grid, dark grey Arial text and lines,
# faint grey legend background without key boxes
customTheme <- theme_light() +
               theme(
                   panel.grid.minor = element_blank(),
                   text = element_text(size = 17, family = "Arial", colour = "#333333"),
                   line = element_line(colour = "#333333"),
                   legend.background = element_rect(fill = alpha('#CCCCCC', 0.1)),
                   legend.key = element_blank()
               )

# Override the default continuous colour scale
# `type` selects between ggplot2's gradient scale and the viridis scale; the viridis scale
# defaults to the "plasma" option with direction = -1, restricted to [begin, end] = [0.1, 0.9].
# Any other `type` raises "Unknown scale type".
scale_colour_continuous <- function (..., begin = 0.1, end = 0.9, direction = -1, option = "plasma", 
                                     type = getOption("ggplot2.continuous.colour", default = "viridis")) {
    switch(
        type,
        gradient = scale_colour_gradient(...),
        viridis = scale_colour_viridis_c(
            option = option, begin = begin, end = end, direction = direction, ...
        ),
        stop("Unknown scale type", call. = FALSE)
    )
}
# Alias with the American spelling
scale_color_continuous <- scale_colour_continuous

# Override the default continuous fill scale (same palette defaults as the colour scale).
# `type` is read from the "ggplot2.continuous.fill" option; when that option is unset it
# falls back to "ggplot2.continuous.colour" (the option this function consulted before),
# and finally to "viridis". Any `type` other than "gradient"/"viridis" raises an error.
scale_fill_continuous <- function (..., begin = 0.1, end = 0.9, direction = -1, option = "plasma", 
                                     type = getOption("ggplot2.continuous.fill",
                                                      default = getOption("ggplot2.continuous.colour", default = "viridis"))) {
    switch(type, gradient = scale_fill_gradient(...), 
        viridis = scale_fill_viridis_c(option = option, begin = begin, end = end, direction = direction, ...), 
        stop("Unknown scale type", call. = FALSE))
}

# Palette generators: each one takes a number of colours n and returns n interpolated colours
cemm_pal <- colorRampPalette(
    c("#5A463C", "#008CAD", "#40B9D4", "#D4ECF2", "#D2323C", "#F8B100", "#DFDC00")
)
cust_pal <- colorRampPalette(
    c("#008CAD", "#40B9D4", "#D4ECF2", "#F8B100", "#C00000", "#2D0000")
)
yolla_pal <- colorRampPalette(
    c('#FD0F91', '#C00000', '#2D0000')
)
# Override the default discrete fill scale.
# `type` selects one of the custom palettes ("CeMM", "Yolla", "Cust"); any other value
# falls back to a hue palette built from h, c, l, h.start and direction.
# Remaining arguments (...) are forwarded to discrete_scale().
scale_fill_discrete <- function (..., type = "Cust", h = c(0, 360) + 15, c = 100, l = 65, h.start = 0, 
    direction = 1, na.value = "grey50", aesthetics = "fill") 
{
    if (type == "CeMM"){
        discrete_scale(aesthetics, "CeMM", cemm_pal, na.value = na.value, ...)
    } else if (type == "Yolla"){
        discrete_scale(aesthetics, "Yolla", yolla_pal, na.value = na.value, ...)
    } else if (type == "Cust"){
        discrete_scale(aesthetics, "Cust", cust_pal, na.value = na.value, ...)
    } else {
        # hue_pal is qualified because the scales package is never attached by this file
        # (same call as in scale_color_discrete)
        discrete_scale(aesthetics, "hue", scales::hue_pal(h, c, l, h.start, 
            direction), na.value = na.value, ...)
    }
}

# Override the default discrete colour scale.
# `type` selects one of the custom palettes ("CeMM", "Cust", "Yolla"); any other value
# falls back to a hue palette built from h, c, l, h.start and direction.
scale_color_discrete <- function (..., type = "Cust", h = c(0, 360) + 15, c = 100, l = 65, h.start = 0, 
    direction = 1, na.value = "grey50", aesthetics = "colour") {
    # Custom palettes, keyed by the accepted values of `type`
    namedPalettes <- list(CeMM = cemm_pal, Cust = cust_pal, Yolla = yolla_pal)
    if (type %in% names(namedPalettes)){
        return(discrete_scale(aesthetics, type, namedPalettes[[type]], na.value = na.value, ...))
    }
    huePalette <- scales::hue_pal(h, c, l, h.start, direction)
    discrete_scale(aesthetics, "hue", huePalette, na.value = na.value, ...)
}
# Alias with the British spelling
scale_colour_discrete <- scale_color_discrete

# Theme layer without major grid or panel border, with thin axis lines and size-12 tick labels.
# Extra arguments (...) are forwarded to theme().
noGridTheme <- function(...){
    tickLabel <- element_text(size=12)
    theme(
        panel.grid.major = element_blank(),
        axis.text.x = tickLabel,
        axis.text.y = tickLabel,
        axis.line = element_line(color="#333333", size = 0.2),
        panel.border = element_blank(),
        ...
    )
}

# Theme layer with dark grey panel and plot backgrounds and light grey text, lines and axes.
# Extra arguments (...) are forwarded to theme(), as in noGridTheme()
# (they were previously accepted but silently ignored).
darkTheme <- function(...){
    theme(panel.background = element_rect(fill = '#333333'), plot.background = element_rect(fill = '#333333'), 
          axis.line=element_line(color="#CCCCCC", size = 0.2), 
          text=element_text(size=17, family="Arial", colour = "#CCCCCC"),
          line=element_line(colour = "#CCCCCC"), ...)
}

# Make customTheme the default theme of all subsequent plots
theme_set(customTheme)

# Default size of the plots rendered in the notebook
options(repr.plot.width=10, repr.plot.height=10)

## NK92

In [None]:
# Fix random number generation so that the random steps below (e.g. permutations) are reproducible
# NOTE(review): the original comment mentioned a t-SNE layout; no t-SNE is computed in the visible code
set.seed(38)

In [None]:
# Table of features with one row per image
NK <- read.csv("./allImages_NK_Drugs.csv", header = TRUE)

In [None]:
# Plate layout; the well identifier is the row followed by the column zero-padded to two characters
Layout <- read.csv("./DrugPlateLayout.csv", header = TRUE)
paddedColumn <- str_pad(Layout$Column, 2, pad = "0")
Layout$Well <- paste0(Layout$Row, paddedColumn)

In [None]:
# Annotate every image with the drug and concentration of its well.
# match() gives exactly one layout row per image (NA when the well is absent from the layout),
# so the new columns are always plain vectors with one entry per image.
wellIdx <- match(as.character(NK$Metadata_Well), Layout$Well)
# Later cells iterate over levels(NK$Drug): force a factor, since read.csv() only returns
# factors for text columns when stringsAsFactors is TRUE (no longer the default since R 4.0)
NK$Drug <- as.factor(Layout$Drug[wellIdx])
levels(NK$Drug)[levels(NK$Drug) == "Y27"] <- "Y-27632"
NK$Concentration <- Layout$Concentration[wellIdx]

In [None]:
# Distribution of the number of filtered nuclei per image, coloured by plate row
hasCount <- !is.na(NK$Count_FilteredNucleus)
gpNK <- ggplot(NK[hasCount,]) +
    geom_histogram(aes(Count_FilteredNucleus, fill = as.factor(Metadata_Row)), binwidth = 2) +
    scale_fill_discrete(name = "Row")
gpNK

if (!TEST_MODE) {
    ggsave(filename = "Fig/NK_count.pdf", plot = gpNK)
}

In [None]:
# Distribution of the mean cytoplasm area per image, coloured by plate row
hasArea <- !is.na(NK$Mean_FilterCytoplasm_AreaShape_Area)
gpNK <- ggplot(NK[hasArea,]) +
    geom_histogram(aes(Mean_FilterCytoplasm_AreaShape_Area, fill = as.factor(Metadata_Row)),
                   binwidth = 100) +
    scale_fill_discrete(name = "Row")
gpNK
if (!TEST_MODE) {
    ggsave(filename = "Fig/NK_area.pdf", plot = gpNK)
}

### Filtering

In [None]:
# Images below this max DNA intensity are dropped: removes empty images and small DNA precipitations
FILT_MAX_INT_DNA <- 0.05
# Minimal number of cells per image (8 seems safe from distribution and images, 3 seems in poor shape)
FILT_MIN_CELLS <- 4
# Images with at least this many missing feature values are dropped
FILT_NB_MAX_NA_IMAGE <- 10
# Correlation threshold given to the decorrelation step (keep uncorrelated variables)
FILT_MAX_CORR <- 0.6
# Number of leading embedding columns used in the distance computations
dimUMAP <- 3

In [None]:
# Start from every column of NK
ftToKeep <- seq_len(ncol(NK))
# Keep only the columns whose class is "numeric"
isNumericFt <- sapply(NK[,ftToKeep], class) == "numeric"
ftToKeep <- ftToKeep[which(isNumericFt)]
# Drop execution time and count features
isTimeOrCount <- grepl("(Execution)|(Count)", colnames(NK)[ftToKeep])
ftToKeep <- ftToKeep[!isTimeOrCount]

In [None]:
# Names of the features remaining at this point, kept to compare against the final selection
preFiltFt <- colnames(NK)[ftToKeep]

In [None]:
# Remove images with low max DNA intensity
fieldToKeep <- which(NK$ImageQuality_MaxIntensity_DNA >= FILT_MAX_INT_DNA)
# Remove images with low cell count
# which() discards missing counts instead of inserting NA indices into fieldToKeep
fieldToKeep <- fieldToKeep[which(NK$Count_FilteredNucleus[fieldToKeep] >= FILT_MIN_CELLS)]

In [None]:
# A few bad quality pictures generate many missing values: drop images with too many of them
naPerField <- rowSums(is.na(NK[fieldToKeep,ftToKeep]))
fieldToKeep <- fieldToKeep[naPerField < FILT_NB_MAX_NA_IMAGE]
# Then drop every feature that still has a missing value
naPerFeature <- colSums(is.na(NK[fieldToKeep,ftToKeep]))
ftToKeep <- ftToKeep[naPerFeature == 0]
# Drop features whose median absolute deviation is zero, first over all kept images,
# then over the WT images only
indWT <- NK[fieldToKeep,]$Drug == "WT"
hasSpread <- function(x) mad(x) != 0
ftToKeep <- ftToKeep[sapply(NK[fieldToKeep,ftToKeep], hasSpread)]
ftToKeep <- ftToKeep[sapply(NK[fieldToKeep[indWT],ftToKeep], hasSpread)]

In [None]:
# Size of the raw table, then number of features and of images kept after filtering
print(dim(NK))
print(length(ftToKeep))
print(length(fieldToKeep))

In [None]:
# Approximate a normal distribution: shift x so that its minimum maps to 1, then take the log
# (the smallest value is therefore transformed to 0)
transfLog <- function (x){
    shifted <- x + 1 - min(x)
    log(shifted)
}

# Center and scale x on control values y: subtract the median of y, divide by the MAD of y
transfNorm <- function(x, y){
    controlCenter <- median(y)
    controlSpread <- mad(y)
    (x - controlCenter) / controlSpread
}

In [None]:
# Log-transform every kept feature over the kept images...
transformedNK <- apply(NK[fieldToKeep, ftToKeep], 2, transfLog)
# ...then center and scale each feature on its values in the WT images
transformedNK <- apply(transformedNK, 2, function(ft) transfNorm(ft, ft[indWT]))

In [None]:
# Rank features from the largest to the smallest MAD.
# Features were scaled by their WT MAD, so this ranks them by how much more variable
# they are across all images than among WT images.
ftMad <- apply(transformedNK, 2, mad)
orderFt <- rev(order(ftMad))

In [None]:
# Select features with the `uncorrelate` helper loaded from reticulate_functions.py,
# visiting columns in the order given by orderFt with FILT_MAX_CORR as threshold.
# Indices are shifted by -1 on the way in and +1 on the way out
# (presumably because the Python helper uses 0-based indices - confirm in the helper).
uncorrFt = uncorrelate(transformedNK, orderCol = orderFt-1, threshold = FILT_MAX_CORR)
uncorrFt = unlist(uncorrFt) + 1

In [None]:
# Keep only the features returned by the decorrelation step
transformedNK = transformedNK[,uncorrFt]

### Look at which types of features are kept

In [None]:
# Number of features per channel, before filtering (CountIni) and after selection (Count)
catChannel = c("CorrDNA", "CorrActin", "CorrPerf")
# How many channel patterns each selected feature matches
table(rowSums(sapply(catChannel, function(x) grepl(x, colnames(transformedNK)))))
# Both count vectors are kept in catChannel order so that each row describes a single channel
# (sorting them independently could pair a channel with the count of another one);
# the display order is handled below through Order and the levels of Category
dtCat = data.frame(CountIni = colSums(sapply(catChannel, function(x) grepl(x, preFiltFt))), 
                   Count = colSums(sapply(catChannel, function(x) grepl(x, colnames(transformedNK)))))

# Position of each category on the axis, by increasing initial count
dtCat$Order <- rank(dtCat$CountIni, ties.method = c("first"))
dtCat$Category <- factor(rownames(dtCat), levels=rownames(dtCat)[order(dtCat$CountIni)])
# Percentage of the initial features that were selected
dtCat$Ratio <- dtCat$Count / dtCat$CountIni
dtCat$Ratio <- paste0(round(100*dtCat$Ratio, 1), "%")
# Selected counts are drawn over the initial counts, with the percentage written next to each bar
gp <- ggplot(dtCat) + geom_bar(aes(Category, weight = CountIni), fill = cust_pal(2)[1]) + ylim(c(0,675)) +
              geom_bar(aes(Category, weight = Count), fill = cust_pal(2)[2]) + 
              geom_text(aes(x = Order, y = CountIni + 2, label = Ratio), hjust = 0) + coord_flip()
gp

In [None]:
# Figures are only written outside test mode
if (!TEST_MODE) {
    ggsave(filename = "Fig/NK_SelecFt_Channel.pdf", plot = gp, width = 10)
}

In [None]:
# Number of features per segmented object, before filtering (CountIni) and after selection (Count)
catObjects <- c("ActinGranules", "FilterCytoplasm", "ShrunkenCytoplasm", "FilteredNucleus", "PerfGranules")
# Pattern matches: one row per feature name, one column per object
selectedMatch <- sapply(catObjects, function(x) grepl(x, colnames(transformedNK)))
initialMatch <- sapply(catObjects, function(x) grepl(x, preFiltFt))
# How many object patterns each selected feature matches
table(rowSums(selectedMatch))
dtCat <- data.frame(CountIni = colSums(initialMatch), Count = colSums(selectedMatch))

# Position of each category on the axis, by increasing initial count
dtCat$Order <- rank(dtCat$CountIni, ties.method = "first")
dtCat$Category <- factor(rownames(dtCat), levels = rownames(dtCat)[order(dtCat$CountIni)])
# Percentage of the initial features that were selected
dtCat$Ratio <- paste0(round(100 * (dtCat$Count / dtCat$CountIni), 1), "%")
# Selected counts are drawn over the initial counts, with the percentage written next to each bar
gp <- ggplot(dtCat) +
    geom_bar(aes(Category, weight = CountIni), fill = cust_pal(2)[1]) +
    ylim(c(0,510)) +
    geom_bar(aes(Category, weight = Count), fill = cust_pal(2)[2]) +
    geom_text(aes(x = Order, y = CountIni + 2, label = Ratio), hjust = 0) +
    coord_flip()
gp

In [None]:
# Figures are only written outside test mode
if (!TEST_MODE) {
    ggsave(filename = "Fig/NK_SelecFt_Object.pdf", plot = gp, width = 10)
}

In [None]:
# Number of features per measurement type, before filtering (CountIni) and after selection (Count)
catType <- c("Threshold", "Granularity", "ImageQuality", "Texture", "Distance", "AreaShape",
             "RadialDistribution", "Neighbors", "Correlation", "Intensity", "Overlap", "Location")
# Pattern matches: one row per feature name, one column per type
selectedMatch <- sapply(catType, function(x) grepl(x, colnames(transformedNK)))
initialMatch <- sapply(catType, function(x) grepl(x, preFiltFt))
# Selected features matching no type at all (expected to be empty: all features are covered)
which(rowSums(selectedMatch) == 0)

dtCat <- data.frame(CountIni = colSums(initialMatch), Count = colSums(selectedMatch))
# Position of each category on the axis, by increasing initial count
dtCat$Order <- rank(dtCat$CountIni, ties.method = "first")
dtCat$Category <- factor(rownames(dtCat), levels = rownames(dtCat)[order(dtCat$CountIni)])
# Percentage of the initial features that were selected
dtCat$Ratio <- paste0(round(100 * (dtCat$Count / dtCat$CountIni), 1), "%")
# Selected counts are drawn over the initial counts, with the percentage written next to each bar
gp <- ggplot(dtCat) +
    geom_bar(aes(Category, weight = CountIni), fill = cust_pal(2)[1]) +
    geom_bar(aes(Category, weight = Count), fill = cust_pal(2)[2]) +
    geom_text(aes(x = Order, y = CountIni + 5, label = Ratio), hjust = 0) +
    coord_flip()
gp

In [None]:
# Figures are only written outside test mode
if (!TEST_MODE) {
    ggsave(filename = "Fig/NK_SelecFt_Type_All.pdf", plot = gp, width = 10)
}

In [None]:
# Same breakdown with a shorter list of types; features matching none of them are counted as "Other"
catType <- c("Granularity", "Texture", "AreaShape", "RadialDistribution",
             "Correlation", "Intensity")
# How many type patterns each selected feature matches
table(rowSums(sapply(catType, function(x) grepl(x, colnames(transformedNK)))))

# One logical column per type, plus "Other" for the features matching no type
dtCount <- as.data.frame(sapply(catType, function(x) grepl(x, colnames(transformedNK))))
dtCount$Other <- rowSums(dtCount) == 0
dtCountIni <- as.data.frame(sapply(catType, function(x) grepl(x, preFiltFt)))
dtCountIni$Other <- rowSums(dtCountIni) == 0
dtCat <- data.frame(CountIni = colSums(dtCountIni),
                    Count = colSums(dtCount))
# Position of each category on the axis, by increasing initial count
dtCat$Order <- rank(dtCat$CountIni, ties.method = "first")
dtCat$Category <- factor(rownames(dtCat), levels = rownames(dtCat)[order(dtCat$CountIni)])
# Percentage of the initial features that were selected
dtCat$Ratio <- paste0(round(100 * (dtCat$Count / dtCat$CountIni), 1), "%")
# Selected counts are drawn over the initial counts, with the percentage written next to each bar
gp <- ggplot(dtCat) +
    geom_bar(aes(Category, weight = CountIni), fill = cust_pal(2)[1]) +
    geom_bar(aes(Category, weight = Count), fill = cust_pal(2)[2]) +
    geom_text(aes(x = Order, y = CountIni + 5, label = Ratio), hjust = 0) +
    coord_flip()
gp

In [None]:
# Figures are only written outside test mode
if (!TEST_MODE) {
    ggsave(filename = "Fig/NK_SelecFt_Type_Short.pdf", plot = gp, width = 10)
}

In [None]:
# Write the names of the selected features, one per line (only outside test mode)
if (!TEST_MODE) {
    write.table(colnames(transformedNK), file = "Tab/NK_list_features.csv",
                col.names = FALSE, row.names = FALSE)
}

### Look at the morphological distribution of the fields of view

#### UMAP visualizations

In [None]:
# Principal component analysis of the selected features (centered and scaled)
pcaTNK <- prcomp(transformedNK, center = TRUE, scale. = TRUE)

In [None]:
# Share of the summed standard deviations carried by each principal component
sdevExplained = pcaTNK$sdev / sum(pcaTNK$sdev)
# We keep enough components for their cumulative share to exceed 90%
# NOTE(review): the share is computed on standard deviations, not on variances (sdev^2),
# so this is not "90% of the variance" in the usual sense - confirm which one is intended
nbPCAcomp = min(which(cumsum(sdevExplained) > 0.9)) 
# Scores of the kept components, each column multiplied by the share of its component
umTNK = t(t(pcaTNK$x[,1:nbPCAcomp]) * sdevExplained[1:nbPCAcomp])

In [None]:
# Embedding as a data frame: component columns first, then the metadata of each kept image
umTNK = as.data.frame(umTNK)
# Name every component column (PC1, PC2, PC3, ...); giving only two names would leave
# the remaining component columns with NA names
names(umTNK) = paste0("PC", seq_len(ncol(umTNK)))
umTNK$Row <- as.factor(NK$Metadata_Row[fieldToKeep])
umTNK$Col <- as.factor(NK$Metadata_Column[fieldToKeep])
umTNK$Drug <- as.factor(NK$Drug[fieldToKeep])
umTNK$Conc <- as.factor(NK$Concentration[fieldToKeep])
umTNK$URL <- as.factor(NK$URL_Actin[fieldToKeep])

In [None]:
# Reorder the drug levels: DMSO first, WT last, the drugs in between in reversed order
lvDMSO <- which(levels(umTNK$Drug) == "DMSO")
lvWT <- which(levels(umTNK$Drug) == "WT")
drugOnlyLevels <- levels(umTNK$Drug)[-c(lvDMSO, lvWT)]
umTNK$Drug <- factor(umTNK$Drug, levels = c("DMSO", rev(drugOnlyLevels), "WT"))

In [None]:
# Shape class of each point: "2" for the controls (DMSO, WT), "1" for the drugs
isControl <- umTNK$Drug %in% c("DMSO", "WT")
umTNK$Shape <- ifelse(isControl, "2", "1")

In [None]:
# First two components coloured by drug; controls (DMSO, WT) use open circles (shape 1),
# drugs use filled circles (shape 16). The colour legend repeats these shapes and the
# separate shape legend is hidden ("none" replaces the deprecated `shape = FALSE`).
gp <- ggplot(umTNK) + geom_point(aes(PC1, PC2, color = Drug, shape = Shape)) + 
                      guides(color = guide_legend(reverse = TRUE, 
                               override.aes = list(shape = ifelse(levels(umTNK$Drug) %in% c("DMSO", "WT"), 1, 16))),
                             shape = "none") + scale_shape_manual(values = c(16,1))
gp

In [None]:
# Figures are only written outside test mode
if (!TEST_MODE) {
    ggsave(filename = "Fig/NK_UMAP_Drugs.pdf", plot = gp, width = 10)
}

In [None]:
# Axis ranges of the plot above, reused so that the per-drug plots share the same limits
builtLayout <- ggplot_build(gp)$layout
umXlim <- builtLayout$panel_scales_x[[1]]$range$range
umYlim <- builtLayout$panel_scales_y[[1]]$range$range

In [None]:
# Collapse all drugs into a single "Drug" class, keeping DMSO and WT apart
umTNK$DrugOrControl <- as.factor(ifelse(umTNK$Drug == "DMSO", "DMSO", ifelse(umTNK$Drug == "WT", "WT", "Drug")))

# Same scatter plot coloured by class; controls use open circles, drugs filled circles.
# The separate shape legend is hidden ("none" replaces the deprecated `shape = FALSE`).
gp <- ggplot(umTNK) + geom_point(aes(PC1, PC2, color = DrugOrControl, shape = Shape)) + 
    theme(legend.title = element_blank()) + guides(color = guide_legend(reverse = TRUE, 
                               override.aes = list(shape = ifelse(levels(umTNK$DrugOrControl) %in% c("DMSO", "WT"), 1, 16))),
                             shape = "none") + scale_shape_manual(values = c(16,1))
gp

In [None]:
# Figures are only written outside test mode
if (!TEST_MODE) {
    ggsave(filename = "Fig/NK_UMAP_Control.pdf", plot = gp, width = 10)
}

In [None]:
# Visualize drug perturbations one by one
# drugPert: index of the drug in levels(umTNK$Drug)
# Returns a scatter plot of all the images located on the plate row of that drug,
# drawn on the shared axis limits umXlim / umYlim.
getDrugUmap <- function(drugPert){
    drugLevels = levels(umTNK$Drug)
    nbLevels = length(drugLevels)
    drug = drugLevels[drugPert]
    # Plate row of the first kept image of this drug
    rowPert = NK[fieldToKeep[NK[fieldToKeep,]$Drug == drug],]$Metadata_Row[1]

    umDrug = umTNK[umTNK$Row == rowPert,]
    # Same colours as in the global plot: last level (WT), this drug, first level (DMSO)
    cols = cust_pal(nbLevels)[c(nbLevels, drugPert, 1)]
    names(cols) = c("WT", drug, "DMSO")
    # The legend shapes c(1, 16, 1) assume three legend entries with the drug in the middle;
    # the separate shape legend is hidden ("none" replaces the deprecated `shape = FALSE`)
    gp <- ggplot(umDrug) + geom_point(aes(PC1, PC2, color = Drug, shape = Shape)) + 
        scale_color_manual(values = cols) + ggtitle(drug) +
        theme(legend.title = element_blank(), legend.position = "bottom", legend.key.size = unit(0.08,"cm"),
              legend.text = element_text(size = 10), legend.spacing.x = unit(0.08,"cm"), text=element_text(size=12)) +
        xlim(umXlim) + ylim(umYlim) + scale_shape_manual(values = c(16,1)) +
        guides(color = guide_legend(override.aes = list(shape = c(1, 16, 1))), shape = "none")
    return(gp)
}

In [None]:
# One panel per drug (every level except the first and the last one, i.e. DMSO and WT),
# arranged on four columns; only produced outside test mode
if (!TEST_MODE) {
    drugIdx <- 2:(length(levels(umTNK$Drug))-1)
    gpl <- lapply(drugIdx, getDrugUmap)
    gp <- do.call("grid.arrange", c(gpl, ncol=4))
    gp
    ggsave(filename = "Fig/NK_UMAP_by_drug.pdf", plot = gp, width = 10)
}

#### Distances to WT cells

In [None]:
# Compute the (standard) Mahalanobis Distance (MD) between the images of a drug and the WT
# images located on the same plate row, in the first dimUMAP columns of umTNK.
# drugPert: drug name, as found in NK$Drug
# Returns the median over the drug images of the value given by mahalanobis() (which is the
# squared distance), or NA when one of the two groups has fewer than 2*dimUMAP images.
drugMD <- function(drugPert){
    # Find row containing this drug and split wells between WT and drug
    rowOfField = NK$Metadata_Row[fieldToKeep]
    isDrug = NK$Drug[fieldToKeep] == drugPert
    rowPert = rowOfField[isDrug][1]
    setUmapDrug = umTNK[isDrug,1:dimUMAP]
    # which() ignores missing values, so an unknown row or drug selects no image
    setUmapWT = umTNK[which(indWT & (rowOfField == rowPert)),1:dimUMAP]
    
    # Ensure that we have enough points to compute distance
    if ((nrow(setUmapDrug) < 2*dimUMAP) || (nrow(setUmapWT) < 2*dimUMAP)){
            print(drugPert)
            return(NA_real_)
    }
    
    # Compute variance and location estimators and corresponding Mahalanobis Distance
    centerWT = colMeans(setUmapWT)
    covWT = cov(setUmapWT)
    
    # mahalanobis() handles all rows in one call: the covariance is inverted once, not once per image
    MD = median(mahalanobis(as.matrix(setUmapDrug), centerWT, covWT))
    return(MD)
}

In [None]:
# Null distribution of drugMD(): the drug and same-row WT images are pooled, randomly
# reassigned to two groups of the original sizes, and the distance is recomputed.
# drugPert: drug name, as found in NK$Drug
# nbRep: number of permutations
# Returns a numeric vector of nbRep values (all NA when a group has fewer than 2*dimUMAP images).
shuffDrugMD <- function(drugPert, nbRep = PERM_NB_ITER){
    # Find row containing this drug and shuffle wells between WT and drug
    rowOfField = NK$Metadata_Row[fieldToKeep]
    isDrug = NK$Drug[fieldToKeep] == drugPert
    rowPert = rowOfField[isDrug][1]
    setUmapDrug = umTNK[isDrug,1:dimUMAP]
    # which() ignores missing values, so an unknown row or drug selects no image
    setUmapWT = umTNK[which(indWT & (rowOfField == rowPert)),1:dimUMAP]
    # Pool both groups once; only the row order changes between permutations
    setUMAP = as.matrix(rbind(setUmapWT,setUmapDrug))
    nbDrug = nrow(setUmapDrug)
    nbWT = nrow(setUmapWT)

    # Ensure that we have enough points to compute distance (group sizes never change)
    tooFewPoints = (nbDrug < 2*dimUMAP) || (nbWT < 2*dimUMAP)
    if (tooFewPoints){
        print(drugPert)
    }

    shuffleMD <- function(notUsed){
        # The permutation is drawn even when the distance cannot be computed, so that the
        # random stream seen by later calls does not depend on the group sizes
        shuffOrder = sample(nrow(setUMAP))
        if (tooFewPoints){
            return(NA_real_)
        }
        shuffSetUMAP = setUMAP[shuffOrder, , drop = FALSE]
        # Take random subsets of corresponding sizes
        shuffSetDrug = shuffSetUMAP[seq_len(nbDrug), , drop = FALSE]
        shuffSetWT = shuffSetUMAP[nbDrug + seq_len(nbWT), , drop = FALSE]

        # Compute variance and location estimators and corresponding Mahalanobis Distance
        centerWT = colMeans(shuffSetWT)
        covWT = cov(shuffSetWT)

        median(mahalanobis(shuffSetDrug, centerWT, covWT))
    }
    return(vapply(seq_len(nbRep), shuffleMD, numeric(1)))
}

In [None]:
# Similar procedure for DMSO against WT
# rowDMSO: plate row; the DMSO images of that row are compared to the WT images of the same row.
# Returns the median over the DMSO images of the value given by mahalanobis() (which is the
# squared distance), or NA when one of the two groups has fewer than 2*dimUMAP images.
dmsoMD <- function(rowDMSO){
    inRow = NK$Metadata_Row[fieldToKeep] == rowDMSO
    # which() ignores missing values instead of selecting NA rows
    setUmapDMSO = umTNK[which((NK$Drug[fieldToKeep] == "DMSO") & inRow),1:dimUMAP]
    setUmapWT = umTNK[which(indWT & inRow),1:dimUMAP]
   
    # Ensure that we have enough points to compute distance
    if ((nrow(setUmapDMSO) < 2*dimUMAP) || (nrow(setUmapWT) < 2*dimUMAP)){
            return(NA_real_)
    }
    
    # Compute variance and location estimators and corresponding Mahalanobis Distance
    centerWT = colMeans(setUmapWT)
    covWT = cov(setUmapWT)
    
    # mahalanobis() handles all rows in one call: the covariance is inverted once, not once per image
    MD = median(mahalanobis(as.matrix(setUmapDMSO), centerWT, covWT))
    return(MD)
}
# Null distribution of dmsoMD(): the DMSO and WT images of a plate row are pooled, randomly
# reassigned to two groups of the original sizes, and the distance is recomputed.
# rowDMSO: plate row
# nbRep: number of permutations
# Returns a numeric vector of nbRep values (all NA when a group has fewer than 2*dimUMAP images).
shuffDmsoMD <- function(rowDMSO, nbRep = PERM_NB_ITER){
    # Select the DMSO and WT wells of this row
    inRow = NK$Metadata_Row[fieldToKeep] == rowDMSO
    # which() ignores missing values instead of selecting NA rows
    setUmapDMSO = umTNK[which((NK$Drug[fieldToKeep] == "DMSO") & inRow),1:dimUMAP]
    setUmapWT = umTNK[which(indWT & inRow),1:dimUMAP]
    # Pool both groups once; only the row order changes between permutations
    setUMAP = as.matrix(rbind(setUmapWT,setUmapDMSO))
    nbDMSO = nrow(setUmapDMSO)
    nbWT = nrow(setUmapWT)

    # Ensure that we have enough points to compute distance (group sizes never change)
    tooFewPoints = (nbDMSO < 2*dimUMAP) || (nbWT < 2*dimUMAP)

    shuffleMD <- function(notUsed){
        # The permutation is drawn even when the distance cannot be computed, so that the
        # random stream seen by later calls does not depend on the group sizes
        shuffOrder = sample(nrow(setUMAP))
        if (tooFewPoints){
            return(NA_real_)
        }
        shuffSetUMAP = setUMAP[shuffOrder, , drop = FALSE]
        # Take random subsets of corresponding sizes
        shuffSetDMSO = shuffSetUMAP[seq_len(nbDMSO), , drop = FALSE]
        shuffSetWT = shuffSetUMAP[nbDMSO + seq_len(nbWT), , drop = FALSE]

        # Compute variance and location estimators and corresponding Mahalanobis Distance
        centerWT = colMeans(shuffSetWT)
        covWT = cov(shuffSetWT)

        median(mahalanobis(shuffSetDMSO, centerWT, covWT))
    }
    return(vapply(seq_len(nbRep), shuffleMD, numeric(1)))
}

In [None]:
# Observed distance to WT for every level of NK$Drug
drugMD_NK <- sapply(levels(NK$Drug), drugMD)

In [None]:
# Permuted distances for every level of NK$Drug (one column per drug)
shuffDrugMD_NK <- sapply(levels(NK$Drug), shuffDrugMD)

In [None]:
# Empirical p-value of an observed statistic against its permutation distribution.
# x: numeric vector whose first element is the observed value and whose remaining
#    elements are the permuted values.
# Returns the fraction of permuted values strictly greater than the observed one, or NA
# when the observed value is missing or no permuted value is available (ecdf() would
# otherwise stop with an error on an all-NA sample).
getMPV <- function(x){
    observed = x[1]
    permuted = x[-1]
    if (is.na(observed) || all(is.na(permuted))){
        return(NA_real_)
    }
    ecdfMD = ecdf(permuted)
    return(1 - ecdfMD(observed))
}
# One column per drug: observed distance on the first row, permuted distances below
observedAndShuffled <- rbind(drugMD_NK, shuffDrugMD_NK)
dfMPV <- data.frame(MPV = apply(observedAndShuffled, 2, getMPV))

In [None]:
# Add the drug name and the observed distance, drop the controls,
# then adjust the p-values of the remaining drugs with the "fdr" method
dfMPV$Drug <- rownames(dfMPV)
dfMPV$Strength <- drugMD_NK
isControlRow <- dfMPV$Drug %in% c("DMSO", "WT")
dfMPV <- dfMPV[!isControlRow,]
dfMPV$adjMPV <- p.adjust(dfMPV$MPV, method = "fdr")

In [None]:
# Observed distance against adjusted p-value for every drug; dashed line at 0.05
gp <- ggplot(dfMPV) +
    geom_point(aes(adjMPV, Strength, color = Drug)) +
    geom_vline(xintercept = 0.05, color="#CCCCCC", linetype="dashed")
gp

In [None]:
# Figures are only written outside test mode
if (!TEST_MODE) {
    ggsave(filename = "Fig/NK_RMPV_Drugs.pdf", plot = gp, width = 10)
}

In [None]:
# Observed and permuted DMSO-to-WT distances for the plate rows 1 to 8
plateRows <- 1:8
dmsoMD_NK <- sapply(plateRows, dmsoMD)
shuffDmsoMD_NK <- sapply(plateRows, shuffDmsoMD)

In [None]:
# Empirical p-value per plate row (observed distance on the first row of each column),
# with the row label, the observed distance and the "fdr"-adjusted p-value
dmsoObservedAndShuffled <- rbind(dmsoMD_NK, shuffDmsoMD_NK)
dmsoMPV <- data.frame(MPV = apply(dmsoObservedAndShuffled, 2, getMPV))
dmsoMPV$Row <- rownames(dmsoMPV)
dmsoMPV$Strength <- dmsoMD_NK
dmsoMPV$adjMPV <- p.adjust(dmsoMPV$MPV, method = "fdr")

In [None]:
# Observed distance against adjusted p-value for DMSO on every plate row; dashed line at 0.05
gp <- ggplot(dmsoMPV) +
    geom_point(aes(adjMPV, Strength, color = Row)) +
    geom_vline(xintercept = 0.05, color="#CCCCCC", linetype="dashed")
gp

In [None]:
# Figures are only written outside test mode
if (!TEST_MODE) {
    ggsave(filename = "Fig/NK_RMPV_DMSO.pdf", plot = gp, width = 10)
}

### Concentration-dependency

In [None]:
# Compute the Mahalanobis Distance (MD) between the images of a given drug at a given
# concentration and the WT images located on the plate row of that drug.
# The estimators are the classical mean and covariance (colMeans / cov), not robust ones.
# drugPert: drug name; concPert: concentration, as found in umTNK$Conc
# Returns the median over the selected images of the value given by mahalanobis() (which is
# the squared distance), or NA when one of the two groups has fewer than 2*dimUMAP images.
drugConcMD <- function(drugPert, concPert){
    # Find row containing this drug and split wells between WT and drug
    rowOfField = NK$Metadata_Row[fieldToKeep]
    rowPert = rowOfField[NK$Drug[fieldToKeep] == drugPert][1]
    # which() ignores missing values instead of selecting NA rows
    pertInd = which((umTNK$Drug == drugPert) & (umTNK$Conc == concPert))
    setUmapDrug = umTNK[pertInd,1:dimUMAP]
    setUmapWT = umTNK[which(indWT & (rowOfField == rowPert)),1:dimUMAP]
    
    # Ensure that we have enough points to compute distance
    if ((nrow(setUmapDrug) < 2*dimUMAP) || (nrow(setUmapWT) < 2*dimUMAP)){
            print(c(drugPert, concPert))
            return(NA_real_)
    }
    
    # Compute variance and location estimators and corresponding Mahalanobis Distance
    centerWT = colMeans(setUmapWT)
    covWT = cov(setUmapWT)
    
    # mahalanobis() handles all rows in one call: the covariance is inverted once, not once per image
    MD = median(mahalanobis(as.matrix(setUmapDrug), centerWT, covWT))
    return(MD)
}

In [None]:
# Which drugs should be studied?
# which() leaves out the drugs whose adjusted p-value is missing instead of returning NA names
sigDrugs = rownames(dfMPV)[which(dfMPV$adjMPV < 0.05)]
# Classify concentration per drug as follows
concentrationLevels = c("Low", "Mid", "High")

# Distance to WT of a drug at each of its concentrations.
# d: drug name
# rename: when TRUE the entries are named after concentrationLevels, in the order of the
#         concentration levels; when FALSE they keep the concentration values as names
# Returns a named list with one distance per concentration found for this drug.
getAllDrugConcMD <- function(d, rename = T){
    dConc = levels(droplevels(umTNK$Conc[which(umTNK$Drug == d)]))
    dList = sapply(dConc, function(conc) drugConcMD(d, conc), simplify = F)
    if (rename){
        if (length(dConc) > length(concentrationLevels)){
            warning("More concentrations than concentration classes for ", d,
                    ": the extra ones get an NA name", call. = FALSE)
        }
        # seq_along() also covers a drug without any concentration (empty list, no names)
        names(dList) <- concentrationLevels[seq_along(dConc)]
    }
    return(dList)
}
sigDrugConcMD = sapply(sigDrugs, getAllDrugConcMD, simplify = F)

In [None]:
# Extract the element of list x named y.
# Returns NULL (invisibly) when x has no element with that name.
getSublist <- function(x, y){
    matchIdx <- which(names(x) == y)
    # No match gives a zero-length index (not NULL, not FALSE)
    if (length(matchIdx) == 0){
        return(invisible(NULL))
    }
    x[[matchIdx]]
}

# Collect, for every concentration class, the distances of all the significant drugs having it.
# simplify = FALSE keeps a list named by class even when every class holds the same number
# of drugs (the result would otherwise collapse into a matrix and lose the class names).
concValues = sapply(concentrationLevels,
                    function(y) unlist(lapply(sigDrugConcMD, function(x) getSublist(x, y))),
                    simplify = FALSE)
# Long format with one row per (drug, concentration class). The columns are built directly
# as numeric / factor so that the values never go through a character matrix
# (as.numeric() on a factor column would return level codes instead of the values).
concMPV = data.frame(MPV = as.numeric(unlist(concValues, use.names = FALSE)),
                     Concentration = factor(rep(names(concValues), lengths(concValues)),
                                            levels = concentrationLevels))

In [None]:
# Distribution of the distances per concentration class, over all significant drugs
gp <- ggplot(concMPV) +
    geom_violin(aes(Concentration, MPV, fill = Concentration)) +
    coord_flip() +
    theme(legend.title = element_blank()) +
    guides(fill = guide_legend(reverse = TRUE)) +
    scale_fill_discrete(type = "Yolla")
gp

In [None]:
# Figures are only written outside test mode
if (!TEST_MODE) {
    ggsave(filename = "Fig/NK_ConcEffect_All.pdf", plot = gp, width = 10)
}

In [None]:
# Distances per concentration for every significant drug, named by concentration value
drugsConcMPV = sapply(sigDrugs, function(x) getAllDrugConcMD(x, rename = F), simplify = F)

# seq_along() makes the loop a no-op when no drug is significant
for (i in seq_along(drugsConcMPV)){
    # Retrieve drug name and MPV values
    drugName = names(drugsConcMPV)[i]
    drugConcMPV = drugsConcMPV[[i]]
    if (length(drugConcMPV) == 0){
        next
    }
    
    # Structure into a long data frame, one row per concentration. The columns are built
    # directly as numeric / factor: converting through a character matrix turns the values
    # into level codes when columns are factors, and leaves no levels to sort otherwise.
    concNames = names(drugConcMPV)
    # Reorder concentrations in increasing numerical order
    concLevels = unique(concNames[order(as.numeric(concNames))])
    concMPV = data.frame(MPV = as.numeric(unlist(drugConcMPV, use.names = FALSE)),
                         Concentration = factor(concNames, levels = concLevels))

    # Represent distribution per concentration for all significantly changed drug
    gp <- ggplot(concMPV) + geom_violin(aes(Concentration, MPV, fill = Concentration)) + 
                             coord_flip() + theme(legend.title = element_blank()) + guides(fill = guide_legend(reverse = T)) +
                             scale_fill_discrete(type = "Yolla")
    if(!TEST_MODE){
        ggsave(filename = paste("Fig/NK", drugName, "ConcEffect.pdf", sep = "_"), plot = gp, width = 10)
    }
}