# Drug plates

In [None]:
library(ggplot2)
library(extrafont)
library(stringr)
library(heatmaply)
library(ggrepel)
library(reticulate)
library(gridExtra)
library(robustbase)
library(randomForest)
library(reshape2)

In [None]:
# Import the Python helper functions through reticulate
source_python("reticulate_functions.py")

# Register the additional fonts found in the local font directory
ttf_import(paths = "/tmp/.fonts/")
loadfonts()

# TRUE: faster compilation; FALSE: more precise results and all outputs
TEST_MODE <- TRUE

In [None]:
# Customize ggplot appearance

# Base theme: light background, no minor grid, dark grey Arial text and lines
customTheme <- theme_light() +
    theme(
        panel.grid.minor = element_blank(),
        text = element_text(size = 17, family = "Arial", colour = "#333333"),
        line = element_line(colour = "#333333"),
        legend.background = element_rect(fill = alpha("#CCCCCC", 0.1)),
        legend.key = element_blank()
    )

# Override the default continuous colour scale.
#   ...                           forwarded to the selected ggplot2 scale constructor
#   begin, end, direction, option passed to scale_colour_viridis_c()
#   type                          "viridis" or "gradient"; any other value raises an error
scale_colour_continuous <- function (..., begin = 0.1, end = 0.9, direction = -1, option = "plasma", 
                                     type = getOption("ggplot2.continuous.colour", default = "viridis")) {
    switch(
        type,
        viridis = scale_colour_viridis_c(..., option = option, begin = begin,
                                         end = end, direction = direction),
        gradient = scale_colour_gradient(...),
        stop("Unknown scale type", call. = FALSE)
    )
}
# American-spelling alias
scale_color_continuous <- scale_colour_continuous

# Override the default continuous fill scale.
#   ...                           forwarded to the selected ggplot2 scale constructor
#   begin, end, direction, option passed to scale_fill_viridis_c()
#   type                          "viridis" or "gradient"; any other value raises an error.
#                                 The fill-specific option ggplot2.continuous.fill is consulted
#                                 first; when it is unset the lookup falls back to
#                                 ggplot2.continuous.colour (the previous behaviour) and
#                                 finally to "viridis".
scale_fill_continuous <- function (..., begin = 0.1, end = 0.9, direction = -1, option = "plasma", 
                                     type = getOption("ggplot2.continuous.fill",
                                                      default = getOption("ggplot2.continuous.colour",
                                                                          default = "viridis"))) {
    switch(type, gradient = scale_fill_gradient(...), 
        viridis = scale_fill_viridis_c(option = option, begin = begin, end = end, direction = direction, ...), 
        stop("Unknown scale type", call. = FALSE))
}

# Palette generators (functions of n) interpolating between fixed anchor colours
cemm_pal <- colorRampPalette(
    c("#5A463C", "#008CAD", "#40B9D4", "#D4ECF2", "#D2323C", "#F8B100", "#DFDC00")
)
cust_pal <- colorRampPalette(
    c("#008CAD", "#40B9D4", "#D4ECF2", "#F8B100", "#C00000", "#2D0000")
)
yolla_pal <- colorRampPalette(c("#FC7070", "#C00000", "#2D0000"))
# Override the default discrete fill scale.
#   ...                       forwarded to discrete_scale()
#   type                      "CeMM", "Yolla" or "Cust" select the matching custom palette;
#                             any other value falls back to the hue palette
#   h, c, l, h.start,
#   direction                 only used by the hue fallback
#   na.value, aesthetics      passed to discrete_scale()
scale_fill_discrete <- function (..., type = "Cust", h = c(0, 360) + 15, c = 100, l = 65, h.start = 0, 
    direction = 1, na.value = "grey50", aesthetics = "fill") 
{
    if (type == "CeMM"){
        discrete_scale(aesthetics, "CeMM", cemm_pal, na.value = na.value, ...)
    } else if (type == "Yolla"){
        discrete_scale(aesthetics, "Yolla", yolla_pal, na.value = na.value, ...)
    } else if (type == "Cust"){
        discrete_scale(aesthetics, "Cust", cust_pal, na.value = na.value, ...)
    } else {
        # hue_pal() is qualified, as in scale_color_discrete: library(ggplot2)
        # does not attach the scales package, so a bare hue_pal() is not found
        discrete_scale(aesthetics, "hue", scales::hue_pal(h, c, l, h.start, 
            direction), na.value = na.value, ...)
    }
}

# Override the default discrete colour scale.
#   type "CeMM", "Cust" or "Yolla" picks the matching custom palette;
#   any other value uses the hue palette built from h, c, l, h.start and direction.
#   Remaining arguments are forwarded to discrete_scale().
scale_color_discrete <- function (..., type = "Cust", h = c(0, 360) + 15, c = 100, l = 65, h.start = 0, 
    direction = 1, na.value = "grey50", aesthetics = "colour") {
    usesCustomPalette <- type == "CeMM" || type == "Cust" || type == "Yolla"
    if (usesCustomPalette) {
        # The scale name is the same string as the requested type
        palette <- switch(type, CeMM = cemm_pal, Cust = cust_pal, Yolla = yolla_pal)
        discrete_scale(aesthetics, type, palette, na.value = na.value, ...)
    } else {
        huePalette <- scales::hue_pal(h, c, l, h.start, direction)
        discrete_scale(aesthetics, "hue", huePalette, na.value = na.value, ...)
    }
}
# British-spelling alias
scale_colour_discrete <- scale_color_discrete

# Theme add-on without major grid or panel border, with thin axis lines.
# Extra theme elements can be supplied through `...`.
noGridTheme <- function(...){
    tickText <- element_text(size=12)
    theme(
        panel.grid.major = element_blank(),
        axis.text.x = tickText,
        axis.text.y = tickText,
        axis.line = element_line(color="#333333", size = 0.2),
        panel.border = element_blank(),
        ...
    )
}

# Theme add-on with dark grey panel/plot backgrounds and light grey text and lines.
# Extra theme elements supplied through `...` are forwarded to theme(), as in
# noGridTheme(); previously they were accepted but silently ignored.
darkTheme <- function(...){
    theme(panel.background = element_rect(fill = '#333333'), plot.background = element_rect(fill = '#333333'), 
          axis.line=element_line(color="#CCCCCC", size = 0.2), 
          text=element_text(size=17, family="Arial", colour = "#CCCCCC"),
          line=element_line(colour = "#CCCCCC"),
          ...)
}

# Default size of the plots rendered in the notebook
options(repr.plot.width = 10, repr.plot.height = 10)

# Make the customized theme the ggplot default
theme_set(customTheme)

## NK92

In [None]:
# Fix random number generation so that stochastic steps give the same result on every run
set.seed(38)

In [None]:
# Load the compiled image-level measurement table (CSV with a header row)
NK = read.csv("Rsc/compiled_NKDrug_PerfPosition_Image.csv", header=T)

NB: Image measurements match those of the previous CellProfiler run (which did not include unstacked granule positional information).
```
NK = read.csv("Rsc/allImages_NK_Drugs.csv", header=T)
NK2 = read.csv("Rsc/compiled_NKDrug_PerfPosition_Image.csv", header=T)
ordNK = order(NK$Metadata_Well, NK$Metadata_Field)
ordNK2 = order(NK2$Metadata_Well, NK2$Metadata_Field)
dft = data.frame(x = NK$Texture_SumEntropy_CorrDNA_6_01[ordNK],
                 y = NK2$Texture_SumEntropy_CorrDNA_6_01[ordNK2])
ggplot(dft, aes(x = x, y = y)) + geom_point() + xlab("Texture first run") + ylab("Texture second run")
```
However, slightly more cells were identified in the second run, despite the segmentation thresholds being the same. Ignoring this for now.

In [None]:
# Read the plate layout and build a well identifier: row followed by the
# column left-padded with zeros to two characters
Layout <- read.csv("Rsc/DrugPlateLayout.csv", header = TRUE)
Layout$Well <- paste0(Layout$Row, str_pad(Layout$Column, width = 2, pad = "0"))

In [None]:
# Annotate each image with the drug and concentration of its well.
# match() returns one layout row index per image (the first matching row, NA when
# the well is absent from the layout), so the new columns are always plain vectors
# of the right length; the former per-image sapply() lookup returned a list as soon
# as one well had zero or several matches.
wellIdx <- match(NK$Metadata_Well, Layout$Well)
# Explicit factor: the level renaming below and the later filtering steps use
# levels(NK$Drug), and read.csv() does not create factors by default since R 4.0.
NK$Drug <- factor(Layout$Drug)[wellIdx]
levels(NK$Drug)[levels(NK$Drug) == "Y27"] <- "Y-27632"
levels(NK$Drug)[levels(NK$Drug) == "Jasplaknolide"] <- "Jasplakinolide"
NK$Concentration <- Layout$Concentration[wellIdx]

In [None]:
# Histogram of the filtered nucleus count per image, coloured by plate row
hasCount <- !is.na(NK$Count_FilteredNucleus)
gpNK <- ggplot(NK[hasCount, ]) +
    geom_histogram(aes(Count_FilteredNucleus, fill = as.factor(Metadata_Row)), binwidth = 2) +
    scale_fill_discrete(name = "Row")
gpNK

# The figure is only written to disk outside test mode
if (!TEST_MODE) {
    ggsave(filename = "Fig/NK_Drug_count.pdf", plot = gpNK)
}

In [None]:
# Histogram of the mean cytoplasm area per image, coloured by plate row
hasArea <- !is.na(NK$Mean_FilterCytoplasm_AreaShape_Area)
gpNK <- ggplot(NK[hasArea, ]) +
    geom_histogram(aes(Mean_FilterCytoplasm_AreaShape_Area, fill = as.factor(Metadata_Row)),
                   binwidth = 100) +
    scale_fill_discrete(name = "Row")
gpNK

# The figure is only written to disk outside test mode
if (!TEST_MODE) {
    ggsave(filename = "Fig/NK_Drug_area.pdf", plot = gpNK)
}

### Filtering

In [None]:
# Remove empty images and small DNA precipitations
FILT_MAX_INT_DNA <- 0.05
# 8 seems safe from distribution and images, 3 seems in poor shape
FILT_MIN_CELLS <- 4
FILT_NB_MAX_NA_IMAGE <- 10
# Keep uncorrelated variables
FILT_MAX_CORR <- 0.6
dimUMAP <- 3

In [None]:
# Start from every column index of the image table
ftToKeep <- seq_len(ncol(NK))
# Keep only the columns whose class is "numeric"
isNumericCol <- sapply(NK[, ftToKeep], class) == "numeric"
ftToKeep <- ftToKeep[which(isNumericCol)]
# Drop execution time, count and concentration columns
isExcluded <- grepl("(Execution)|(Count)|(Concentration)", colnames(NK)[ftToKeep])
ftToKeep <- ftToKeep[!isExcluded]

In [None]:
# Names of the candidate features before any filtering, kept so the selected
# set can be compared against them downstream
preFiltFt <- names(NK)[ftToKeep]

In [None]:
# Keep images whose maximum DNA intensity reaches the threshold
fieldToKeep <- which(NK$ImageQuality_MaxIntensity_DNA >= FILT_MAX_INT_DNA)
# Keep images with enough cells. which() drops images whose count is NA;
# plain logical indexing would instead insert NA into the index vector.
fieldToKeep <- fieldToKeep[which(NK$Count_FilteredNucleus[fieldToKeep] >= FILT_MIN_CELLS)]

In [None]:
# A few bad quality pictures generate many missing values: drop images that
# reach FILT_NB_MAX_NA_IMAGE missing features
naPerImage <- rowSums(is.na(NK[fieldToKeep, ftToKeep]))
fieldToKeep <- fieldToKeep[naPerImage < FILT_NB_MAX_NA_IMAGE]
# Exclude drugs for which more than half of the images were filtered
# (fewer than 180 images left)
imagesPerDrug <- table(NK$Drug[fieldToKeep])
drugToRemove <- levels(NK$Drug)[imagesPerDrug < 180]
fieldToKeep <- fieldToKeep[!(NK$Drug[fieldToKeep] %in% drugToRemove)]

In [None]:
# Drop the features that still contain missing values
noMissing <- colSums(is.na(NK[fieldToKeep, ftToKeep])) == 0
ftToKeep <- ftToKeep[noMissing]
# Drop constant columns (zero MAD), first over all kept images and then over
# the WT images only
isVariable <- function(x) mad(x) != 0
indWT <- NK[fieldToKeep, ]$Drug == "WT"
ftToKeep <- ftToKeep[sapply(NK[fieldToKeep, ftToKeep], isVariable)]
ftToKeep <- ftToKeep[sapply(NK[fieldToKeep[indWT], ftToKeep], isVariable)]

In [None]:
# Table dimensions before and after applying the image (row) and feature (column) filters
dim(NK)
dim(NK[fieldToKeep, ftToKeep])

Now, we don't do a full profiling but we want to explore values pertaining to perforin granule positioning, so we don't transform the data.

## Granule count per image

In [None]:
# Restrict the table to the images that passed filtering and drop unused drug levels
subNK <- NK[fieldToKeep, ]
subNK$Drug <- droplevels(subNK$Drug)
# Granule count columns of the WT images, reshaped to long format with the
# row name kept as a numeric image identifier
granuleCols <- grep("Count_PerfGranules", names(subNK))
granuleCountWT <- subNK[subNK$Drug == "WT", granuleCols]
granuleCountWT$imageID <- as.numeric(rownames(granuleCountWT))
granuleCountWT <- melt(granuleCountWT, id.vars = "imageID")

In [None]:
# WT granule counts: one violin per count column, with quartile lines
ggplot(granuleCountWT, aes(x = variable, y = value)) +
    geom_violin(fill = "grey80", draw_quantiles = c(0.25, 0.5, 0.75)) +
    labs(x = "", y = "Granule count") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

In [None]:
# WT granule counts: one semi-transparent line per image across the count columns
ggplot(granuleCountWT, aes(x = variable, y = value, color = imageID, group = imageID)) +
    geom_line(alpha = 0.3) +
    labs(x = "", y = "Granule count") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

In [None]:
# For each drug, overlay the per-image lines on the violins of granule counts
granuleCols <- grep("Count_PerfGranules", names(subNK))
for (drug in levels(subNK$Drug)) {
    # Long-format granule counts of the images treated with this drug
    granuleCount <- subNK[subNK$Drug == drug, granuleCols]
    granuleCount$imageID <- as.numeric(rownames(granuleCount))
    granuleCount <- melt(granuleCount, id.vars = "imageID")

    gp <- ggplot(granuleCount, aes(x = variable, y = value)) +
        geom_violin(fill = "grey80", draw_quantiles = c(0.25, 0.5, 0.75)) +
        geom_line(aes(color = imageID, group = imageID), alpha = 0.15) +
        labs(x = "", y = "Granule count") +
        ggtitle(drug) +
        theme(axis.text.x = element_text(angle = 45, hjust = 1))
    print(gp)

    # One PDF per drug, only outside test mode
    if (!TEST_MODE) {
        ggsave(filename = paste0("Fig/NK_Drug_PerfGranuleCount_", drug, ".pdf"), plot = gp)
    }
}

## Individual granule measurements

In [None]:
# Image numbers of the rows of subNK whose drug is "WT"
imgWT = subNK$ImageNumber[subNK$Drug == "WT"]

### Granule dispersion
We want to include dispersion both on the X and Y axes in a symmetric manner (as these directions do not represent anything particular to the cells).  
We use the Standard Distance Deviation defined as:  
$$
\begin{aligned}
SDD &= \sqrt{\frac{\sum_{i=1}^n (x_i - \bar{x})^2}{n-2} + \frac{\sum_{i=1}^n (y_i - \bar{y})^2}{n-2}} \\
&= \sqrt{\frac{n-1}{n-2} \times \bigg(\frac{\sum_{i=1}^n (x_i - \bar{x})^2}{n-1} + \frac{\sum_{i=1}^n (y_i - \bar{y})^2}{n-1}\bigg)} \\
&= \sqrt{\frac{n-1}{n-2} \times \big(\text{var}(x) + \text{var}(y)\big)}
\end{aligned}
$$

where $n$ is the number of granules in the cell and $\bar{x}$, $\bar{y}$ are the mean granule coordinates. The SDD is only defined for $n > 2$.

### Distance between granules and nuclei
To associate a FilteredNucleus object to a granule, we need to join them with the following object relationships:
FilteredNucleus -> Nucleus -> Cytoplasm -> FilterCytoplasm -> Granule.  
Here we prepare such a correspondence table.

In [None]:
# Load the FilteredNucleus object table
nuc <- read.csv("Rsc/compiled_NKDrug_PerfPosition_FilteredNucleus.csv", header = TRUE)

In [None]:
# Keep only the identifier, parent and centroid columns of the nuclei
nucCols <- c("ImageNumber", "ObjectNumber", "Parent_Nucleus",
             "Location_Center_X", "Location_Center_Y")
nucCytoMatch <- nuc[, names(nuc) %in% nucCols]

In [None]:
# Load the object relationship table
objRelation <- read.csv("Rsc/compiled_NKDrug_PerfPosition_Object relationships.csv", header = TRUE)

In [None]:
# Relationships whose first object is a Nucleus, reduced to the image number
# and the two object numbers
fromNucleus <- objRelation$First.Object.Name == "Nucleus"
relationCols <- names(objRelation) %in% c("First.Image.Number",
                                          "First.Object.Number",
                                          "Second.Object.Number")
origNucCytoMatch <- objRelation[fromNucleus, relationCols]

In [None]:
# Attach the related second object number to each filtered nucleus, matching
# on the image number and the parent nucleus number
nucCytoMatch <- merge(
    x = nucCytoMatch,
    y = origNucCytoMatch,
    by.x = c("ImageNumber", "Parent_Nucleus"),
    by.y = c("First.Image.Number", "First.Object.Number")
)

In [None]:
# Load the FilterCytoplasm object table
cyt <- read.csv("Rsc/compiled_NKDrug_PerfPosition_FilterCytoplasm.csv", header = TRUE)

In [None]:
# Keep only the identifier and parent columns of the filtered cytoplasms
cytCols <- c("ImageNumber", "ObjectNumber", "Parent_Cytoplasm")
origCytoCytoMatch <- cyt[, names(cyt) %in% cytCols]

In [None]:
# Attach the FilterCytoplasm object number, matching the related object number
# to the cytoplasm's Parent_Cytoplasm within each image
nucCytoMatch <- merge(
    x = nucCytoMatch,
    y = origCytoCytoMatch,
    by.x = c("ImageNumber", "Second.Object.Number"),
    by.y = c("ImageNumber", "Parent_Cytoplasm")
)
# From here on, ObjectNumber.y is the ID of a FilterCytoplasm, while
# Location_Center_X and Location_Center_Y locate the centroid of its nucleus

In [None]:
# Free up memory: overwrite the large tables that are no longer needed,
# then trigger a garbage collection
nuc <- 0
objRelation <- 0
cyt <- 0
gc()

### Run per-granule measurements for all Z

In [None]:
# Per-granule measurements for one Z slice, restricted to the WT images.
#
# Reads "Rsc/compiled_NKDrug_PerfPosition_PerfGranules<i>.csv", plots (and, outside
# test mode, saves) the histogram of the per-cell Standard Distance Deviation (SDD)
# and returns one row per granule.
#
# Argument:
#   i   slice identifier; pasted into the file name and plot title, stored in Z.
# Returns:
#   data.frame with columns
#     EdgeDist  Distance_Minimum_FilterCytoplasm of the granule,
#     NucDist   Euclidean distance between the granule and the nucleus centre matched
#               through nucCytoMatch (NA when the parent cytoplasm has no match),
#     Z         the slice identifier.
# Uses the globals imgWT, nucCytoMatch and TEST_MODE.
indGranPlots = function(i){
    gran = read.csv(paste0("Rsc/compiled_NKDrug_PerfPosition_PerfGranules", i, ".csv"), header=TRUE)
    colGran = names(gran) %in% c("Parent_FilterCytoplasm", "ImageNumber",
                                 "Distance_Minimum_FilterCytoplasm",
                                 "Location_Center_X", "Location_Center_Y")
    granWT = gran[gran$ImageNumber %in% imgWT, colGran]

    ### Aggregate results per cell
    # The group size is taken with length() rather than dplyr's n(), which is
    # only found when dplyr itself is attached.
    aggGran = granWT %>% 
              group_by(ImageNumber, Parent_FilterCytoplasm) %>% 
              summarise(count = length(Location_Center_X),
                        LocX = median(Location_Center_X), LocY = median(Location_Center_Y),
                        VarX = var(Location_Center_X), VarY = var(Location_Center_Y))
    sdd = sqrt( (aggGran$count - 1)/(aggGran$count - 2) * (aggGran$VarX + aggGran$VarY) )
    # The n - 2 denominator makes the SDD undefined for cells with fewer than 3 granules
    sdd[aggGran$count <= 2] = NA_real_
    aggGran$SDD = sdd

    gp <- ggplot(aggGran) + geom_histogram(aes(x = SDD), binwidth = 1) + 
          ggtitle(paste("Z =", i))
    print(gp)

    if(!TEST_MODE){
        ggsave(filename = paste0("Fig/NK_Drug_PerfGranuleSDD_", i, ".pdf"), plot = gp)
    }

    # Attach the nucleus centroid of the parent cell to every granule
    granCytoMatch = merge(granWT, nucCytoMatch, 
                          by.x = c("ImageNumber", "Parent_FilterCytoplasm"), 
                          by.y = c("ImageNumber", "ObjectNumber.y"),
                          all.x = TRUE, sort = FALSE)

    # Distance between each granule (.x) and its nucleus centre (.y)
    nucDist = sqrt((granCytoMatch$Location_Center_X.x - granCytoMatch$Location_Center_X.y)**2 + 
                   (granCytoMatch$Location_Center_Y.x - granCytoMatch$Location_Center_Y.y)**2)

    # Both distances are read from the merged table so that each row describes a
    # single granule: merge() does not keep the row order (or necessarily the row
    # count) of its first argument, so the unmerged table cannot be paired with it.
    return(data.frame(EdgeDist = granCytoMatch$Distance_Minimum_FilterCytoplasm, 
                      NucDist = nucDist,
                      Z = rep(i, nrow(granCytoMatch))))
}

In [None]:
# Takes ~10-15 minutes to run
# sapply() simplifies the eight returned data frames into a list-matrix with
# one row per column (EdgeDist, NucDist, Z) and one column per slice
perSliceGranules <- sapply(as.character(1:8), indGranPlots)
distDF <- data.frame(
    EdgeDist = unlist(perSliceGranules[1, ]),
    NucDist = unlist(perSliceGranules[2, ]),
    Z = unlist(perSliceGranules[3, ])
)

In [None]:
# Per-granule distance to the cell edge, one violin per Z value with quartile lines
gp <- ggplot(distDF, aes(x = Z, y = EdgeDist)) +
    geom_violin(fill = "grey80", draw_quantiles = c(0.25, 0.5, 0.75)) +
    ylab("Distance to cell edge")

print(gp)

# The figure is only written to disk outside test mode
if (!TEST_MODE) {
    ggsave(filename = "Fig/NK_Drug_PerfGranuleDistToMembrane.pdf", plot = gp)
}

In [None]:
# Per-granule distance to the nucleus centre, one violin per Z value with quartile lines
gp <- ggplot(distDF, aes(x = Z, y = NucDist)) +
    geom_violin(fill = "grey80", draw_quantiles = c(0.25, 0.5, 0.75)) +
    ylab("Distance to nucleus center")

print(gp)

# The figure is only written to disk outside test mode
if (!TEST_MODE) {
    ggsave(filename = "Fig/NK_Drug_PerfGranuleDistToNuclei.pdf", plot = gp)
}

### Run per-cell measurements for all Z

Quite reassuring: closer to the membrane usually means further away from the nucleus center:

    ggplot(aggGran, aes(x = NucDist, y = EdgeDist)) + geom_density_2d(color = c("brown"))

In [None]:
# Per-cell granule measurements for one Z slice, restricted to the WT images.
#
# Reads "Rsc/compiled_NKDrug_PerfPosition_PerfGranules<i>.csv", attaches the nucleus
# centroid of the parent cell to each granule and summarises the granules of every
# (ImageNumber, Parent_FilterCytoplasm) pair.
#
# Argument:
#   i   slice identifier; pasted into the file name and stored in Z.
# Returns:
#   table with one row per cell and, in this order, the columns
#     count     number of granules,
#     NucDist   median granule distance to the nucleus centre,
#     EdgeDist  median Distance_Minimum_FilterCytoplasm,
#     SDD       Standard Distance Deviation (NA for cells with fewer than 3 granules),
#     Z         the slice identifier.
# Uses the globals imgWT and nucCytoMatch.
cellGranPlots = function(i){
    gran = read.csv(paste0("Rsc/compiled_NKDrug_PerfPosition_PerfGranules", i, ".csv"), header=TRUE)
    granToKeep = gran$ImageNumber %in% imgWT

    colGran = names(gran) %in% c("Parent_FilterCytoplasm", "ImageNumber", 
                                 "Distance_Minimum_FilterCytoplasm",
                                 "Location_Center_X", "Location_Center_Y")

    granCytoMatch = merge(gran[granToKeep,colGran], nucCytoMatch, 
                          by.x = c("ImageNumber", "Parent_FilterCytoplasm"), 
                          by.y = c("ImageNumber", "ObjectNumber.y"),
                          all.x = TRUE, sort = FALSE)

    # Distance between each granule (.x) and its nucleus centre (.y); computed
    # once here and summarised below
    granCytoMatch$Distance = sqrt((granCytoMatch$Location_Center_X.x - granCytoMatch$Location_Center_X.y)**2 + 
                                  (granCytoMatch$Location_Center_Y.x - granCytoMatch$Location_Center_Y.y)**2)

    ### Aggregate results per cell
    # The group size is taken with length() rather than dplyr's n(), which is
    # only found when dplyr itself is attached.
    aggGran = granCytoMatch %>% group_by(ImageNumber, Parent_FilterCytoplasm) %>% 
              summarise(count = length(Distance),
                        LocX = median(Location_Center_X.x), LocY = median(Location_Center_Y.x),
                        VarX = var(Location_Center_X.x), VarY = var(Location_Center_Y.x),
                        NucDist = median(Distance),
                        EdgeDist = median(Distance_Minimum_FilterCytoplasm))
    sdd = sqrt( (aggGran$count - 1)/(aggGran$count - 2) * (aggGran$VarX + aggGran$VarY) )
    # The n - 2 denominator makes the SDD undefined for cells with fewer than 3 granules
    sdd[aggGran$count <= 2] = NA_real_
    aggGran$SDD = sdd

    # Include slice of origin into final data frame
    aggGran$Z = i

    # Explicit column order: the caller reads the result by position
    return(aggGran[, c("count", "NucDist", "EdgeDist", "SDD", "Z")])
}

In [None]:
# Takes ~8/15 minutes to run
startTime <- proc.time()
# sapply() simplifies the eight returned tables into a list-matrix with one row
# per column (count, NucDist, EdgeDist, SDD, Z) and one column per slice
perSliceCells <- sapply(as.character(1:8), cellGranPlots)
cellDistDF <- data.frame(
    Count = unlist(perSliceCells[1, ]),
    NucDist = unlist(perSliceCells[2, ]),
    EdgeDist = unlist(perSliceCells[3, ]),
    SDD = unlist(perSliceCells[4, ]),
    Z = unlist(perSliceCells[5, ])
)
# Elapsed time of the step
proc.time() - startTime

In [None]:
# Per-cell median granule distance to the nucleus centre, one violin per Z value
gp <- ggplot(cellDistDF, aes(x = Z, y = NucDist)) + geom_violin(fill = "grey80", draw_quantiles = c(0.25, 0.5, 0.75)) +
      ylab("Distance to nucleus center")

print(gp)

# The figure is only written to disk outside test mode
# ("bif" was a typo for "if" and made the cell fail to parse)
if(!TEST_MODE){
    ggsave(filename = "Fig/NK_Drug_PerfGranuleDistToNuclei_PerCell.pdf", plot = gp)
}

In [None]:
# Per-cell median granule distance to the cell edge, one violin per Z value.
# The y aesthetic is EdgeDist, in agreement with the axis label and the output
# file name; NucDist was plotted here by mistake.
gp <- ggplot(cellDistDF, aes(x = Z, y = EdgeDist)) + geom_violin(fill = "grey80", draw_quantiles = c(0.25, 0.5, 0.75)) +
      ylab("Distance to cell edge")

print(gp)

# The figure is only written to disk outside test mode
if(!TEST_MODE){
    ggsave(filename = "Fig/NK_Drug_PerfGranuleDistToMembrane_PerCell.pdf", plot = gp)
}

In [None]:
# Number of granules per cell, one violin per Z value with quartile lines
gp <- ggplot(cellDistDF, aes(x = Z, y = Count)) +
    geom_violin(fill = "grey80", draw_quantiles = c(0.25, 0.5, 0.75)) +
    ylab("Granules per cell")

print(gp)

# The figure is only written to disk outside test mode
if (!TEST_MODE) {
    ggsave(filename = "Fig/NK_Drug_PerfGranuleCount_PerCell.pdf", plot = gp)
}

In [None]:
# Per-cell Standard Distance Deviation of the granules, one violin per Z value.
# The axis label previously read "Standard Distance D" (truncated).
gp <- ggplot(cellDistDF, aes(x = Z, y = SDD)) + geom_violin(fill = "grey80", draw_quantiles = c(0.25, 0.5, 0.75)) +
      ylab("Standard Distance Deviation")

print(gp)

# The figure is only written to disk outside test mode
if(!TEST_MODE){
    ggsave(filename = "Fig/NK_Drug_PerfGranuleSDD_PerCell.pdf", plot = gp)
}