# Patient ARPC1B deficient cells

**Findings**:

* Changes in descriptors observed in patients are confirmed at the individual cell level
* Radial position of granules can be predicted from actin features (both at the cell and image level and based on MIP or proximal plane)
* There are no major changes in these relations between patients and healthy donors, except a change in the direction of the effect of the radial distribution of actin in patient 2 (for whom cells with more actin at the center have granules closer to the center).
* In the distal planes, the lower the relative actin intensity in the outer part of the cytoplasm, the closer the granules are to the center.
* Patient 1 has a different distribution of standard deviation of distance to the membrane over depth than other donors.
* LFA-1 intensity and its distribution at the synapse do not tell much about the radial position of the granules.

**Open questions**:

* Is the difference in standard deviation of the distance to the membrane in patient 1 a difference in dispersion overall?

In [None]:
library(ggplot2)
library(extrafont)
library(stringr)
library(ggrepel)
library(reticulate)
library(gridExtra)
library(robustbase)
library(randomForest)
library(reshape2)
library(data.table)
library(sp)
library(igraph)
library(dplyr)

In [None]:
# Make the functions defined in the companion Python script callable from R
source_python("reticulate_functions.py")
# Import the extra TrueType fonts, then register them for plotting
ttf_import(paths = "/tmp/.fonts/")
loadfonts()
# TRUE: faster compilation; FALSE: more precise results and all outputs
# (the figure and table exports of this notebook are skipped when TRUE)
TEST_MODE <- FALSE

In [None]:
# Customize ggplot appearance

# Default look of every figure: theme_light() without minor grid lines,
# dark grey Arial text and lines, and a faint grey legend background
customTheme <- theme_light() +
    theme(
        text = element_text(size = 17, family = "Arial", colour = "#333333"),
        line = element_line(colour = "#333333"),
        panel.grid.minor = element_blank(),
        legend.background = element_rect(fill = alpha('#CCCCCC', 0.1)),
        legend.key = element_blank()
    )

# Override ggplot2's default continuous colour scale.
#   ...                          forwarded to the underlying ggplot2 scale
#   begin, end, direction, option  passed to scale_colour_viridis_c()
#   type                         "viridis" (default, unless the
#                                ggplot2.continuous.colour option says otherwise)
#                                or "gradient"; anything else is an error
scale_colour_continuous <- function(..., begin = 0.1, end = 0.9, direction = -1, option = "plasma",
                                    type = getOption("ggplot2.continuous.colour", default = "viridis")) {
    switch(
        EXPR = type,
        gradient = scale_colour_gradient(...),
        viridis = scale_colour_viridis_c(option = option, begin = begin, end = end,
                                         direction = direction, ...),
        stop("Unknown scale type", call. = FALSE)
    )
}
# American spelling alias
scale_color_continuous <- scale_colour_continuous

# Override ggplot2's default continuous fill scale.
#   ...                          forwarded to the underlying ggplot2 scale
#   begin, end, direction, option  passed to scale_fill_viridis_c()
#   type                         "viridis" or "gradient"; anything else is an error.
# The default type is read from the `ggplot2.continuous.fill` option (the option
# ggplot2 itself uses for fill scales). The `ggplot2.continuous.colour` option,
# which this function previously read, is kept as a fallback so that existing
# configurations behave as before.
scale_fill_continuous <- function (..., begin = 0.1, end = 0.9, direction = -1, option = "plasma", 
                                     type = getOption("ggplot2.continuous.fill",
                                                      default = getOption("ggplot2.continuous.colour",
                                                                          default = "viridis"))) {
    switch(type, gradient = scale_fill_gradient(...), 
        viridis = scale_fill_viridis_c(option = option, begin = begin, end = end, direction = direction, ...), 
        stop("Unknown scale type", call. = FALSE))
}

# Colour ramps (functions of n returning n interpolated colours) used by the
# discrete colour and fill scales
cemm_pal <- colorRampPalette(
    c("#5A463C", "#008CAD", "#40B9D4", "#D4ECF2", "#D2323C", "#F8B100", "#DFDC00")
)
cust_pal <- colorRampPalette(
    c("#008CAD", "#40B9D4", "#D4ECF2", "#F8B100", "#C00000", "#2D0000")
)
yolla_pal <- colorRampPalette(
    c("#FC7070", "#C00000", "#2D0000")
)
# Override ggplot2's default discrete fill scale with the palettes defined in this file.
#   ...         forwarded to discrete_scale() (e.g. name, labels)
#   type        "Cust" (default), "CeMM" or "Yolla" select the matching colour
#               ramp; any other value falls back to a hue palette
#   h, c, l, h.start, direction  only used by the hue fallback
#   na.value    colour used for missing values
#   aesthetics  aesthetic(s) the scale applies to
scale_fill_discrete <- function (..., type = "Cust", h = c(0, 360) + 15, c = 100, l = 65, h.start = 0, 
    direction = 1, na.value = "grey50", aesthetics = "fill") 
{
    if (type == "CeMM"){
        discrete_scale(aesthetics, "CeMM", cemm_pal, na.value = na.value, ...)
    } else if (type == "Yolla"){
        discrete_scale(aesthetics, "Yolla", yolla_pal, na.value = na.value, ...)
    } else if (type == "Cust"){
        discrete_scale(aesthetics, "Cust", cust_pal, na.value = na.value, ...)
    } else {
        # hue_pal lives in the scales package, which is not attached by this
        # notebook: qualify it, as scale_color_discrete does
        discrete_scale(aesthetics, "hue", scales::hue_pal(h, c, l, h.start, 
            direction), na.value = na.value, ...)
    }
}

# Override ggplot2's default discrete colour scale with the palettes defined in this file.
#   ...         forwarded to discrete_scale()
#   type        "Cust" (default), "CeMM" or "Yolla"; any other value selects
#               the hue palette built from h, c, l, h.start and direction
#   na.value    colour used for missing values
#   aesthetics  aesthetic(s) the scale applies to
scale_color_discrete <- function (..., type = "Cust", h = c(0, 360) + 15, c = 100, l = 65, h.start = 0, 
    direction = 1, na.value = "grey50", aesthetics = "colour") {
    # One guard clause per named palette
    if (type == "CeMM") {
        return(discrete_scale(aesthetics, "CeMM", cemm_pal, na.value = na.value, ...))
    }
    if (type == "Cust") {
        return(discrete_scale(aesthetics, "Cust", cust_pal, na.value = na.value, ...))
    }
    if (type == "Yolla") {
        return(discrete_scale(aesthetics, "Yolla", yolla_pal, na.value = na.value, ...))
    }
    # Fallback: hue palette
    discrete_scale(aesthetics, "hue", scales::hue_pal(h, c, l, h.start, direction),
                   na.value = na.value, ...)
}
# British spelling alias
scale_colour_discrete <- scale_color_discrete

# Theme add-on without major grid lines or panel border, with thin dark axis
# lines and 12 pt tick labels. Extra arguments are forwarded to theme().
noGridTheme <- function(...) {
    theme(
        panel.grid.major = element_blank(),
        panel.border = element_blank(),
        axis.line = element_line(color = "#333333", size = 0.2),
        axis.text.x = element_text(size = 12),
        axis.text.y = element_text(size = 12),
        ...
    )
}

# Theme add-on for dark figures: dark grey panel and plot background with
# light grey text, lines and axis lines.
# Extra arguments are forwarded to theme(), like noGridTheme() does; they were
# previously accepted but silently ignored.
darkTheme <- function(...){
    theme(panel.background = element_rect(fill = '#333333'), plot.background = element_rect(fill = '#333333'), 
          axis.line=element_line(color="#CCCCCC", size = 0.2), 
          text=element_text(size=17, family="Arial", colour = "#CCCCCC"),
          line=element_line(colour = "#CCCCCC"), ...)
}

# Size of the figures rendered in the notebook
options(repr.plot.width = 10, repr.plot.height = 10)

# Use the customised theme for every subsequent ggplot
theme_set(customTheme)

## Patient cells (∆ARPC1B)

In [None]:
# Fix t-SNE layout and random number generation
# (seeds R's random number generator so that stochastic steps are reproducible)
set.seed(38)

### Integrate image information

In [None]:
# Image-level measurements (one row per image)
LT <- read.csv(file = "Rsc/compiled_LT_ARPC1B_3D_Image.csv", header = TRUE)

In [None]:
# Plate layout; only the first 90 rows are kept to avoid the terminal empty lines
annotation <- read.csv(file = "Rsc/ARPC1BPlateLayout.csv", header = TRUE)[seq_len(90), ]

In [None]:
# Value of the first column of `annotation` for the layout entries located at
# the given plate row and column (used as the donor of the well)
getDonor <- function(row, column) {
    annotation[(annotation$Row == row) & (annotation$Column == column), 1]
}
# Value of the second column of `annotation` for the layout entries located at
# the given plate row and column (used as the coating of the well)
getCoating <- function(row, column) {
    annotation[(annotation$Row == row) & (annotation$Column == column), 2]
}
# Annotate every image with the donor and coating of its well
LT$Donor <- mapply(getDonor, LT$Metadata_Row, LT$Metadata_Column)
LT$Coating <- mapply(getCoating, LT$Metadata_Row, LT$Metadata_Column)
# Normal donors are the ones whose donor label contains "ND"
LT$ND <- grepl("ND", LT$Donor)
# Patient label: all normal donors are pooled under "ND", patients keep their own label
LT$Patient <- ifelse(LT$ND, "ND", as.character(LT$Donor))

We discard the data from the PLL coated plates (not studied in this analysis).

In [None]:
# Keep only the images whose coating is not PLL
LT <- LT[LT$Coating != "PLL", ]

In [None]:
# Spot check: donor annotated for the images of the well at row 14, column 8
LT$Donor[(LT$Metadata_Row == 14)&(LT$Metadata_Column == 8)]

In [None]:
# Spot check: donor annotated for the images of the well at row 3, column 8
LT$Donor[(LT$Metadata_Row == 3)&(LT$Metadata_Column == 8)]

In [None]:
# Spot check: donor annotated for the images of the well at row 4, column 15
LT$Donor[(LT$Metadata_Row == 4)&(LT$Metadata_Column == 15)]

In [None]:
# Spot check: donor annotated for the images of the well at row 8, column 16
LT$Donor[(LT$Metadata_Row == 8)&(LT$Metadata_Column == 16)]

In [None]:
# Histogram of the mean cytoplasm area per image (images without a value are
# dropped), coloured by donor. The legend title now names the variable that is
# actually mapped to the fill (it previously read "Row").
gpLT <- ggplot(LT[!is.na(LT$Mean_FilterNKCytoplasm_AreaShape_Area), ]) +
    geom_histogram(aes(Mean_FilterNKCytoplasm_AreaShape_Area, fill = as.factor(Donor)),
                   binwidth = 100) +
    scale_fill_discrete(name = "Donor")
gpLT

### Filtering images

In [None]:
# Threshold on the maximal DNA intensity of an image; images below it are
# discarded (removes empty images and small DNA precipitations)
FILT_MAX_INT_DNA <- 0.01
# Minimal number of cells per image (most fields of view have only 2 cells)
FILT_MIN_CELLS <- 5
# An image is kept only if its number of missing feature values is below this
FILT_NB_MAX_NA_IMAGE <- 1
# Correlation cutoff (keep uncorrelated variables)
FILT_MAX_CORR <- 0.6

In [None]:
# Start from every column of the image table
ftToKeep <- seq_len(ncol(LT))
# Keep the columns whose class is exactly "numeric"
ftToKeep <- ftToKeep[which(sapply(LT[, ftToKeep], class) == "numeric")]
# Drop execution time, count, concentration and ActinGranules features
ftToKeep <- ftToKeep[!grepl("(Execution)|(Count)|(Concentration)|(ActinGranules)",
                            colnames(LT)[ftToKeep])]

In [None]:
# Keep the images whose maximal DNA intensity reaches the threshold
fieldToKeep <- which(LT$ImageQuality_MaxIntensity_DNA >= FILT_MAX_INT_DNA)
# Among those, keep the images with enough cells (missing counts are dropped)
fieldToKeep <- na.omit(fieldToKeep[LT$Count_FilterNKNucleus[fieldToKeep] >= FILT_MIN_CELLS])

In [None]:
# A few bad quality pictures generate a lot of missing values: keep only the
# images whose number of missing kept features is below the allowed maximum
fieldToKeep <- fieldToKeep[FILT_NB_MAX_NA_IMAGE > rowSums(is.na(LT[fieldToKeep, ftToKeep]))]

In [None]:
# ImageNumber of the images that passed every image-level filter
imgToKeep <- LT$ImageNumber[fieldToKeep]

### Load individual cell measurements

In [None]:
# Load the per-cell cytoplasm measurements; display the table size and the
# time spent loading it
t_start <- Sys.time()
cytoplasm <- fread("Rsc/compiled_LT_ARPC1B_3D_FilterNKCytoplasm.csv", header = TRUE)
dim(cytoplasm)
Sys.time() - t_start

In [None]:
# Regular expressions selecting the cytoplasm columns kept for the analysis:
# identifiers, shape descriptors, intensity and radial distribution of the
# "Corr*" channels, and the mean granule distance / centre columns (the "." in
# the granule patterns matches any single character, presumably the plane index)
colCytoplasm <- c(
    "ImageNumber", "ObjectNumber",
    "AreaShape_Perimeter", "AreaShape_Area",
    "AreaShape_MaximumRadius", "AreaShape_MinorAxisLength",
    "AreaShape_MeanRadius", "AreaShape_FormFactor",
    "Intensity_MeanIntensity_Corr.*",
    "RadialDistribution_FracAtD_Corr.*",
    "Mean_FilterNKPerfGranules._Distance_Minimum_FilterNKCytoplasm",
    "Mean_FilterNKPerfGranules._Location_Center_."
)
# Indices of the columns of `cytoplasm` whose name matches the pattern p
grepList <- function(p) {
    grep(pattern = p, x = names(cytoplasm))
}
# Indices of all selected columns, in the order of the patterns above
indColCytoplasm <- unlist(sapply(colCytoplasm, grepList))

### Filter corresponding cells

In [None]:
# Keep the cells of the retained images, restricted to the selected columns
cytoplasm <- cytoplasm[ImageNumber %in% imgToKeep, ..indColCytoplasm]
dim(cytoplasm)

Visualize cutoff with:
```
ggplot(cell, aes(x = Intensity_MeanIntensity_CorrActin)) +
    geom_histogram(bins = 50) +
    geom_vline(xintercept = 0.02) +
    geom_vline(xintercept = 0.14) +
    facet_wrap(~Donor)
ggplot(cell, aes(x = AreaShape_Area)) +
    geom_histogram(bins = 50) +
    geom_vline(xintercept = 500) +
    geom_vline(xintercept = 5000) +
    facet_wrap(~Donor)
```

In [None]:
# Accepted range (exclusive bounds) of the mean actin intensity of a cell
CELL_FILTER_MIN_ACTIN_INT <- 0.02
CELL_FILTER_MAX_ACTIN_INT <- 0.14
# Accepted range (exclusive bounds) of the cytoplasm area of a cell
CELL_FILTER_MIN_AREA <- 500
CELL_FILTER_MAX_AREA <- 5000

In [None]:
# Per-cell logical masks: actin intensity and area strictly inside their ranges
f1 <- CELL_FILTER_MIN_ACTIN_INT < cytoplasm$Intensity_MeanIntensity_CorrActin
f2 <- CELL_FILTER_MAX_ACTIN_INT > cytoplasm$Intensity_MeanIntensity_CorrActin
f3 <- CELL_FILTER_MIN_AREA < cytoplasm$AreaShape_Area
f4 <- CELL_FILTER_MAX_AREA > cytoplasm$AreaShape_Area

In [None]:
# Keep the cells passing the four filters at once and display the new table size
cytoplasm <- cytoplasm[(f1 & f2) & (f3 & f4), ]
dim(cytoplasm)

In [None]:
# Working table of the per-cell analysis
cell <- cytoplasm

For now we consider that all selected columns should be kept, despite some having NAs (e.g. when a cell doesn't have any granule segmented on a given plane).

In [None]:
# Donor of the image with the given ImageNumber, looked up in the image table LT.
# NOTE: this replaces the earlier getDonor(row, column) defined for the plate layout.
getDonor <- function(imageN){
    return(LT$Donor[LT$ImageNumber == imageN])
}
# Donor of every cell. A single vectorised match() replaces one linear scan of
# LT per cell and always yields a plain vector (NA for an image absent from LT),
# whereas sapply() would return a list in that case.
cellDonor <- LT$Donor[match(cell$ImageNumber, LT$ImageNumber)]

In [None]:
# TRUE when the image with the given ImageNumber comes from a normal donor
# (looked up in the image table LT).
isNDcell <- function(imageN){
    return(LT$ND[LT$ImageNumber == imageN])
}
# Normal-donor flag of every cell. A single vectorised match() replaces one
# linear scan of LT per cell and always yields a plain logical vector.
cellND <- LT$ND[match(cell$ImageNumber, LT$ImageNumber)]

## Granule position regression

NB: Other models did not perform significantly better.

```R
    set.seed(38)

    # We select actin and cytoplasm related features to explain other variables
    predictiveFtInd = c(grep("\\.x", names(cell)[ftToKeep]), which(names(cell)[ftToKeep] %in% names(cytoplasm)))
    predictiveFt = names(cell)[ftToKeep][predictiveFtInd]
    # Remove granule related feature
    predictiveFtInd = grep("Granule", predictiveFt, invert = T)
    predictiveFt = names(cell)[ftToKeep][predictiveFtInd]

    form1 = as.formula(paste("Mean_FilterNKPerfGranules_Distance_Minimum_FilterNKCytoplasm ~ ", 
                             paste(predictiveFt, collapse = " + ")))
    model1 = lm(form1, data = cell[cellToKeep, ftToKeep])

    summary(model1)

    form2 = as.formula(paste("Intensity_IntegratedIntensity_DistNuc ~ ", 
                             paste(predictiveFt, collapse = " + ")))
    model2 = lm(form2, data = cell[cellToKeep, ftToKeep])

    summary(model2)

    form3 = as.formula(paste("Intensity_MeanIntensity_DistNuc ~ ", 
                             paste(predictiveFt, collapse = " + ")))
    model3 = lm(form3, data = cell[cellToKeep, ftToKeep])

    summary(model3)

    form4 = as.formula(paste("Intensity_MedianIntensity_DistNuc ~ ", 
                             paste(predictiveFt, collapse = " + ")))
    model4 = lm(form4, data = cell[cellToKeep, ftToKeep])

    summary(model4)
```

We explore how much can be predicted from easily interpretable actin and shape features alone.

In [None]:
# Easily interpretable predictors: cell shape, mean actin intensity and the
# first two of the three radial actin fractions of plane 1
interpretableFt <- c(
    "AreaShape_Perimeter",
    "AreaShape_MaximumRadius",
    "AreaShape_MinorAxisLength",
    "AreaShape_MeanRadius",
    "AreaShape_FormFactor",
    "Intensity_MeanIntensity_CorrActin",
    "RadialDistribution_FracAtD_CorrActin1_1of3",
    "RadialDistribution_FracAtD_CorrActin1_2of3"
)

In [None]:
# Linear model of the mean granule distance to the membrane (plane 1) as a
# function of the interpretable features, fitted on normal donor cells
form_lm <- as.formula(
    paste("Mean_FilterNKPerfGranules1_Distance_Minimum_FilterNKCytoplasm ~ ",
          paste(interpretableFt, collapse = " + "))
)
model_nd <- lm(form_lm, data = cell[cellND,])

In [None]:
# Same model fitted on the cells of patient 1
model_pt1 <- lm(form_lm, data = cell[cellDonor == "PATIENT 1", ])

In [None]:
# Same model fitted on the cells of patient 2
model_pt2 <- lm(form_lm, data = cell[cellDonor == "PATIENT 2", ])

In [None]:
# Export the normal donor and patient 2 cells (used to find representative
# cells); skipped in test mode
if (!TEST_MODE) {
    write.csv(cell[cellND, ], file = "Tab/LT_ARPC1B_granule_cell_nd.csv", row.names = FALSE)
    write.csv(cell[cellDonor == "PATIENT 2", ], file = "Tab/LT_ARPC1B_granule_cell_pt2.csv",
              row.names = FALSE)
}

In [None]:
# Coefficients and fit statistics of the normal donor model
summary(model_nd)

In [None]:
# Coefficients and fit statistics of the patient 1 model
summary(model_pt1)

In [None]:
# Coefficients and fit statistics of the patient 2 model
summary(model_pt2)

## Visualize links

In [None]:
# Star graph of the normal donor model: the first vertex carries the response
# name, the other vertices the predictor names; an edge is green when the
# coefficient is positive and red otherwise
g <- make_star(n = length(model_nd$coefficients), mode = "undirected") %>%
    set_vertex_attr("names",
                    value = c(as.character(form_lm[2]), names(model_nd$coefficients)[-1])) %>%
    set_edge_attr("color",
                  value = ifelse(model_nd$coefficients[-1] > 0, "#00dd00", "#dd0000"))
plot(g, vertex.label = V(g)$names)

In [None]:
# Save the normal donor network as PDF (skipped in test mode)
if(!TEST_MODE){
    pdf("Fig/LT_ARPC1B_granule_network_per_cell_nd.pdf")
    # Close the PDF device even if plotting fails, so that it is not left open
    # as the active device
    invisible(tryCatch(plot(g, vertex.label = V(g)$names), finally = dev.off()))
}

In [None]:
# Star graph of the patient 1 model: the first vertex carries the response
# name, the other vertices the predictor names; an edge is green when the
# coefficient is positive and red otherwise
g <- make_star(n = length(model_pt1$coefficients), mode = "undirected") %>%
    set_vertex_attr("names",
                    value = c(as.character(form_lm[2]), names(model_pt1$coefficients)[-1])) %>%
    set_edge_attr("color",
                  value = ifelse(model_pt1$coefficients[-1] > 0, "#00dd00", "#dd0000"))
plot(g, vertex.label = V(g)$names)

In [None]:
# Save the patient 1 network as PDF (skipped in test mode)
if(!TEST_MODE){
    pdf("Fig/LT_ARPC1B_granule_network_per_cell_pt1.pdf")
    # Close the PDF device even if plotting fails, so that it is not left open
    # as the active device
    invisible(tryCatch(plot(g, vertex.label = V(g)$names), finally = dev.off()))
}

In [None]:
# Star graph of the patient 2 model: the first vertex carries the response
# name, the other vertices the predictor names; an edge is green when the
# coefficient is positive and red otherwise
g <- make_star(n = length(model_pt2$coefficients), mode = "undirected") %>%
    set_vertex_attr("names",
                    value = c(as.character(form_lm[2]), names(model_pt2$coefficients)[-1])) %>%
    set_edge_attr("color",
                  value = ifelse(model_pt2$coefficients[-1] > 0, "#00dd00", "#dd0000"))
plot(g, vertex.label = V(g)$names)

In [None]:
# Save the patient 2 network as PDF (skipped in test mode)
if(!TEST_MODE){
    pdf("Fig/LT_ARPC1B_granule_network_per_cell_pt2.pdf")
    # Close the PDF device even if plotting fails, so that it is not left open
    # as the active device
    invisible(tryCatch(plot(g, vertex.label = V(g)$names), finally = dev.off()))
}

### Prediction from LFA-1 features

In [None]:
# Names of the cell columns related to LFA-1
grep("LFA", names(cell), value = TRUE)

In [None]:
# Predictors: mean LFA-1 intensity and the first two of its three radial fractions
lfa1Ft <- c(
    "Intensity_MeanIntensity_CorrLFA1",
    "RadialDistribution_FracAtD_CorrLFA1_1of3",
    "RadialDistribution_FracAtD_CorrLFA1_2of3"
)
form_lfa_lm <- as.formula(
    paste("Mean_FilterNKPerfGranules1_Distance_Minimum_FilterNKCytoplasm ~ ",
          paste(lfa1Ft, collapse = " + "))
)
# NOTE(review): despite the "_nd" suffix, the model is fitted on every filtered
# cell of `cytoplasm`, not only on normal donor cells - confirm this is intended
model_lfa_nd <- lm(form_lfa_lm, data = cytoplasm)

In [None]:
# Coefficients and fit statistics of the LFA-1 model
summary(model_lfa_nd)

In [None]:
# Standard diagnostic plots of the fitted linear model
plot(model_lfa_nd)

In [None]:
# Fitted against observed mean granule distance to the membrane (plane 1).
# The observed values are taken from the model frame, i.e. exactly the rows used
# for the fit: dropping the NAs of the response alone gives a vector of a
# different length as soon as a predictor is missing where the response is not.
plot(x = model_lfa_nd$fitted.values,
     y = model.response(model.frame(model_lfa_nd)),
     xlab = "Fitted mean granule distance to membrane",
     ylab = "Observed mean granule distance to membrane")

## Intensity relations per level

In [None]:
# Attach the donor of each cell to the working table
cell$Donor <- cellDonor

In [None]:
# Plane 4: radial actin fraction (1of3) against the mean granule distance to
# the membrane, one panel per donor, with the 2D density drawn over the cells.
# The five manual colours are assigned to the donors in level order
# (NOTE(review): presumably three normal donors followed by two patients - confirm).
gp <- ggplot(cell, aes(x = Mean_FilterNKPerfGranules4_Distance_Minimum_FilterNKCytoplasm,
                 y = RadialDistribution_FracAtD_CorrActin4_1of3,
                 color = Donor)) +
    geom_point(alpha = 0.04) + 
    geom_density_2d() + 
    scale_color_manual(values = c("#E5CE79", "#E5CE79", "#E5CE79", "#198CAC", "#198CAC")) +
    # "none" hides the legend; passing FALSE to guides() is deprecated
    guides(color = "none") +
    facet_wrap(~Donor)

In [None]:
# Save the figure as PDF (skipped in test mode)
if (!TEST_MODE) {
    ggsave(filename = "Fig/LT_ARPC1B_granule_count_and_radial_z4.pdf", plot = gp,
           width = 10, height = 7)
}

## Load individual slide granule measurement

In [None]:
# Restart from the filtered cytoplasm table and attach the donor of each cell.
# A single vectorised match() on the image table replaces one linear scan of LT
# per cell and always yields a plain vector (NA for an image absent from LT).
cell <- cytoplasm
cell$Donor <- LT$Donor[match(cell$ImageNumber, LT$ImageNumber)]

### Granule dispersion
We want to include dispersion both on the X and Y axes in a symmetric manner (as these directions do not represent anything particular to the cells).  
We use the Standard Distance Deviation defined as:  
$$
\begin{aligned}
SDD &= \sqrt{\frac{\sum_i^n (x_i - \hat{x})^2}{n-2} + \frac{\sum_i^n (y_i - \hat{y})^2}{n-2}} \\ 
&= \sqrt{\frac{n-1}{n-2} \times \bigg(\frac{\sum_i^n (x_i - \hat{x})^2}{n-1} + \frac{\sum_i^n (y_i - \hat{y})^2}{n-1}\bigg)} \\
&= \sqrt{\frac{n-1}{n-2} \times \big(\text{var}(x) + \text{var}(y)\big)}
\end{aligned}
$$ 
To account for the effect of cytoplasm area, we also measure the area of the convex hull containing the centers of all granules, so that this value can be compared with the cytoplasm area.

In [None]:
# Area of the convex hull of a set of 2D points.
#   x, y  numeric vectors of the same length (point coordinates)
# Returns the hull area, or 0 when the points cannot enclose any area (fewer
# than 3 points, or all points identical or collinear).
get_hull_area <- function(x, y){
    # If we have less than 3 points, 
    # the hull is empty
    if (length(x) < 3){
        return(0)
    }
    X = cbind(x,y)
    # Indices of the hull vertices, in order along the hull
    hull = chull(X)
    # Degenerate hull: identical or collinear points enclose no area
    if (length(hull) < 3){
        return(0)
    }
    hx = X[hull, 1]
    hy = X[hull, 2]
    # Shoelace formula over consecutive hull vertices (the last one is paired
    # with the first); abs() makes the result independent of the orientation
    nx = c(hx[-1], hx[1])
    ny = c(hy[-1], hy[1])
    return(abs(sum(hx * ny - nx * hy)) / 2)
}

In [None]:
# Add the granule features measured at height i to the global table `cell`.
#   i  index of the granule file to read (FilterNKPerfGranules<i>.csv)
# Returns `cell` merged with one row of features per (image, parent cytoplasm):
# granule count, variance of the X and Y positions, convex hull area, mean and
# standard deviation of the distance to the membrane, and SDD. The new columns
# are prefixed with "Gran<i>_".
# merge() is called with its default arguments, so cells without any granule
# in this file are absent from the result.
addGranuleFeatures <- function(i){
    granuleTab = fread(paste0("Rsc/compiled_LT_ARPC1B_3D_FilterNKPerfGranules", i, ".csv"),
                       header = TRUE)

    # Only the identifiers, the position and the distance to the membrane are needed
    isKept = names(granuleTab) %in% c("Parent_FilterNKCytoplasm",
                                      "ImageNumber",
                                      "Location_Center_X",
                                      "Location_Center_Y",
                                      "Distance_Minimum_FilterNKCytoplasm")

    # One row per cell (image number + parent cytoplasm object)
    perCell = granuleTab[, isKept, with = FALSE] %>%
        group_by(ImageNumber, Parent_FilterNKCytoplasm) %>%
        summarise(
            Count = n(),
            VarX = var(Location_Center_X),
            VarY = var(Location_Center_Y),
            HullArea = get_hull_area(Location_Center_X, Location_Center_Y),
            MeanDist = mean(Distance_Minimum_FilterNKCytoplasm),
            SdDist = sd(Distance_Minimum_FilterNKCytoplasm)
        )

    # Standard Distance Deviation: sqrt((n-1)/(n-2) * (var(x) + var(y)))
    perCell$SDD = sqrt((perCell$Count - 1) / (perCell$Count - 2) *
                       (perCell$VarX + perCell$VarY))

    # Prefix every feature (all columns but the two identifiers) with the height
    names(perCell)[-c(1,2)] = paste0("Gran", i, "_", names(perCell)[-c(1,2)])

    merge(cell, perCell,
          by.x = c("ImageNumber", "ObjectNumber"),
          by.y = c("ImageNumber", "Parent_FilterNKCytoplasm"))
}

In [None]:
# Granule features of the first plane
cell <- addGranuleFeatures(1)

In [None]:
# Sanity check (plane 1): the mean distance to the membrane computed from the
# granule file should match the one CellProfiler stored in the cytoplasm file
ggplot(cell, aes(x = Mean_FilterNKPerfGranules1_Distance_Minimum_FilterNKCytoplasm,
                 y = Gran1_MeanDist,
                 color = Donor)) +
    geom_point() +
    facet_wrap(~Donor)

In [None]:
# Granule features of planes 2 to 8, added one plane at a time in increasing
# order (addGranuleFeatures() reads the current `cell` table at each call)
for (plane in 2:8) {
    cell <- addGranuleFeatures(plane)
}

In [None]:
# Sanity check (plane 8): the mean distance to the membrane computed from the
# granule file should match the one CellProfiler stored in the cytoplasm file
ggplot(cell, aes(x = Mean_FilterNKPerfGranules8_Distance_Minimum_FilterNKCytoplasm,
                 y = Gran8_MeanDist,
                 color = Donor)) +
    geom_point() +
    facet_wrap(~Donor)

In [None]:
# Granule count of every cell at every depth, in long format.
# The identifier columns are selected by name: the previous positional
# selection (columns 1, 2 and 133) silently picks other columns as soon as the
# set of measured features changes.
granuleCount <- cell[, c("ImageNumber", "ObjectNumber", "Donor",
                         grep("Count", names(cell), value = TRUE)), with = FALSE]
granuleCount <- melt(granuleCount, id.vars = c("ImageNumber", "ObjectNumber", "Donor"))
# "Gran<i>_Count" -> "z=<i>", whatever the number of planes
levels(granuleCount$variable) <- sub("^Gran([0-9]+)_Count$", "z=\\1", levels(granuleCount$variable))
# NOTE(review): the lines are grouped by ImageNumber only, so all cells of an
# image share one line - confirm whether one line per cell was intended.
gp <- ggplot(granuleCount, aes(x = variable, y = value)) + 
            geom_line(aes(group = ImageNumber, color = Donor), alpha = 0.1)+
            geom_violin(fill = "grey80", draw_quantiles = c(0.25, 0.5, 0.75)) + 
            xlab("Depth") + ylab("Granule count") + facet_wrap(~Donor) + 
            scale_color_manual(values = c("#E5CE79", "#E5CE79", "#E5CE79", "#198CAC", "#198CAC")) +
            # "none" hides the legend; passing FALSE to guides() is deprecated
            theme(axis.text.x = element_text(angle = 45, hjust = 1)) + guides(color = "none")
print(gp)

In [None]:
# Save the figure as PDF (skipped in test mode)
if (!TEST_MODE) {
    ggsave(filename = "Fig/LT_ARPC1B_granule_count_per_donor.pdf", plot = gp,
           width = 10, height = 7)
}

In [None]:
# Record the R version and the attached package versions for reproducibility
sessionInfo()