In [None]:
using CSV, StatsBase, Statistics, DataFrames, UMAP, RCall, FreqTables
using MultipleTesting, Random, MultivariateStats, Distributed, CategoricalArrays
using LightGraphs, SimpleWeightedGraphs
using HTTP, JSON
using BioProfiling
using LightGraphs

In [None]:
using Dates: now
now()

## R Calls

In [None]:
@rlibrary ggplot2
@rlibrary extrafont
@rlibrary viridis
@rlibrary heatmaply
@rlibrary ggrepel

In [None]:
R"""
# Used later for MCD computation
library(robustbase)

# Customize ggplot appearance
library(ggplot2)
library(extrafont)

# Load extra fonts
# ttf_import("/tmp/.fonts")
# loadfonts()

# Base theme, installed as the session default by theme_set() at the end of this block
customTheme <- theme_light() + 
               theme(panel.grid.minor=element_blank(), text=element_text(size=17, family="Arial", colour = "#333333"),
                     line=element_line(colour = "#333333"), 
                     legend.background = element_rect(fill=alpha('#CCCCCC', 0.1)), legend.key = element_blank())

# Change default colors: continuous colour scale, viridis "plasma" palette unless
# `type` (or the ggplot2.continuous.colour option) asks for "gradient"
scale_colour_continuous <- function (..., begin = 0.1, end = 0.9, direction = -1, option = "plasma", 
                                     type = getOption("ggplot2.continuous.colour", default = "viridis")) {
    switch(type, gradient = scale_colour_gradient(...), 
        viridis = scale_colour_viridis_c(option = option, begin = begin, end = end, direction = direction, ...), 
        stop("Unknown scale type", call. = FALSE))
}
scale_color_continuous <- scale_colour_continuous

# Same for fills; the default type is read from the *fill* option so that colour
# and fill scales can be configured independently
scale_fill_continuous <- function (..., begin = 0.1, end = 0.9, direction = -1, option = "plasma", 
                                     type = getOption("ggplot2.continuous.fill", default = "viridis")) {
    switch(type, gradient = scale_fill_gradient(...), 
        viridis = scale_fill_viridis_c(option = option, begin = begin, end = end, direction = direction, ...), 
        stop("Unknown scale type", call. = FALSE))
}

# Discrete palette interpolated between seven fixed colours
cemm_pal <- colorRampPalette(c("#5A463C", "#008CAD", "#40B9D4", "#D4ECF2", "#D2323C", "#F8B100", "#DFDC00"))

# Discrete fill scale: "CeMM" palette by default, the standard hue palette otherwise
scale_fill_discrete <- function (..., type = "CeMM", h = c(0, 360) + 15, c = 100, l = 65, h.start = 0, 
    direction = 1, na.value = "grey50", aesthetics = "fill") 
{
    if (type == "CeMM"){
        discrete_scale(aesthetics, "CeMM", cemm_pal, na.value = na.value, ...)
    } else {
        # hue_pal lives in the scales package, which is not attached here
        discrete_scale(aesthetics, "hue", scales::hue_pal(h, c, l, h.start, 
            direction), na.value = na.value, ...)
    }
}

# Discrete colour scale, mirroring scale_fill_discrete
scale_color_discrete <- function (..., type = "CeMM", h = c(0, 360) + 15, c = 100, l = 65, h.start = 0, 
    direction = 1, na.value = "grey50", aesthetics = "colour") {
    if (type == "CeMM"){
        discrete_scale(aesthetics, "CeMM", cemm_pal, na.value = na.value, ...)
    } else {
        discrete_scale(aesthetics, "hue", scales::hue_pal(h, c, l, h.start, 
            direction), na.value = na.value, ...)
    }
}
scale_colour_discrete <- scale_color_discrete

# Theme add-on without major grid lines or panel border; extra arguments go to theme()
noGridTheme <- function(...){
    theme(panel.grid.major=element_blank(), axis.text.x=element_text(size=12), axis.text.y=element_text(size=12),
                      axis.line=element_line(color="#333333", size = 0.2), panel.border = element_blank(), ...)
}

# Theme add-on with dark backgrounds and light text; extra arguments go to theme()
darkTheme <- function(...){
    theme(panel.background = element_rect(fill = '#333333'), plot.background = element_rect(fill = '#333333'), 
          axis.line=element_line(color="#CCCCCC", size = 0.2), 
          text=element_text(size=17, family="Arial", colour = "#CCCCCC"),
          line=element_line(colour = "#CCCCCC"), ...)
}

theme_set(customTheme)

options(repr.plot.width=10, repr.plot.height=10)
"""

## Load pre-computed statistical distances to DMSO

In [None]:
# Per-condition table; its `Condition` and `RMPV` columns are used throughout the notebook
RMPV = CSV.read("data/RMPV.csv", DataFrame) 

In [None]:
RMPV.Condition[RMPV.RMPV .< 0.1]

## Get annotations

In [None]:
# Salt, hydrate and stereochemistry markers stripped from compound names
# before they are sent to the annotation API
salt_to_remove = [" maleate", " hydrochloride", " nitrate", 
                  " dihydrochloride", " chloride", " sulfate", 
                  " hydrate", " mesylate", " oxalate", " salt",
                  " from Penicillium brefeldianum", " monohydrate",
                  " trifluoroacetate", " acetate", " isethionate",
                  " hemisulfate", " angular", " sodium", " fumarate",
                  " methanesulfonate", " hemihydrate", " (MW = 374.83)",
                  "(+/-)-", "(+)-", "(-)-", "S-(+)-", "(S)-", "(±)-", "D-"]

# It seems removing this set of enantiomer indications leads
# to retrieving the correct compound annotation, as of the current release of the API.

#=
Other compounds might include salts but are anyway not found in the LINCS database at all:
    AC-93253 iodide
    N-p-Tosyl-L-phenylalanine chloromethyl ketone
    4-(2-Aminoethyl)benzenesulfonyl fluoride hydrochloride
    UNC0379 trifluoroacetate salt
=#

# Replacement rules applied in sequence with `reduce(replace, salt_dict, init = name)`.
# They are kept as an ordered vector of pairs, longest pattern first, because some
# patterns contain others ("S-(+)-" contains "(+)-"): a Dict would apply them in an
# unspecified order and could leave a stray "S-" prefix behind.
salt_dict = [s => "" for s in sort(salt_to_remove, by = length, rev = true)]

```
https://api.clue.io/api/perts?filter={"where":{"pert_iname":"(-)-Quinpirole"}}&user_key=1fee664f310c86c5cb009c323de941db
```

In [None]:
"""
    getMOA(cpd::String)

Query the LINCS perturbation database (clue.io API) for the mechanisms of action (MOA)
annotated for a single compound name.

The patterns listed in `salt_dict` are stripped from `cpd` first. The lowercase name is
queried, and the uppercase name is tried only when the lowercase query matches nothing.
The API key is read from the `CLUE_USER_KEY` environment variable when it is set.

Returns the `"moa"` field of the first matching entry, or an empty tuple `()` when no
annotation could be retrieved (a message is printed in that case).
See https://clue.io/developer-resources#apisection
"""
function getMOA(cpd::String)
    # Prefer a key supplied through the environment; fall back to the key
    # originally embedded in this notebook so existing setups keep working.
    user_key = get(ENV, "CLUE_USER_KEY", "1fee664f310c86c5cb009c323de941db")
    rootURL = "https://api.clue.io/api/perts?filter={\"where\":{\"pert_iname\":\""
    typeURL = "\"},\"fields\":{\"moa\":true}}&user_key="
    # NB: case-dependent. Dashes are handled. Spaces are usually replaced by dashes.

    cpd_no_salt = reduce(replace, salt_dict, init=cpd)

    for name in (lowercase(cpd_no_salt), uppercase(cpd_no_salt))
        rq = rootURL * name * typeURL * user_key
        try
            resRq = HTTP.get(rq)
            hits = JSON.Parser.parse(String(resRq.body))
            # An empty result means "no such name": fall through to the next spelling
            if !isempty(hits)
                return(hits[1]["moa"])
            end
        catch e
            # Best effort: report the problem and give up on this compound
            # instead of aborting a loop over many compounds.
            if isa(e, HTTP.ExceptionRequest.StatusError)
                println(cpd * " raises a " * string(e.status) * " error.")
            else
                println(cpd * " could not be annotated: " * sprint(showerror, e))
            end
            return()
        end
    end

    println(cpd*" does not have an MOA annotation.")
    return()
end

In [None]:
"""
    get_MOA_and_target(cpd::String)

Query the LINCS perturbation database (clue.io API) for the mechanisms of action (MOA)
and the targets annotated for a single compound name.

The patterns listed in `salt_dict` are stripped from `cpd` first. The lowercase name is
queried, and the uppercase name is tried only when the lowercase query matches nothing.
The API key is read from the `CLUE_USER_KEY` environment variable when it is set.

Returns the first matching entry as parsed from JSON (the request asks for its `"moa"`
and `"target"` fields), or an empty tuple `()` when nothing could be retrieved
(a message is printed in that case).
See https://clue.io/developer-resources#apisection
"""
function get_MOA_and_target(cpd::String)
    # Prefer a key supplied through the environment; fall back to the key
    # originally embedded in this notebook so existing setups keep working.
    user_key = get(ENV, "CLUE_USER_KEY", "1fee664f310c86c5cb009c323de941db")
    rootURL = "https://api.clue.io/api/perts?filter={\"where\":{\"pert_iname\":\""
    typeURL = "\"},\"fields\":{\"moa\":true,\"target\":true}}&user_key="
    # NB: case-dependent. Dashes are handled. Spaces are usually replaced by dashes.

    cpd_no_salt = reduce(replace, salt_dict, init=cpd)

    for name in (lowercase(cpd_no_salt), uppercase(cpd_no_salt))
        rq = rootURL * name * typeURL * user_key
        try
            resRq = HTTP.get(rq)
            hits = JSON.Parser.parse(String(resRq.body))
            # An empty result means "no such name": fall through to the next spelling
            if !isempty(hits)
                return(hits[1])
            end
        catch e
            # Best effort: report the problem and give up on this compound
            # instead of aborting a loop over many compounds.
            if isa(e, HTTP.ExceptionRequest.StatusError)
                println(cpd * " raises a " * string(e.status) * " error.")
            else
                println(cpd * " could not be annotated: " * sprint(showerror, e))
            end
            return()
        end
    end

    println(cpd*" does not have an MOA annotation.")
    return()
end

In [None]:
# First pass: query the API with the condition names as they appear in RMPV and
# keep whichever of the "moa" / "target" fields each returned entry provides.
MOA = Dict()
targets = Dict()
for cpd in RMPV.Condition
    annotation = get_MOA_and_target(cpd)
    # An empty result means that nothing was retrieved for this compound
    isempty(annotation) && continue
    haskey(annotation, "moa") && (MOA[cpd] = annotation["moa"])
    haskey(annotation, "target") && (targets[cpd] = annotation["target"])
end

In [None]:
# Short compound names: strip the salt_dict patterns, then turn spaces into dashes
cpd_list = map(RMPV.Condition) do name
    replace(reduce(replace, salt_dict, init=name), " " => s"-")
end

In [None]:
# Reverse lookup from short name to condition name (on a repeated short name the last condition wins)
short_to_initial_cpd = Dict(zip(cpd_list, RMPV.Condition))

In [None]:
# Second pass: query the API again with the shortened names and store the results
# under the original condition names. Conditions and short names are walked in
# parallel rather than through a short-name => condition lookup, because several
# conditions can share one short name and would otherwise all be filed under the
# last of them.
for (initial_cpd, cpd) in zip(RMPV.Condition, cpd_list)
    moa = get_MOA_and_target(cpd)
    if length(moa) > 0
        if haskey(moa, "moa")
            MOA[initial_cpd] = moa["moa"]
        end
        if haskey(moa, "target")
            targets[initial_cpd] = moa["target"]
        end
    end
end

In [None]:
using HDF5, JLD
# Cache both annotation dictionaries on disk, each stored under the "data" key
save("data/MOA2.jld", "data", MOA)
save("data/target2.jld", "data", targets)

In [None]:
MOA

In [None]:
targets

You can save the MOA dictionary for later use:
```julia
using HDF5, JLD
save("data/MOA.jld", "data", MOA)
save("data/target.jld", "data", targets)
MOA = load("data/MOA.jld")["data"]
targets = load("data/target.jld")["data"]
```

In [None]:
using HDF5, JLD
# Reload the cached annotation dictionaries (written under the "data" key)
MOA = load("data/MOA2.jld")["data"]
targets = load("data/target2.jld")["data"]

In [None]:
# Pool the MOA lists of all annotated compounds and count each MOA, largest count first
freqMOA = sort(freqtable(vcat(collect(values(MOA))...)), rev = true);

In [None]:
# Annotations restricted to hit conditions (RMPV below 0.1) that have an MOA entry
hitMOA = Dict(x => MOA[x] for x in RMPV.Condition[RMPV.RMPV .< 0.1] if x in keys(MOA))
# MOA counts among hits only, largest count first
freqHitMOA = sort(freqtable(vcat(collect(values(hitMOA))...)), rev = true);

In [None]:
# Long-format table for the stacked bar plot: every MOA counted more than 3 times
# is listed twice, once in the first half (Hits = true) and once in the second
# half (Hits = false).
dfMOA = DataFrame()
dfMOA.MOA = repeat(names(freqMOA)[1][freqMOA .> 3], 2)
dfMOA.Hits = repeat([true, false], inner = Int(length(dfMOA.MOA)/2))
# Counts start at zero and are filled in by the following cell
dfMOA.Count = zeros(length(dfMOA.MOA));

In [None]:
# Fill the counts: rows flagged as hits get the number of hit compounds for their
# MOA, the other rows get the remaining (non-hit) compounds. The lookup tables are
# built once, outside the loop, rather than for every row.
let allCounts = Dict(freqMOA), hitCounts = Dict(freqHitMOA)
    for row in eachrow(dfMOA)
        # MOAs without any hit compound are absent from the hit table
        nHits = get(hitCounts, row.MOA, 0)
        row.Count = row.Hits ? nHits : allCounts[row.MOA] - nHits
    end
end
# Ordered factor (reverse of the frequency order) so the plot follows the MOA frequencies
dfMOA.MOA = CategoricalArray{String,1}(dfMOA.MOA, levels = reverse(names(freqMOA)[1][freqMOA .> 3]), ordered=true);

In [None]:
# Stacked horizontal bars: compound count per MOA, split by hit status
gp = ggplot(dfMOA, aes(x = :MOA, y = :Count, fill = :Hits)) + 
     geom_bar(position="stack", stat="identity") + coord_flip() + 
     scale_y_continuous(breaks = 0:2:12) +
     theme(var"legend.position"="bottom") +
     RObject(nothing)

In [None]:
# Write the hit-enrichment bar plot to disk
ggsave("fig/HitEnrichment.pdf", gp);

## MOA analysis

Now we focus on MOAs with at least 2 hit compounds:

In [None]:
# Names of the MOAs counted at least twice among hit compounds
top_moa = names(freqHitMOA)[1][freqHitMOA .>= 2]

This means focusing on the following hit compounds:

In [None]:
# Hit conditions (RMPV below 0.1) annotated with at least one of the top MOAs
top_moa_hit_cpd = intersect(
    Set([cpd for (cpd, moas) in MOA if any(in(top_moa), moas)]),
    RMPV.Condition[RMPV.RMPV .< 0.1])

Targets are known for all these compounds:

In [None]:
# Every selected hit compound must have a target annotation
@assert all(haskey(targets, cpd) for cpd in top_moa_hit_cpd)

### MOA morphological similarity

#### Load aggregated data

In [None]:
# Aggregated measurements; the CompoundName, Metadata_Row and Metadata_Column columns are used below
aggregatedData = CSV.read("data/aggregatedData_750cells.csv", DataFrame);

#### Transform aggregated data - Normalization
We want to focus on variables that vary more across the whole dataset than within the reference condition (untreated WT).

In [None]:
# Wrap the table in a BioProfiling Experiment so selectors and filters can be applied to it
expAgg = Experiment(aggregatedData, description = "Median values for aggregated FOV measurements")

In [None]:
# Feature selectors applied together by select_features! at the end of the cell
filters = Array{BioProfiling.AbstractSelector,1}()
# Remove metadata
strToRemove = ["Metadata_Well", "CompoundName", "Metadata_Field", "Metadata_Row", "Metadata_Column"]
# Keep a column only if its name contains none of the strings above
push!(filters, NameSelector(x -> !any(occursin.(strToRemove, String(x)))))
# Remove constant columns
# (a column passes when its median absolute deviation is non-zero)
push!(filters, Selector(x -> mad(x, normalize = true) != 0, description = "Remove constant features"));
# Same test, evaluated on the DMSO entries only
push!(filters, Selector(x -> mad(x, normalize = true) != 0, 
                        subset = x -> x.CompoundName .== "DMSO", 
                        description = "Remove features constant for reference"))
select_features!(expAgg, filters)

In [None]:
# Transform a deep copy so that expAgg keeps the untransformed values
expTransformed = deepcopy(expAgg)
logtransform!(expTransformed)
expTransformed.description = "Transformed values for aggregated FOV measurements"

Here we apply a correction based on the specific details of the experimental design:
All rows and columns include DMSO (negative) controls, and we normalize all values based on the matching controls (DMSO wells sharing the same row or the same column).  

This exemplifies how to directly modify the data of an `Experiment` object.  

NB: One might want to check that more iterations are not needed (cf. Median-polish method).

In [None]:
# Normalize on matching DMSO wells median values

# Entries in both data frames are matching
@assert nrow(aggregatedData) == nrow(getdata(expTransformed))

# Copy data before correction
# (medians are taken from this snapshot, not from the values being corrected)
ndf = getdata(expTransformed)

# The DMSO mask is the same for every entry, so it is computed once
isDMSO = aggregatedData.CompoundName .== "DMSO"

for (i, (fx, fy)) in enumerate(eachrow(aggregatedData[:,[:Metadata_Row, :Metadata_Column]])) 
    # Reference wells: DMSO entries sharing the row or the column of entry i
    sameRowOrColumn = (aggregatedData.Metadata_Row .== fx) .| (aggregatedData.Metadata_Column .== fy)
    refWells = isDMSO .& sameRowOrColumn
    @assert sum(refWells) > 0
    # Subtract the per-feature median of the reference wells from entry i
    expTransformed.data[i:i, expTransformed.selected_features] .-= 
        mapcols(median, ndf[refWells,:]) 
end

In [None]:
# Membership test used as the comparison operator of the Filter below
compare_in(x,y) = x in y
# When compare_in is broadcast, test each element of x against the whole
# collection y (wrapped in Ref) instead of pairing x and y element by element.
# NOTE(review): presumably BioProfiling broadcasts `compare` over the column
# values and the filter value, in that order - confirm against the package.
Broadcast.broadcasted(::typeof(compare_in), x, y) = broadcast(in, x, Ref(y)) 
# We now only keep hit compounds
filter_entries!(expTransformed, Filter(top_moa_hit_cpd, :CompoundName, compare = compare_in));

In [None]:
expTransformed

### Dimensionality reduction

In [None]:
using Distances
# Fixed seed so that the embedding can be reproduced
Random.seed!(3895)
# 4-component UMAP of the selected entries and features, using the cosine distance
umTPM = umap(expTransformed, 4, metric = CosineDist())
# Transpose before converting to a data frame, then name the columns UMAP1, UMAP2, ...
# NOTE(review): presumably umap returns components x entries, hence the transpose - confirm.
umTPM = convert(DataFrame, umTPM')
rename!(umTPM, Symbol.("UMAP" .* string.(1:ncol(umTPM))));

In [None]:
top_moa

In [None]:
# Compound name of every selected entry
umTPM.Compound = expTransformed.data.CompoundName[expTransformed.selected_entries]
# For each entry, the MOAs of its compound that belong to top_moa
umTPM.MOA = [[y for y in MOA[x] if y in top_moa] for x in umTPM.Compound]
# String form of the MOA list, usable as a categorical colour in the plots
umTPM.MOA2 = CategoricalArray(string.(umTPM.MOA));

In [None]:
# First two UMAP components, coloured by MOA, with the legend laid out in 3 rows at the bottom
ggplot(umTPM, aes(:UMAP1, :UMAP2)) + 
    geom_point(aes(color = :MOA2), alpha = 0.8) +
    coord_fixed() + 
    theme(var"legend.position"="bottom", var"legend.spacing.x" = unit(0.35, "cm"), 
    var"legend.spacing.y" = unit(0, "cm")) + 
    guides(color=guide_legend(nrow=3,byrow=true))

In [None]:
# Third and fourth UMAP components, same layout as the previous plot
ggplot(umTPM, aes(:UMAP3, :UMAP4)) + 
    geom_point(aes(color = :MOA2), alpha = 0.8) +
    coord_fixed() + 
    theme(var"legend.position"="bottom", var"legend.spacing.x" = unit(0.35, "cm"), 
    var"legend.spacing.y" = unit(0, "cm")) + 
    guides(color=guide_legend(nrow=3,byrow=true))

In [None]:
# Experiment built on the UMAP table, restricted to the numeric UMAP columns
expUMAP = Experiment(umTPM, description = "UMAP projection of profiling data")
filters = Array{BioProfiling.AbstractReduce,1}()
# Remove (categorical) compound column from analysis
push!(filters, NameSelector(x -> x != "Compound"))
# The two MOA annotation columns are excluded in the same way
push!(filters, NameSelector(x -> x != "MOA"))
push!(filters, NameSelector(x -> x != "MOA2"))
# Apply filters
filter!(expUMAP, filters)
expUMAP

In [None]:
# One row per ordered pair of distinct hit compounds
pairwise_hellinger = DataFrame([(x, y) for x in levels(top_moa_hit_cpd) for y in levels(top_moa_hit_cpd) if x != y]);
# Compound name of every selected entry, used to build the two group masks
exp_cpd = expTransformed.data[expTransformed.selected_entries, :CompoundName]
# Distance between the UMAP profiles of the two compounds of each pair
pairwise_hellinger.distance = [distance_robust_hellinger(getdata(expUMAP), 
                                                         exp_cpd.==x, 
                                                         exp_cpd.==y) 
                               for (x,y) in eachrow(pairwise_hellinger)]
rename!(pairwise_hellinger, ["X", "Y", "distance"]);

In [None]:
# Set the size of the rendered figure (RCall option)
RCall.rcall_p(:options, rcalljl_options=Dict(:width => 1000, :height => 800))
# Heatmap of the compound-to-compound morphological distances
ggplot(pairwise_hellinger, aes(x = :X, y = :Y, fill = :distance)) +
    geom_tile() +
    xlab("") +
    ylab("") + 
    theme(var"axis.text.x" = element_text(angle = 45, hjust = 1)) +
    RObject(nothing)

In [None]:
MOA["Nisoldipine"]

In [None]:
MOA["Paroxetine hydrochloride hemihydrate (MW = 374.83)"]

In [None]:
MOA["Fluoxetine hydrochloride"]

In [None]:
MOA["Cilnidipine"]

In [None]:
MOA["Flunarizine dihydrochloride"]

In [None]:
# For each top MOA, the selected hit compounds annotated with it
top_hits_per_moa = Dict(moa => [x for x in top_moa_hit_cpd if moa in MOA[x]] for moa in top_moa)

In [None]:
# Two example MOA names. NOTE(review): the comprehension in the following cell
# binds its own `moa1`/`moa2` loop variables, so these globals are not read there.
moa1 = "Selective serotonin reuptake inhibitor (SSRI)"
moa2 = "Calcium channel blocker"

In [None]:
# One row per ordered pair of distinct top MOAs
moa_pairwise_hellinger = DataFrame([(x, y) for x in top_moa 
                                           for y in top_moa if x != y]);
# MOA-level distance: mean of the compound-level distances over the pairs whose
# first compound is a hit of moa1 and whose second compound is a hit of moa2
moa_pairwise_hellinger.distance = [mean(pairwise_hellinger[
                                        [x in top_hits_per_moa[moa1] for x in pairwise_hellinger.X] .&
                                        [x in top_hits_per_moa[moa2] for x in pairwise_hellinger.Y],
                                        :distance])
                                   for (moa1,moa2) in eachrow(moa_pairwise_hellinger)]
rename!(moa_pairwise_hellinger, ["X", "Y", "distance"]);

In [None]:
# Set the size of the rendered figure (RCall option)
RCall.rcall_p(:options, rcalljl_options=Dict(:width => 1000, :height => 800))
# Heatmap of the MOA-to-MOA morphological distances
ggplot(moa_pairwise_hellinger, aes(x = :X, y = :Y, fill = :distance)) +
    geom_tile() +
    xlab("") +
    ylab("") + 
    theme(var"axis.text.x" = element_text(angle = 45, hjust = 1)) +
    RObject(nothing)

### MOA target PPI similarity

#### Load PPI from HIPPIE database

In [None]:
# Fetch last version of HIPPIE database (2.2 when writing this notebook)
hippie = HTTP.get("http://cbdm-01.zdv.uni-mainz.de/~mschaefer/hippie/hippie_current.txt")
hippie = CSV.read(hippie.body, DataFrame, header = false)
rename!(hippie, ["Uniprot1", "Entrez1", "Uniprot2", "Entrez2", "Confidence", "Experiments"])

HIPPIE's Q&A section suggests:  
```medium confidence (0.63 - second quartile of the HIPPIE score distribution) or high confidence (0.73 - third quartile)```

In [None]:
# Distribution of the interaction confidence scores, with the 0.63 cut-off marked
ggplot(hippie, aes(x = :Confidence, y = "All")) +
    geom_violin() +
    geom_vline(xintercept = 0.63)

In [None]:
# Keep interactions with a confidence of at least 0.63 (identifier columns only),
# then drop the rows in which either Entrez identifier is missing
hippie = hippie[hippie.Confidence .>= 0.63, 1:4]
hippie = hippie[(.!ismissing.(hippie.Entrez1)) .& (.!ismissing.(hippie.Entrez2)), :]

#### Construct PPI network

In [None]:
# Distinct genes of the network and the two-way mapping between a gene and
# its position in that list (used as vertex index)
allgenes = union(hippie.Entrez1, hippie.Entrez2)
gene_to_ID = Dict(zip(allgenes, 1:length(allgenes)))
ID_to_gene = Dict(enumerate(allgenes));

In [None]:
# Duplicated links and self-edges are discarded
# NOTE(review): the statement above is the original author's; confirm it for self-edges.
G = SimpleGraph(length(allgenes))
# One undirected edge per interaction, between the vertex indices of both genes
for (gene1, gene2) in zip(hippie.Entrez1, hippie.Entrez2)
    add_edge!(G, gene_to_ID[gene1], gene_to_ID[gene2])
end

In [None]:
"""
    symbol_to_entrez_mygeneinfo(s::AbstractString)

Look up the gene symbol `s` (human) with the mygene.info query service and return the
`_id` field of the first hit, as found in the parsed JSON response.
Returns `NaN` when the response reports no hit.
"""
function symbol_to_entrez_mygeneinfo(s::AbstractString)
    prefix = "http://mygene.info/v3/query?q=symbol:"
    suffix = "&species=human&fields=entrez"
    # Percent-encode the symbol so that characters with a special meaning in a
    # URL (spaces, '&', '#', ...) cannot truncate or alter the query
    rq = prefix * HTTP.escapeuri(s) * suffix
    resRq = HTTP.get(rq)
    entrezRQ = JSON.Parser.parse(String(resRq.body))
    # `get` also covers responses that carry no "total" field
    if get(entrezRQ, "total", 0) > 0
        return(entrezRQ["hits"][1]["_id"])
    else
        # NOTE(review): callers in this notebook pass the result to parse(Int, ...),
        # which cannot handle this value - symbols without a hit need filtering upstream.
        return(NaN)
    end
end

In [None]:
# Distinct target symbols across all annotated compounds
alltargets = unique(collect(Iterators.flatten(values(targets))))
# One web query per symbol
alltargets_entrez = symbol_to_entrez_mygeneinfo.(alltargets);

In [None]:
# Lookup from target symbol to the identifier returned for it
alltargets_dict = Dict(zip(alltargets, alltargets_entrez))

To quantify closeness of drug modules, we use the $s_{AB}$ score from Menche et al. (Science, 2015) defined as follows:
$s_{AB} = \langle d_{AB} \rangle - \frac{\langle d_{AA} \rangle + \langle d_{BB} \rangle}{2}$

In [None]:
"""
    mean_shortest_distance(cpd1::AbstractString, cpd2::AbstractString)

Mean shortest-path length in the network `G` over all pairs made of one target of
`cpd1` and one target of `cpd2` (targets are taken from `targets` and mapped to
vertices through `alltargets_dict` and `gene_to_ID`).

Pairs of targets that are not connected in `G` are left out of the mean instead of
being counted as distance 0. Returns `NaN` when no pair is connected.
"""
function mean_shortest_distance(cpd1::AbstractString, cpd2::AbstractString)
    entrez1 = [gene_to_ID[parse(Int, alltargets_dict[x])] for x in targets[cpd1]]
    entrez2 = [gene_to_ID[parse(Int, alltargets_dict[x])] for x in targets[cpd2]]
    path_lengths = Int[]
    for x in entrez1
        # A single breadth-first search gives the distance from x to every vertex
        from_x = gdistances(G, x)
        unreachable = typemax(eltype(from_x))
        for y in entrez2
            from_x[y] == unreachable || push!(path_lengths, from_x[y])
        end
    end
    return isempty(path_lengths) ? NaN : mean(path_lengths)
end

In [None]:
"""
    sAB(cpd1::AbstractString, cpd2::AbstractString)

Separation score of two compounds: the mean target-to-target distance between the
compounds minus the average of the two within-compound mean distances.
"""
function sAB(cpd1::AbstractString, cpd2::AbstractString)
    d(a, b) = mean_shortest_distance(a, b)
    return d(cpd1, cpd2) - ((d(cpd1, cpd1) + d(cpd2, cpd2))/2)
end

In [None]:
# One row per unordered pair of hit compounds (x < y keeps each pair once)
pairwise_sAB = DataFrame([(x, y) for x in levels(top_moa_hit_cpd) for y in levels(top_moa_hit_cpd) if x < y]);
# Network separation score of each pair
pairwise_sAB.sAB = [sAB(x, y) for (x,y) in eachrow(pairwise_sAB)]
rename!(pairwise_sAB, ["X", "Y", "s_AB"]);

```
rename!(pairwise_sAB, ["X", "Y", "s_AB"]);
save("data/target.jld", "data", pairwise_sAB)
```

In [None]:
# Set the size of the rendered figure (RCall option)
RCall.rcall_p(:options, rcalljl_options=Dict(:width => 1000, :height => 800))
# Heatmap of the compound-to-compound separation scores
ggplot(pairwise_sAB, aes(x = :X, y = :Y, fill = :s_AB)) +
    geom_tile() +
    xlab("") +
    ylab("") + 
    theme(var"axis.text.x" = element_text(angle = 45, hjust = 1)) +
    RObject(nothing)

In [None]:
# Pair up, for each compound pair (X, Y), the network separation and the morphological distance
drug_pairwise_comparison = innerjoin(pairwise_sAB, pairwise_hellinger, on = [:X, :Y]);

In [None]:
# Scatter plot of network separation against morphological distance, one point per compound pair
ggplot(drug_pairwise_comparison, aes(x = :distance, y = :s_AB)) +
    geom_point() +
    ylab("Drug module separation") +
    xlab("Morphological distance") +
    RObject(nothing)

In [None]:
# Two separation classes; the second label says ">= 1" because it also receives
# the pairs whose score is exactly 1
drug_pairwise_comparison.modules = [x < 1 ? "< 1" : ">= 1" for x in drug_pairwise_comparison.s_AB]
# Similarity derived from the distance.
# NOTE(review): the 1.01 offset presumably keeps the value positive for the log
# axis, assuming distances do not exceed 1 - confirm.
drug_pairwise_comparison.closeness = 1.01 .- drug_pairwise_comparison.distance
# Distribution of the similarity within each separation class (log-scaled axis)
ggplot(drug_pairwise_comparison, aes(x = :closeness, y = :modules)) +
    geom_violin(draw_quantiles = [0.25, 0.5, 0.75], fill = "#eeeeee") +
    ylab("Drug module separation") +
    xlab("Morphological similarity") +
    scale_x_log10() +
    RObject(nothing)

In [None]:
# For each top MOA, the distinct targets of all its hit compounds
moa_targets = Dict(k => unique(vcat([targets[cpd] for cpd in v]...)) 
                   for (k,v) in top_hits_per_moa)

In [None]:
"""
    moa_mean_shortest_distance(cpd1::AbstractString, cpd2::AbstractString)

Mean shortest-path length in the network `G` over all pairs made of one target of the
first MOA and one target of the second MOA. Despite their names, `cpd1` and `cpd2`
are keys of `moa_targets`, i.e. MOA names.

Pairs of targets that are not connected in `G` are left out of the mean instead of
being counted as distance 0. Returns `NaN` when no pair is connected.
"""
function moa_mean_shortest_distance(cpd1::AbstractString, cpd2::AbstractString)
    entrez1 = [gene_to_ID[parse(Int, alltargets_dict[x])] for x in moa_targets[cpd1]]
    entrez2 = [gene_to_ID[parse(Int, alltargets_dict[x])] for x in moa_targets[cpd2]]
    path_lengths = Int[]
    for x in entrez1
        # A single breadth-first search gives the distance from x to every vertex
        from_x = gdistances(G, x)
        unreachable = typemax(eltype(from_x))
        for y in entrez2
            from_x[y] == unreachable || push!(path_lengths, from_x[y])
        end
    end
    return isempty(path_lengths) ? NaN : mean(path_lengths)
end

In [None]:
"""
    moa_sAB(cpd1::AbstractString, cpd2::AbstractString)

Separation score of two MOAs (both arguments are MOA names): the mean target-to-target
distance between the MOAs minus the average of the two within-MOA mean distances.
"""
function moa_sAB(cpd1::AbstractString, cpd2::AbstractString)
    d(a, b) = moa_mean_shortest_distance(a, b)
    return d(cpd1, cpd2) - ((d(cpd1, cpd1) + d(cpd2, cpd2))/2)
end

In [None]:
# One row per unordered pair of top MOAs (x < y keeps each pair once)
moa_pairwise_sAB = DataFrame([(x, y) for x in keys(moa_targets) for y in keys(moa_targets) if x < y]);
# Network separation score of each pair
moa_pairwise_sAB.sAB = [moa_sAB(x, y) for (x,y) in eachrow(moa_pairwise_sAB)]
rename!(moa_pairwise_sAB, ["X", "Y", "s_AB"]);

In [None]:
# Set the size of the rendered figure (RCall option)
RCall.rcall_p(:options, rcalljl_options=Dict(:width => 1000, :height => 800))
# Heatmap of the MOA-to-MOA separation scores
ggplot(moa_pairwise_sAB, aes(x = :X, y = :Y, fill = :s_AB)) +
    geom_tile() +
    xlab("") +
    ylab("") + 
    theme(var"axis.text.x" = element_text(angle = 45, hjust = 1)) +
    RObject(nothing)

In [None]:
# Pair up, for each MOA pair (X, Y), the network separation and the morphological distance
moa_pairwise_comparison = innerjoin(moa_pairwise_sAB, moa_pairwise_hellinger, on = [:X, :Y]);

In [None]:
# Scatter plot of network separation against morphological distance, one point per
# MOA pair; the title reports their Spearman correlation rounded to 3 digits
ggplot(moa_pairwise_comparison, aes(x = :distance, y = :s_AB)) +
    geom_point() +
    ylab("Drug module separation") +
    xlab("Morphological distance") +
    ggtitle("Spearman correlation: " * 
        string(round(
        corspearman(moa_pairwise_comparison.s_AB, moa_pairwise_comparison.distance), digits = 3))) + 
    RObject(nothing)

In [None]:
# Bin the separation score in steps of 0.2; every value below 0.2 (including
# negative scores) falls in the first class
moa_pairwise_comparison.modules = [x < 0.2 ? "< 0.2" : 
                                   x < 0.4 ? "< 0.4" : 
                                   x < 0.6 ? "< 0.6" : 
                                   x < 0.8 ? "< 0.8" : ">= 0.8" for x in moa_pairwise_comparison.s_AB]
# Similarity derived from the distance.
# NOTE(review): the 1.01 offset presumably keeps the value positive for the log
# axis, assuming distances do not exceed 1 - confirm.
moa_pairwise_comparison.closeness = 1.01 .- moa_pairwise_comparison.distance
# Distribution of the similarity within each separation class (log-scaled axis)
ggplot(moa_pairwise_comparison, aes(x = :closeness, y = :modules)) +
    geom_violin(draw_quantiles = [0.25, 0.5, 0.75], fill = "#eeeeee") +
    ylab("Drug module separation") +
    xlab("Morphological similarity") +
    scale_x_log10() +
    RObject(nothing)

In [None]:
# Alternative grouping: the separation score rounded to one decimal, used as a category
moa_pairwise_comparison.modules = string.(round.(moa_pairwise_comparison.s_AB, digits = 1))
moa_pairwise_comparison.closeness = 1.01 .- moa_pairwise_comparison.distance
# Box plots with the individual MOA pairs jittered on top; box-plot outliers are
# made transparent since every point is already drawn by geom_jitter
ggplot(moa_pairwise_comparison, aes(x = :closeness, y = :modules)) +
    geom_boxplot(fill = "#eeeeee", var"outlier.alpha" = 0) +
    geom_jitter(height = 0.2) +
    ylab("Drug module separation") +
    xlab("Morphological similarity") +
    scale_x_log10() +
    coord_flip() +
    RObject(nothing)