# Notebook - Supplementary tables
In this notebook, we explore intermediate results from other notebooks and format them in descriptive tables.

In [1]:
using HDF5, JLD

In [2]:
using CSV, StatsBase, Statistics, DataFrames, FreqTables
using BioProfiling

In [3]:
using Dates: now
# Timestamp of this notebook run (displayed as the value of the cell)
now()

2021-11-06T15:14:47.504

## Load measurements

### Coarse-grain aggregation

In [4]:
# Load the aggregated measurements table as a DataFrame
# (the trailing `;` suppresses the display of the table)
aggregatedData = CSV.read("data/aggregatedData_750cells_noSparse.csv", DataFrame);

### Transform aggregated data - Normalization
We want to focus on features that vary more across the whole dataset than within the reference condition (DMSO negative controls).

In [5]:
# Wrap the measurements in a BioProfiling `Experiment`, the object on which the
# feature selection and transformations of the next cells operate
expAgg = Experiment(aggregatedData, description = "Median values for aggregated FOV measurements")

"Experiment with 6622/6622 entries and 390/390 features selected."

In [6]:
# Any feature whose name contains one of these strings is treated as metadata
strToRemove = ["Metadata_Well", "CompoundName", "Metadata_Field", "Metadata_Row", "Metadata_Column"]

# Feature selectors, all applied to `expAgg` by the `select_features!` call below
filters = BioProfiling.AbstractSelector[
    # Remove metadata
    NameSelector(x -> !any(occursin.(strToRemove, String(x)))),
    # Remove constant columns (zero MAD over all entries)
    Selector(x -> mad(x, normalize = true) != 0,
             description = "Remove constant features"),
    # Remove columns that are constant among the DMSO reference entries
    Selector(x -> mad(x, normalize = true) != 0,
             subset = x -> x.CompoundName .== "DMSO",
             description = "Remove features constant for reference"),
]
select_features!(expAgg, filters)

In [7]:
# Work on a deep copy so that `expAgg` keeps the untransformed values
expTransformed = deepcopy(expAgg)
# Apply BioProfiling's in-place log transformation to the copy
logtransform!(expTransformed)
# Update the description; as last expression, it is also the value shown for the cell
expTransformed.description = "Transformed values for aggregated FOV measurements"

"Transformed values for aggregated FOV measurements"

Here we apply a correction based on the specific details of the experimental design:
All rows and columns include DMSO (negative) controls and we normalize all values based on these matching controls (DMSO wells sharing the same row or the same column).  

This exemplifies how to directly modify the data of an `Experiment` object.  

NB: One might want to check that more iterations are not needed (cf. Median-polish method).

In [8]:
# Normalize on matching DMSO wells median values:
# every entry is centered on the median and scaled by the MAD of the DMSO
# entries located in the same plate row or in the same plate column.

# Entries in both data frames are matching, so the row/column metadata of
# `aggregatedData` can be used to index the rows of `expTransformed.data`
@assert nrow(aggregatedData) == nrow(getdata(expTransformed))

# Copy data before correction, so that reference statistics are always
# computed on uncorrected values even after earlier rows have been modified
ndf = getdata(expTransformed)

# Mask of the DMSO (negative control) entries; it does not depend on the
# entry being corrected, so it is computed once outside of the loop
isdmso = aggregatedData.CompoundName .== "DMSO"

for (i, (fx, fy)) in enumerate(zip(aggregatedData.Metadata_Row,
                                   aggregatedData.Metadata_Column))
    # DMSO entries sharing the row OR the column of entry `i`
    matching = isdmso .& ((aggregatedData.Metadata_Row .== fx) .|
                          (aggregatedData.Metadata_Column .== fy))
    @assert any(matching)
    # Reference values, extracted once and used for both center and scale
    controls = ndf[matching, :]
    expTransformed.data[i:i, expTransformed.selected_features] .-=
        mapcols(median, controls)
    expTransformed.data[i:i, expTransformed.selected_features] ./=
        mapcols(mad, controls)
end

# A null MAD in a set of reference entries yields Inf (x/0) but also NaN (0/0)
# after the division above: discard features containing any non-finite value
select_features!(expTransformed,
                 Selector(x -> all(isfinite, x),
                        description = "Remove features with infinite values " *
                            "(i.e. with no variation for a subset of the DMSO images)"));

decorrelate_by_mad!(expTransformed);

In [9]:
# Print the name of each remaining feature, one per line.
# Broadcasting `println` also returns a vector of `nothing` (one per name),
# which the notebook displays as the value of this cell.
println.(names(getdata(expTransformed)))

Granularity_10_CorrCM_median
Granularity_9_CorrCM_median
AreaShape_FormFactor_1_median
AreaShape_Solidity_median
AreaShape_Zernike_3_1_1_median
Intensity_MassDisplacement_CorrDNA_1_median
Granularity_7_CorrActin_median
AreaShape_Compactness_median
AreaShape_Compactness_1_median
RadialDistribution_RadialCV_CorrActin_2of3_median
Granularity_3_CorrDNA_median
AreaShape_FormFactor_2_median
AreaShape_Zernike_6_4_median
AreaShape_Zernike_8_6_2_median
AreaShape_Zernike_8_8_2_median
Intensity_MADIntensity_CorrCM_1_median
RadialDistribution_RadialCV_CorrActin_1of3_median
Intensity_MassDisplacement_CorrActin_median
AreaShape_Zernike_9_9_2_median
Granularity_1_CorrCM_median
Granularity_2_CorrDNA_median
AreaShape_Zernike_2_2_2_median
AreaShape_Zernike_8_2_2_median
AreaShape_Zernike_6_4_1_median
AreaShape_Zernike_4_2_median
Intensity_MinIntensityEdge_CorrActin_median
AreaShape_Zernike_3_1_median
RadialDistribution_FracAtD_CorrDNA_1of3_median
Intensity_MassDisplacement_CorrActin_1_median
AreaShape_Ze

199-element Array{Nothing,1}:
 nothing
 nothing
 nothing
 nothing
 nothing
 nothing
 nothing
 nothing
 nothing
 nothing
 nothing
 nothing
 nothing
 ⋮
 nothing
 nothing
 nothing
 nothing
 nothing
 nothing
 nothing
 nothing
 nothing
 nothing
 nothing
 nothing

## Interpretation - STable 2

In [10]:
# Names of the 10 top-ranked features according to BioProfiling's
# `most_variable_features`
most_variable_features(expTransformed, top = 10)

10-element Array{String,1}:
 "Granularity_10_CorrCM_median"
 "Granularity_9_CorrCM_median"
 "AreaShape_FormFactor_1_median"
 "AreaShape_Solidity_median"
 "AreaShape_Zernike_3_1_1_median"
 "Intensity_MassDisplacement_CorrDNA_1_median"
 "Granularity_7_CorrActin_median"
 "AreaShape_Compactness_median"
 "AreaShape_Compactness_1_median"
 "RadialDistribution_RadialCV_CorrActin_2of3_median"

## Interpret MOAs - STable 3

In [11]:
# Load the mechanism-of-action (MOA) annotations stored under the "data" key of
# the JLD file: a dictionary mapping a compound name to its list of MOAs
MOA = load("data/MOA.jld")["data"]

Dict{Any,Any} with 112 entries:
  "Amsacrine hydrochloride"         => Any["Topoisomerase inhibitor"]
  "CP466722"                        => Any["ATM kinase inhibitor"]
  "Nicardipine hydrochloride"       => Any["Calcium channel blocker"]
  "Ro 90-7501"                      => Any["Beta amyloid inhibitor"]
  "Trimipramine maleate"            => Any["Norepinephrine reuptake inhibitor",…
  "Sertaconazole nitrate"           => Any["Sterol demethylase inhibitor"]
  "Tizanidine hydrochloride"        => Any["Adrenergic receptor agonist"]
  "Y-27632 dihydrochloride"         => Any["Rho associated kinase inhibitor"]
  "Diphenyleneiodonium chloride"    => Any["Nitric oxide synthase inhibitor"]
  "Emetine dihydrochloride hydrate" => Any["Protein synthesis inhibitor"]
  "Gefitinib"                       => Any["EGFR inhibitor"]
  "Mibefradil dihydrochloride"      => Any["T-type calcium channel blocker"]
  "Mycophenolic Acid"               => Any["Dehydrogenase inhibitor", "Inositol…
  "BW 723C86"

In [12]:
# Count how many compounds share each MOA annotation (the complete list of MOAs
# of a compound is the counted value), sorted by count
sort(freqtable(collect(values(MOA))))

74-element Named Array{Int64,1}
Dim1                                                                                                                                                                                                                         │ 
─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┼───
Any["ATPase inhibitor"]                                                                                                                                                                                                      │  1
Any["ATPase inhibitor", "Gastrin inhibitor"]                                                                                                                                                                                 │  1
Any["Acetylcholinesterase inhibitor", "Acetylcholine release stimu

In [13]:
# List the compounds annotated as SSRI, followed by their complete MOA annotation
for (compound, moas) in MOA
    "Selective serotonin reuptake inhibitor (SSRI)" in moas && println(compound, moas)
end

Fluoxetine hydrochlorideAny["Selective serotonin reuptake inhibitor (SSRI)"]
Paroxetine hydrochloride hemihydrate (MW = 374.83)Any["Selective serotonin reuptake inhibitor (SSRI)"]


In [14]:
# List the compounds annotated as norepinephrine reuptake inhibitors,
# followed by their complete MOA annotation
for (compound, moas) in MOA
    "Norepinephrine reuptake inhibitor" in moas && println(compound, moas)
end

Trimipramine maleateAny["Norepinephrine reuptake inhibitor", "Tricyclic antidepressant"]
Maprotiline hydrochlorideAny["Norepinephrine reuptake inhibitor", "Tricyclic antidepressant"]
Imipramine hydrochlorideAny["Norepinephrine reuptake inhibitor", "Serotonin reuptake inhibitor"]
MaprotilineAny["Norepinephrine reuptake inhibitor", "Tricyclic antidepressant"]


In [15]:
# List the compounds annotated as histamine receptor antagonists,
# followed by their complete MOA annotation
for (compound, moas) in MOA
    "Histamine receptor antagonist" in moas && println(compound, moas)
end

LoratadineAny["Histamine receptor antagonist"]
Pheniramine maleateAny["Histamine receptor antagonist"]


In [16]:
# List the compounds annotated as CDK inhibitors,
# followed by their complete MOA annotation
for (compound, moas) in MOA
    "CDK inhibitor" in moas && println(compound, moas)
end

KenpaulloneAny["CDK inhibitor", "Glycogen synthase kinase inhibitor"]
RoscovitineAny["CDK inhibitor"]


In [17]:
# List the compounds annotated as glycogen synthase kinase inhibitors,
# followed by their complete MOA annotation
for (compound, moas) in MOA
    "Glycogen synthase kinase inhibitor" in moas && println(compound, moas)
end

SB 415286Any["Glycogen synthase kinase inhibitor"]
KenpaulloneAny["CDK inhibitor", "Glycogen synthase kinase inhibitor"]


In [18]:
# Annotate every entry with the MOA list of its compound
# (an empty list when the compound has no annotation)
expTransformed.data.MOA = [get(MOA, name, []) for name in expTransformed.data.CompoundName];

In [19]:
# Example filters; `format_differences_moa` below builds the same pair of
# filters for an arbitrary MOA
pert = "Glycogen synthase kinase inhibitor"
# Keep the entries whose MOA list contains `pert`
filtpert = Filter(pert, :MOA, compare = (x,y) -> y in x)
# Keep the DMSO reference entries (default comparison: `isequal`)
filtref = Filter("DMSO", :CompoundName)

Filter("DMSO", :CompoundName, isequal, "No description provided")

In [20]:
"""
    format_differences_moa(pert::AbstractString; experiment = expTransformed, top::Int = 4)

Print and return the features that best distinguish the profiles annotated with
the mechanism of action `pert` from the DMSO reference profiles.

Three lines are printed: the perturbation name, the number of profiles kept and
discarded by the MOA filter, and a tab-separated row made of `pert` followed by
the `top` characteristic features (ready to paste into a table).

# Keywords
- `experiment`: `Experiment` to analyse; its data must include the `MOA` and
  `CompoundName` columns. Defaults to the notebook-level `expTransformed`.
- `top`: number of characteristic features to report.

# Returns
The collection of features returned by `characteristic_features`, so that the
result can be stored and reused by the caller.
"""
function format_differences_moa(pert::AbstractString;
                                experiment = expTransformed,
                                top::Int = 4)
    # Entries whose list of MOAs contains `pert`
    filtpert = Filter(pert, :MOA, compare = (x, y) -> y in x)
    # Reference entries
    filtref = Filter("DMSO", :CompoundName)

    println("Perturbation: " * pert)
    # Display how many profiles are averaged
    println(freqtable(experiment, filtpert))

    features = characteristic_features(experiment, filtpert, filtref, top = top)
    println(join([pert, features...], '\t'))

    # Return the features instead of the `nothing` returned by `println`
    return features
end

format_differences_moa (generic function with 1 method)

In [21]:
# CDK inhibitors vs. DMSO: print the profile counts and the characteristic features
cdk_top = format_differences_moa("CDK inhibitor");

Perturbation: CDK inhibitor
2-element Named Array{Int64,1}
Dim1      │ 
──────────┼─────
Discarded │ 6583
Kept      │   39
CDK inhibitor	AreaShape_Zernike_8_6_1_median	RadialDistribution_ZernikeMagnitude_CorrCM_9_3_median	Intensity_MaxIntensity_CorrCM_median	AreaShape_Zernike_4_0_1_median


In [22]:
# Glycogen synthase kinase inhibitors vs. DMSO: print the profile counts and the
# characteristic features
glyco_top = format_differences_moa("Glycogen synthase kinase inhibitor");

Perturbation: Glycogen synthase kinase inhibitor
2-element Named Array{Int64,1}
Dim1      │ 
──────────┼─────
Discarded │ 6583
Kept      │   39
Glycogen synthase kinase inhibitor	RadialDistribution_ZernikeMagnitude_CorrDNA_7_3_median	AreaShape_Zernike_8_6_1_median	RadialDistribution_ZernikeMagnitude_CorrCM_9_3_median	Intensity_MADIntensity_CorrCM_median


In [23]:
# Histamine receptor antagonists vs. DMSO: print the profile counts and the
# characteristic features
hista_top = format_differences_moa("Histamine receptor antagonist");

Perturbation: Histamine receptor antagonist
2-element Named Array{Int64,1}
Dim1      │ 
──────────┼─────
Discarded │ 6582
Kept      │   40
Histamine receptor antagonist	AreaShape_Zernike_4_2_2_median	AreaShape_Zernike_4_0_1_median	RadialDistribution_ZernikeMagnitude_CorrCM_9_3_median	Intensity_MaxIntensity_CorrCM_median


In [24]:
# SSRIs vs. DMSO: print the profile counts and the characteristic features
ssri_top = format_differences_moa("Selective serotonin reuptake inhibitor (SSRI)");

Perturbation: Selective serotonin reuptake inhibitor (SSRI)
2-element Named Array{Int64,1}
Dim1      │ 
──────────┼─────
Discarded │ 6582
Kept      │   40
Selective serotonin reuptake inhibitor (SSRI)	AreaShape_Zernike_4_2_2_median	AreaShape_Zernike_4_0_1_median	RadialDistribution_ZernikeMagnitude_CorrDNA_7_3_median	Granularity_1_CorrActin_median


In [25]:
# Norepinephrine reuptake inhibitors vs. DMSO: print the profile counts and the
# characteristic features
nore_top = format_differences_moa("Norepinephrine reuptake inhibitor");

Perturbation: Norepinephrine reuptake inhibitor
2-element Named Array{Int64,1}
Dim1      │ 
──────────┼─────
Discarded │ 6544
Kept      │   78
Norepinephrine reuptake inhibitor	AreaShape_Zernike_4_2_2_median	AreaShape_Zernike_4_0_1_median	RadialDistribution_ZernikeMagnitude_CorrDNA_7_3_median	AreaShape_Zernike_9_3_2_median


## STable 1

In [26]:
using HDF5, JLD
# (Re)load the MOA annotations and load the target annotations, both stored
# under the "data" key of their JLD file
MOA = load("data/MOA.jld")["data"]
targets = load("data/target.jld")["data"];

In [27]:
# Load the compound transfer list as a DataFrame
transferlist = CSV.read("data/transferList.txt", DataFrame);

In [28]:
# Start the supplementary table with one row per distinct, non-missing compound
# name of the transfer list, in sorted order
sf1 = DataFrame(
    CompoundName = transferlist.CompoundName |> unique |> skipmissing |> collect |> sort
);

In [29]:
# MOA annotations of each compound as an array of strings;
# `missing` when the compound is not annotated
sf1.MOA = map(sf1.CompoundName) do name
    haskey(MOA, name) ? Array{String}(MOA[name]) : missing
end;

In [30]:
# Target annotations of each compound as an array of strings;
# `missing` when the compound is not annotated
sf1.Targets = map(sf1.CompoundName) do name
    haskey(targets, name) ? Array{String}(targets[name]) : missing
end;

In [31]:
# Load the two RMPV result tables; both are read below through their
# `Condition` and `RMPV` columns
RMPV = CSV.read("data/RMPV.csv", DataFrame) 
RMPV1500 = CSV.read("data/RMPV_1500.csv", DataFrame);

In [32]:
# Lookup tables Condition => RMPV for each result table.
# If a Condition appears in several rows, the value of the last row is kept.
dict_rmpv_750 = Dict(row.Condition => row.RMPV for row in eachrow(RMPV))
dict_rmpv_1500 = Dict(row.Condition => row.RMPV for row in eachrow(RMPV1500));

In [33]:
# Look up the RMPV of every compound in both lookup tables;
# `missing` when the compound name is not one of the conditions
sf1.RMPV750 = map(name -> get(dict_rmpv_750, name, missing), sf1.CompoundName)
sf1.RMPV1500 = map(name -> get(dict_rmpv_1500, name, missing), sf1.CompoundName);

In [34]:
# Keep four decimal places in both RMPV columns (missing values stay missing)
for col in (:RMPV750, :RMPV1500)
    sf1[!, col] = round.(sf1[!, col], digits = 4)
end

In [35]:
# Final concentration in µM: source concentration (mM) * transferred volume (nL)
# divided by 50000 nL (i.e. out of 50 µL), times 1000 to convert mM to µM
transferlist.Concentration = 1000 * transferlist.SourceConc_mM .* transferlist.VolumeTransferred_A / 50000;
# Lookup table compound name => concentration; if a compound appears in several
# rows of the transfer list, the value of the last row is kept
concDict = Dict(row.CompoundName => row.Concentration  for row in eachrow(transferlist))

Dict{Union{Missing, String},Union{Missing, Float64}} with 312 entries:
  "Clodronic acid"                     => 20.0
  "Amsacrine hydrochloride"            => 2.0
  "SID 3712249"                        => 20.0
  "CP466722"                           => 20.0
  "U-101958 maleate"                   => 2.0
  "5-azacytidine"                      => 19.9982
  "AZ191"                              => 20.0
  "Trimipramine maleate"               => 20.0
  "Thiabendazole"                      => 20.0
  "Sertaconazole nitrate"              => 20.0
  "(\xb1)-Isoproterenol hydrochloride" => 20.0
  "Benoxathian hydrochloride"          => 20.0
  "Tizanidine hydrochloride"           => 20.0
  "Lubeluzole dihydrochloride"         => 20.0
  "GW9662"                             => 20.0
  "Emetine dihydrochloride hydrate"    => 20.0
  "Tyrphostin AG 879"                  => 20.0
  "Lorcainide hydrochloride"           => 2.0
  "Mibefradil dihydrochloride"         => 0.2
  "Ro 11-1464"                       

In [36]:
# Concentration of each listed compound (indexing `concDict` throws a KeyError
# for a name that is not in the lookup table)
sf1.Concentration = map(name -> concDict[name], sf1.CompoundName);

In [37]:
# Export the assembled table (compound, MOA, targets, RMPVs, concentration) as CSV
CSV.write("data/SuppTab1.csv", sf1);