In [1]:
import pandas as pd

Replicability

In [2]:
# Load the replicability t-test results, then split them into the rows whose
# Feature_set is "DP-CP" and all remaining rows (each re-indexed from 0).
replicability_pvalue = pd.read_csv('output/replicability_pvalue.csv')
replicability_cp_dp_pvalue = (
    replicability_pvalue
    .query('Feature_set!="DP-CP"')
    .reset_index(drop=True)
)
replicability_dp_vs_cp_pvalue = (
    replicability_pvalue
    .query('Feature_set=="DP-CP"')
    .reset_index(drop=True)
)

# Stack the two replicability mmAP files, labelling each row with the feature
# set it came from. join="inner" keeps only the columns present in both files.
replicability_mmap = pd.concat(
    [
        pd.read_csv(csv_path).assign(Feature_set=feature_set)
        for csv_path, feature_set in (
            ('output/cellprofiler_replicability_mmap.csv', "CellProfiler"),
            ('output/deepprofiler_replicability_mmap.csv', "DeepProfiler"),
        )
    ],
    join="inner",
)


In [3]:
# Join each non-"DP-CP" t-test row with its mmAP value on the shared key
# columns, order the rows by those keys, and emit the table as markdown text.
print(
    pd.merge(
        replicability_cp_dp_pvalue,
        replicability_mmap,
        on=['Feature_set','Modality','Cell','time'],
    )
    .sort_values(['Feature_set','Modality','Cell','time'])
    .to_markdown(index=False)
)

| t-test        | Feature_set   | Modality   | Cell   | time   |   statistic | pvalue   |   mmAP |
|:--------------|:--------------|:-----------|:-------|:-------|------------:|:---------|-------:|
| Replicability | CellProfiler  | compound   | A549   | long   |     38.2031 | <0.05    |  0.703 |
| Replicability | CellProfiler  | compound   | A549   | short  |     25.0861 | <0.05    |  0.532 |
| Replicability | CellProfiler  | compound   | U2OS   | long   |     22.0976 | <0.05    |  0.496 |
| Replicability | CellProfiler  | compound   | U2OS   | short  |     25.6074 | <0.05    |  0.55  |
| Replicability | CellProfiler  | crispr     | A549   | long   |     23.4325 | <0.05    |  0.394 |
| Replicability | CellProfiler  | crispr     | A549   | short  |     23.1375 | <0.05    |  0.373 |
| Replicability | CellProfiler  | crispr     | U2OS   | long   |     20.719  | <0.05    |  0.255 |
| Replicability | CellProfiler  | crispr     | U2OS   | short  |     21.2979 | <0.05    |  0.334 |
| Replicab

In [4]:
# Emit the "DP-CP" replicability rows as a markdown table, ordered by the
# key columns and without the DataFrame index.
print(
    replicability_dp_vs_cp_pvalue.sort_values(
        ['Feature_set','Modality','Cell','time']
    ).to_markdown(index=False)
)

| t-test        | Feature_set   | Modality   | Cell   | time   |   statistic | pvalue   |
|:--------------|:--------------|:-----------|:-------|:-------|------------:|:---------|
| Replicability | DP-CP         | compound   | U2OS   | long   |     1.24786 | 0.21     |
| Replicability | DP-CP         | crispr     | U2OS   | long   |     4.50381 | <0.05    |
| Replicability | DP-CP         | orf        | U2OS   | long   |     2.78749 | <0.05    |


Same perturbation matching

In [5]:
# Load the matching t-test results, then split them into the rows whose
# Feature_set is "DP-CP" and all remaining rows (each re-indexed from 0).
matching_pvalue = pd.read_csv('output/matching_pvalue.csv')
matching_cp_dp_pvalue = (
    matching_pvalue
    .query('Feature_set!="DP-CP"')
    .reset_index(drop=True)
)
matching_dp_vs_cp_pvalue = (
    matching_pvalue
    .query('Feature_set=="DP-CP"')
    .reset_index(drop=True)
)

# Stack the two matching mmAP files, labelling each row with the feature set
# it came from. join="inner" keeps only the columns present in both files.
matching_mmap = pd.concat(
    [
        pd.read_csv(csv_path).assign(Feature_set=feature_set)
        for csv_path, feature_set in (
            ('output/cellprofiler_matching_mmap.csv', "CellProfiler"),
            ('output/deepprofiler_matching_mmap.csv', "DeepProfiler"),
        )
    ],
    join="inner",
)


In [6]:
# Join each non-"DP-CP" matching t-test row with its mmAP value on the shared
# key columns, order the rows by those keys, and emit the table as markdown.
print(
    pd.merge(
        matching_cp_dp_pvalue,
        matching_mmap,
        on=['Feature_set','Modality','Cell','time'],
    )
    .sort_values(['Feature_set','Modality','Cell','time'])
    .to_markdown(index=False)
)

| t-test                          | Feature_set   | Modality   | Cell   | time   |   statistic | pvalue   |   mmAP |
|:--------------------------------|:--------------|:-----------|:-------|:-------|------------:|:---------|-------:|
| Same perturbation type matching | CellProfiler  | compound   | A549   | long   |    11.0019  | <0.05    |  0.12  |
| Same perturbation type matching | CellProfiler  | compound   | A549   | short  |    10.9935  | <0.05    |  0.108 |
| Same perturbation type matching | CellProfiler  | compound   | U2OS   | long   |    11.4465  | <0.05    |  0.11  |
| Same perturbation type matching | CellProfiler  | compound   | U2OS   | short  |    11.2009  | <0.05    |  0.078 |
| Same perturbation type matching | CellProfiler  | crispr     | A549   | long   |     4.92739 | <0.05    |  0.088 |
| Same perturbation type matching | CellProfiler  | crispr     | A549   | short  |     4.29482 | <0.05    |  0.062 |
| Same perturbation type matching | CellProfiler  | crispr     |

In [7]:
# Emit the "DP-CP" matching rows as a markdown table, ordered by the key
# columns and without the DataFrame index.
print(
    matching_dp_vs_cp_pvalue.sort_values(
        ['Feature_set','Modality','Cell','time']
    ).to_markdown(index=False)
)

| t-test                          | Feature_set   | Modality   | Cell   | time   |   statistic | pvalue   |
|:--------------------------------|:--------------|:-----------|:-------|:-------|------------:|:---------|
| Same perturbation type matching | DP-CP         | compound   | U2OS   | long   |   -4.29094  | <0.05    |
| Same perturbation type matching | DP-CP         | crispr     | U2OS   | long   |    0.210606 | 0.83     |


Different perturbation matching

In [8]:
# Load the gene-compound matching t-test results, then split them into the
# rows whose Feature_set is "DP-CP" and all remaining rows (each re-indexed
# from 0).
gene_compound_matching_pvalue = pd.read_csv('output/gene_compound_matching_pvalue.csv')
gene_compound_matching_cp_dp_pvalue = (
    gene_compound_matching_pvalue
    .query('Feature_set!="DP-CP"')
    .reset_index(drop=True)
)
gene_compound_matching_dp_vs_cp_pvalue = (
    gene_compound_matching_pvalue
    .query('Feature_set=="DP-CP"')
    .reset_index(drop=True)
)

# Stack the two gene-compound mmAP files, labelling each row with the feature
# set it came from. join="inner" keeps only the columns present in both files.
gene_compound_matching_mmap = pd.concat(
    [
        pd.read_csv(csv_path).assign(Feature_set=feature_set)
        for csv_path, feature_set in (
            ('output/cellprofiler_gene_compound_matching_mmap.csv', "CellProfiler"),
            ('output/deepprofiler_gene_compound_matching_mmap.csv', "DeepProfiler"),
        )
    ],
    join="inner",
)

In [9]:
# Join each non-"DP-CP" gene-compound t-test row with its mmAP value on the
# shared key columns, order the rows by those keys, and emit as markdown.
print(
    pd.merge(
        gene_compound_matching_cp_dp_pvalue,
        gene_compound_matching_mmap,
        on=['Feature_set','Modality1','Modality2','Cell'],
    )
    .sort_values(['Feature_set','Modality1','Modality2','Cell'])
    .to_markdown(index=False)
)

| t-test                 | Feature_set   | Modality1      | Modality2    | Cell   |   statistic | pvalue   |   mmAP |
|:-----------------------|:--------------|:---------------|:-------------|:-------|------------:|:---------|-------:|
| Compoung-gene matching | CellProfiler  | compound_long  | crispr_long  | A549   |     5.48398 | <0.05    |  0.032 |
| Compoung-gene matching | CellProfiler  | compound_long  | crispr_long  | U2OS   |     5.68521 | <0.05    |  0.033 |
| Compoung-gene matching | CellProfiler  | compound_long  | crispr_short | A549   |     5.23907 | <0.05    |  0.032 |
| Compoung-gene matching | CellProfiler  | compound_long  | crispr_short | U2OS   |     3.63061 | <0.05    |  0.024 |
| Compoung-gene matching | CellProfiler  | compound_long  | orf_long     | A549   |     5.00785 | <0.05    |  0.03  |
| Compoung-gene matching | CellProfiler  | compound_long  | orf_long     | U2OS   |     6.06296 | <0.05    |  0.043 |
| Compoung-gene matching | CellProfiler  | compound_long

In [10]:
# Emit the "DP-CP" gene-compound matching rows as a markdown table, ordered
# by the key columns and without the DataFrame index.
print(
    gene_compound_matching_dp_vs_cp_pvalue.sort_values(
        ['Feature_set','Modality1','Modality2','Cell']
    ).to_markdown(index=False)
)

| t-test                 | Feature_set   | Modality1     | Modality2   | Cell   |   statistic |   pvalue |
|:-----------------------|:--------------|:--------------|:------------|:-------|------------:|---------:|
| Compoung-gene matching | DP-CP         | compound_long | crispr_long | U2OS   |   -0.696465 |     0.49 |
| Compoung-gene matching | DP-CP         | compound_long | orf_long    | U2OS   |   -0.640213 |     0.52 |


Different perturbation matching - CRISPR vs. ORF

In [11]:
# Load the CRISPR vs. ORF t-test results used by the table below.
gene_compound_matching_crispr_orf_pvalue = pd.read_csv(
    'output/crispr_orf_pvalue.csv'
)


In [12]:
# Order the CRISPR vs. ORF rows by their key columns, keep only the reported
# columns (in this order), and emit the result as a markdown table.
print(
    gene_compound_matching_crispr_orf_pvalue
    .sort_values(['Feature_set','Modality1','Modality2','Cell'])
    .loc[:, ['t-test', 'Feature_set', 'Modality1', 'Modality2', 'Cell', 'statistic', 'pvalue']]
    .to_markdown(index=False)
)

| t-test                                  | Feature_set   | Modality1      | Modality2              | Cell   |   statistic | pvalue   |
|:----------------------------------------|:--------------|:---------------|:-----------------------|:-------|------------:|:---------|
| Compoung-gene matching - CRISPR vs. ORF | CellProfiler  | compound_long  | crispr_long-orf_long   | A549   |   0.247125  | 0.80     |
| Compoung-gene matching - CRISPR vs. ORF | CellProfiler  | compound_long  | crispr_long-orf_long   | U2OS   |  -1.06864   | 0.29     |
| Compoung-gene matching - CRISPR vs. ORF | CellProfiler  | compound_long  | crispr_long-orf_short  | A549   |  -0.0291253 | 0.98     |
| Compoung-gene matching - CRISPR vs. ORF | CellProfiler  | compound_long  | crispr_long-orf_short  | U2OS   |  -1.74948   | 0.08     |
| Compoung-gene matching - CRISPR vs. ORF | CellProfiler  | compound_long  | crispr_short-orf_long  | A549   |   0.220637  | 0.83     |
| Compoung-gene matching - CRISPR vs. ORF | Cell