In [1]:
import pandas as pd
import utils
import warnings
import random

# Silence every FutureWarning for the rest of the session.
warnings.simplefilter("ignore", FutureWarning)
# Fixed seed for Python's built-in `random` module.
random.seed(a=12527)

In [2]:
# Load the input table from parquet. Later cells read the columns
# Metadata_Cell_type, Metadata_Perturbation, Metadata_Time and
# Metadata_broad_sample from it.
data = pd.read_parquet(path='../compare_parquet/data.parquet')

In [3]:
# Compute average precision (AP) separately for every
# cell type x perturbation x time point subset of `data`,
# then stack the per-subset AP tables into `results`.

# Passed to every utils.AveragePrecision_non_vectorized call below.
# NOTE(review): presumably the helper uses it as a shared random-baseline
# table across calls -- confirm in utils.
random_baseline_ap = pd.DataFrame(columns=["ap", "n_matches", "n_reference"])

# Per-subset AP tables are collected here and concatenated once after the
# loop; concatenating inside the loop re-copies all earlier rows each time.
ap_frames = []

for cell in data.Metadata_Cell_type.unique():
    cell_df = data.query("Metadata_Cell_type == @cell")
    for perturbation in cell_df.Metadata_Perturbation.unique():
        perturbation_df = cell_df.query("Metadata_Perturbation == @perturbation")
        for time in perturbation_df.Metadata_Time.unique():
            time_df = perturbation_df.query("Metadata_Time == @time").copy()

            if perturbation == "compound":
                # Compound rows without a sample id are labelled "DMSO".
                # Assign the filled column back: an in-place fillna on the
                # column selection is chained assignment, which pandas
                # deprecates and which does not update `time_df` under
                # Copy-on-Write.
                time_df["Metadata_broad_sample"] = time_df[
                    "Metadata_broad_sample"
                ].fillna("DMSO")

            time_df = utils.remove_empty_wells(time_df).reset_index(drop=True)

            # The two specs are rebuilt on every iteration so the helper always
            # receives fresh dicts (whether it mutates them is not visible here).
            match_dict = {
                "filter": {"Metadata_control_type": ["'negcon'"]},
                "matching": ["Metadata_broad_sample"],
            }

            reference_dict = {
                "filter": {
                    "Metadata_pert_type": ["'trt'"],
                    "Metadata_control_type": [
                        "'poscon_orf'",
                        "'poscon_diverse'",
                        "'poscon_cp'",
                    ],
                },
                "matching": ["Metadata_Plate"],
            }

            print(f'Computing metric for {perturbation}-{cell}-{time}')

            # NOTE(review): the positional 100 is not explained in this
            # notebook -- check the helper's signature in utils for its meaning.
            metric = utils.AveragePrecision_non_vectorized(
                time_df,
                match_dict,
                reference_dict,
                100,
                random_baseline_ap,
                anti_match=False,
            )

            # Tag each AP row with the subset it came from.
            ap_frames.append(
                metric.ap
                .assign(Metadata_Perturbation=perturbation)
                .assign(Cell_Time=f'{cell}_{utils.time_point(perturbation, time)}')
            )

# Single concat; falls back to an empty frame when `data` yields no subsets.
results = pd.concat(ap_frames, axis=0) if ap_frames else pd.DataFrame()

Computing metric for compound-A549-24


100%|██████████| 306/306 [02:23<00:00,  2.14it/s]


Computing metric for compound-A549-48


100%|██████████| 306/306 [01:51<00:00,  2.75it/s]


Computing metric for crispr-A549-144


100%|██████████| 305/305 [02:14<00:00,  2.27it/s]


Computing metric for crispr-A549-96


100%|██████████| 305/305 [01:56<00:00,  2.62it/s]


Computing metric for orf-A549-96


100%|██████████| 160/160 [00:53<00:00,  3.00it/s]


Computing metric for orf-A549-48


100%|██████████| 160/160 [00:53<00:00,  3.00it/s]


Computing metric for compound-U2OS-24


100%|██████████| 306/306 [03:51<00:00,  1.32it/s]


Computing metric for compound-U2OS-48


100%|██████████| 306/306 [01:56<00:00,  2.63it/s]


Computing metric for crispr-U2OS-144


100%|██████████| 305/305 [01:54<00:00,  2.66it/s]


Computing metric for crispr-U2OS-96


100%|██████████| 305/305 [01:49<00:00,  2.78it/s]


Computing metric for orf-U2OS-48


100%|██████████| 160/160 [00:50<00:00,  3.14it/s]


Computing metric for orf-U2OS-96


100%|██████████| 160/160 [00:51<00:00,  3.10it/s]


In [5]:
# Average the AP columns within each (perturbation, cell/time, sample) group,
# rename them to their mean-AP labels, and write the table to CSV.
group_keys = ["Metadata_Perturbation", "Cell_Time", "Metadata_broad_sample"]
score_renames = {"ap": "mAP", "ap_corrected": "adjusted_mAP"}

mean_ap = (
    results.groupby(group_keys)[["ap", "ap_corrected"]]
    .mean()
    .reset_index()
    .rename(columns=score_renames)
)
mean_ap.to_csv("output/non-vectorized-ap.csv", index=False)