In [1]:
import pandas as pd
import utils

In [2]:
# Load the input table from its Parquet file into a DataFrame.
parquet_path = '../compare_parquet/data.parquet'
data = pd.read_parquet(parquet_path)

In [3]:
# The two query specifications passed to utils.AveragePrecision do not depend
# on the subset being processed, so they are built once, outside the loops.
# NOTE(review): the meaning of "filter" / "matching_col" is defined by
# utils.AveragePrecision, which is not visible here — confirm against utils.
pos_dict = {
    "filter": {"Metadata_control_type": ["'negcon'"]},
    "matching_col": ["Metadata_broad_sample"],
}

ref_dict = {
    "filter": {
        "Metadata_pert_type": ["'trt'"],
        "Metadata_control_type": [
            "'poscon_orf'",
            "'poscon_diverse'",
            "'poscon_cp'",
        ],
    },
    "matching_col": ["Metadata_Plate"],
}

# One result frame is collected per (cell type, perturbation, time) subset and
# everything is concatenated once at the end. Calling pd.concat inside the loop
# re-copies all previously accumulated rows on every iteration.
ap_frames = []

for cell in data.Metadata_Cell_type.unique():
    cell_df = data.query("Metadata_Cell_type == @cell")
    for perturbation in cell_df.Metadata_Perturbation.unique():
        perturbation_df = cell_df.query("Metadata_Perturbation == @perturbation")
        for time in perturbation_df.Metadata_Time.unique():
            # .copy() makes the subset independent of `data`, so the column
            # assignment below cannot write through to the source frame.
            time_df = perturbation_df.query("Metadata_Time == @time").copy()

            if perturbation == "compound":
                # Assign the filled column back instead of calling
                # fillna(inplace=True) on the column selection: the in-place
                # form acts on a temporary object and leaves time_df unchanged
                # when pandas Copy-on-Write is active.
                time_df["Metadata_broad_sample"] = (
                    time_df["Metadata_broad_sample"].fillna("DMSO")
                )

            time_df = utils.remove_empty_wells(time_df).reset_index(drop=True)

            print(f'Computing metric for {perturbation}-{cell}-{time}')

            metric = utils.AveragePrecision(
                time_df, pos_dict, ref_dict,
            )

            # Tag the per-subset result with the perturbation and a combined
            # cell/time label so subsets stay distinguishable after stacking.
            ap_frames.append(
                metric.ap
                .assign(Metadata_Perturbation=perturbation)
                .assign(Cell_Time=f'{cell}_{utils.time_point(perturbation, time)}')
            )

# pd.concat raises on an empty list; fall back to an empty frame so `results`
# is always defined, matching the original behaviour when no subset exists.
results = pd.concat(ap_frames, axis=0) if ap_frames else pd.DataFrame()

Computing metric for compound-A549-24
Computing metric for compound-A549-48
Computing metric for crispr-A549-144
Computing metric for crispr-A549-96
Computing metric for orf-A549-96
Computing metric for orf-A549-48
Computing metric for compound-U2OS-24
Computing metric for compound-U2OS-48
Computing metric for crispr-U2OS-144
Computing metric for crispr-U2OS-96
Computing metric for orf-U2OS-48
Computing metric for orf-U2OS-96


In [None]:
# Average the two precision columns within each perturbation / cell-time /
# sample group, rename them, and write the summary table to CSV.
group_keys = ["Metadata_Perturbation", "Cell_Time", "Metadata_broad_sample"]
score_columns = ["average_precision", "adjusted_average_precision"]
column_labels = {
    "average_precision": "mAP",
    "adjusted_average_precision": "adjusted_mAP",
}

(
    results.groupby(group_keys)[score_columns]
    .mean()
    .reset_index()
    .rename(columns=column_labels)
    .to_csv("output/vectorized-ap.csv", index=False)
)