In [1]:
import pandas as pd
from tqdm import tqdm
import utils
from copairs.map import run_pipeline
import numpy as np
import utils


In [2]:
# Load the input table from its Parquet file (path is relative to the notebook).
data = pd.read_parquet(path="../compare_parquet/data.parquet")

In [3]:
# Run the copairs pipeline separately on every (cell type, perturbation,
# time point) subset of `data` and stack the per-subset metric tables into
# one `results` frame, tagged with the perturbation and a cell/time label.

# Column roles and null size handed to run_pipeline. They are the same for
# every subset, so they are defined once instead of inside the loop.
pos_sameby = "Metadata_broad_sample"
pos_diffby = "Metadata_Plate_Well"
neg_sameby = "Metadata_Plate"
neg_diffby = "Metadata_negcon"
null_size = 10000

# Per-subset metric tables are collected here and concatenated once at the
# end; concatenating inside the loop re-copies all earlier rows every time.
metric_frames = []

for cell in data.Metadata_Cell_type.unique():
    cell_df = data.query("Metadata_Cell_type == @cell")
    for perturbation in cell_df.Metadata_Perturbation.unique():
        perturbation_df = cell_df.query("Metadata_Perturbation == @perturbation")
        for time in perturbation_df.Metadata_Time.unique():
            time_df = (
                perturbation_df.query("Metadata_Time == @time")
                .copy()
                .assign(
                    # 1 where the row's control type is "negcon", else 0.
                    # Vectorized comparison instead of a row-wise apply.
                    Metadata_negcon=lambda x: (
                        x["Metadata_control_type"] == "negcon"
                    ).astype("int64"),
                    # Plate and well joined into a single identifier column.
                    Metadata_Plate_Well=lambda x: x.Metadata_Plate
                    + "_"
                    + x.Metadata_Well,
                )
            )

            if perturbation == "compound":
                # Assign the filled column back explicitly. Calling
                # fillna(inplace=True) on a column selection is deprecated
                # and does not modify time_df under pandas copy-on-write.
                time_df["Metadata_broad_sample"] = time_df[
                    "Metadata_broad_sample"
                ].fillna("DMSO")

            time_df = utils.remove_empty_wells(time_df).reset_index(drop=True)

            meta = utils.get_metadata(time_df)
            feature_df = utils.get_featuredata(time_df)
            feats = feature_df.values

            print(f"Computing metric for {perturbation}-{cell}-{time}")

            metric = run_pipeline(
                meta, feats, pos_sameby, pos_diffby, neg_sameby, neg_diffby, null_size
            )

            metric_frames.append(
                metric.assign(Metadata_Perturbation=perturbation).assign(
                    Cell_Time=f"{cell}_{utils.time_point(perturbation, time)}"
                )
            )

# pd.concat raises on an empty list, so fall back to an empty frame when
# `data` produced no subsets.
results = pd.concat(metric_frames, axis=0) if metric_frames else pd.DataFrame()

Computing metric for compound-A549-24


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Computing metric for compound-A549-48


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Computing metric for crispr-A549-144


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

Computing metric for crispr-A549-96


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

Computing metric for orf-A549-96


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

Computing metric for orf-A549-48


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

Computing metric for compound-U2OS-24


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

Computing metric for compound-U2OS-48


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

Computing metric for crispr-U2OS-144


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

Computing metric for crispr-U2OS-96


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

Computing metric for orf-U2OS-48


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

Computing metric for orf-U2OS-96


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Average `average_precision` within each perturbation / cell-time / sample
# group, rename the averaged column to "mAP", and write the table to CSV.
group_keys = ["Metadata_Perturbation", "Cell_Time", "Metadata_broad_sample"]
mean_ap = results.groupby(group_keys)[["average_precision"]].mean()
mean_ap_table = mean_ap.reset_index().rename(columns={"average_precision": "mAP"})
mean_ap_table.to_csv("output/copairs-ap.csv", index=False)