In [9]:
import numpy as np
import pandas as pd

# Shared random generator, seeded with 7; every sampling helper in this
# file draws from this single stream, so call order affects the output.
rng = np.random.default_rng(7)

# Ligand names used for the negative- and positive-control rows.
NEG_CTRL = "PBS-T"
POS_CTRL = "POS_CTRL"

# Screening round names, in order; also used to build plate and file names.
SCREENINGS = ["First", "Second", "Third"]
# A ligand whose category in one round is in this set is carried into the next.
RESCREEN_CATS = {"Inconclusive", "Inhomogeneous Signal", "Scan Anomaly"}

# Number of replicate rows generated per tested ligand and per control.
REPS_PER_LIGAND = 4
REPS_NEG_CTRL = 4
REPS_POS_CTRL = 4

def plate_wells_96():
    """Return the 96 well IDs "A1" .. "H12" as a list, row by row.

    Rows run A-H and columns 1-12; within a row the columns ascend, so the
    list starts "A1", "A2", ... "A12", "B1" and ends with "H12".
    """
    well_ids = []
    for row_letter in "ABCDEFGH":
        well_ids.extend(f"{row_letter}{col_number}" for col_number in range(1, 13))
    return well_ids

def category_to_flag(category):
    """Translate an analysis category into the value stored in the "Flag" column.

    Args:
        category: Category label, e.g. "Hit", "Scan Anomaly" or "Excluded".

    Returns:
        The flag string for the category. "Hit", "Non-binder" and any
        unrecognised category map to "OK".
    """
    flag_by_category = {
        "Scan Anomaly": "Scan anomaly",
        "Inhomogeneous Signal": "Inhomogeneous",
        "Inconclusive": "Inconclusive",
        "Aggregation": "Aggregation",
        "Hit": "OK",
        "Non-binder": "OK",
        "Control": "Control",
        # Rows for excluded data points are created with the category
        # "Excluded"; without this entry they fell through to "OK" and were
        # indistinguishable from valid measurements.
        "Excluded": "Excluded",
    }
    return flag_by_category.get(category, "OK")

# Each entry is (base, per-screening slope, standard deviation) of the normal
# distribution a ratio is drawn from: mean = base + slope * screening_idx.
_CATEGORY_RATIO_PARAMS = {
    "Non-binder": (1.0, 0.002, 0.006),
    "Inhomogeneous Signal": (1.05, 0.008, 0.030),
    "Scan Anomaly": (0.98, 0.0, 0.055),
    "Aggregation": (1.25, 0.010, 0.030),
    "Inconclusive": (1.03, 0.010, 0.025),
    "Control": (1.06, 0.010, 0.020),
}
# Ligand-specific overrides that apply only when the category is "Hit".
_HIT_RATIO_PARAMS = {
    "X4": (1.045, 0.006, 0.018),
    "X5": (1.12, 0.010, 0.030),
}
_DEFAULT_HIT_RATIO_PARAMS = (1.10, 0.012, 0.020)
_FALLBACK_RATIO_PARAMS = (1.02, 0.0, 0.020)


def sample_ratio(category, screening_idx, ligand):
    """Draw one synthetic ratio value from the shared ``rng``.

    The control ligands take precedence over ``category``: a ligand equal to
    ``NEG_CTRL`` or ``POS_CTRL`` always uses the control distribution.
    Otherwise the distribution is chosen by ``category``; "Hit" additionally
    has per-ligand overrides for "X4" and "X5". Unknown categories use a
    fixed fallback distribution.

    Args:
        category: Category label of the ligand in this screening.
        screening_idx: Zero-based screening index; shifts the mean of most
            distributions.
        ligand: Ligand name.

    Returns:
        A single sample from ``rng.normal``.
    """
    if ligand == NEG_CTRL:
        base, slope, spread = 1.0, 0.002, 0.004
    elif ligand == POS_CTRL:
        base, slope, spread = 1.18, 0.008, 0.015
    elif category == "Hit":
        base, slope, spread = _HIT_RATIO_PARAMS.get(ligand, _DEFAULT_HIT_RATIO_PARAMS)
    else:
        base, slope, spread = _CATEGORY_RATIO_PARAMS.get(category, _FALLBACK_RATIO_PARAMS)

    return rng.normal(base + slope * screening_idx, spread)

def build_overview(ligand_to_category, excluded_wells, use_unnamed_cols=False):
    """Build the overview sheet as a DataFrame.

    The frame starts with a "Name" header row, followed by one row per
    ligand (name and category, in the mapping's iteration order), followed
    by one "Data Point <i>" row per excluded well in sorted order.

    Args:
        ligand_to_category: Mapping of ligand name to category label.
        excluded_wells: Iterable of excluded well IDs.
        use_unnamed_cols: When true, the well and detail values are stored
            under "Unnamed: 2" / "Unnamed: 4" instead of "Well" / "Details".

    Returns:
        A ``pandas.DataFrame`` with the rows described above.
    """
    if use_unnamed_cols:
        well_col, detail_col = "Unnamed: 2", "Unnamed: 4"
    else:
        well_col, detail_col = "Well", "Details"

    records = [{"Ligand Information": "Name", "Analysis Results": ""}]
    records.extend(
        {"Ligand Information": name, "Analysis Results": result}
        for name, result in ligand_to_category.items()
    )
    records.extend(
        {
            "Ligand Information": f"Data Point {number}",
            "Analysis Results": "",
            well_col: well,
            detail_col: "Excluded - synthetic",
        }
        for number, well in enumerate(sorted(excluded_wells), start=1)
    )
    return pd.DataFrame(records)

def build_table(tested_ligands, ligand_to_cat, excluded_wells, screening_idx, plate_name):
    """Build the per-well measurement table and per-ligand ratio statistics.

    Wells are handed out from a shuffled 96-well list: first the negative
    control replicates, then the positive control replicates, then the
    replicates of each tested ligand in sorted order. Afterwards, every
    excluded well that has not already been handed out gets one extra row
    (replicate 99, category "Excluded") for a randomly chosen tested ligand,
    or for the negative control when no ligands were tested.

    Args:
        tested_ligands: Collection of ligand names tested in this screening.
        ligand_to_cat: Mapping of ligand name to category label; must contain
            every tested ligand.
        excluded_wells: Iterable of excluded well IDs.
        screening_idx: Zero-based screening index; raises the reference level
            and the response noise.
        plate_name: Value written to the "Plate" column.

    Returns:
        A tuple ``(table, stats)``: the per-well DataFrame, and a DataFrame
        with the mean, standard deviation and count of "Ratio" per ligand,
        sorted by ligand.

    Raises:
        RuntimeError: If more than 96 wells are requested.
    """
    shuffled_wells = plate_wells_96()
    rng.shuffle(shuffled_wells)
    # Wells are consumed strictly in shuffled order, so an iterator is enough.
    free_wells = iter(shuffled_wells)
    assigned = set()
    records = []

    baseline = 1000 + 80 * screening_idx
    response_sd = 35 + 10 * screening_idx

    def next_well():
        well = next(free_wells, None)
        if well is None:
            raise RuntimeError("Ran out of wells.")
        assigned.add(well)
        return well

    def record(well, ligand, ratio, category, rep):
        # Draw order (reference, response noise, initial noise) is part of
        # the reproducible random stream; keep it unchanged.
        ref_signal = rng.normal(baseline, 40)
        resp_signal = ref_signal * ratio + rng.normal(0, response_sd)
        init_signal = resp_signal + rng.normal(0, 25)
        records.append({
            "Plate": plate_name,
            "Well ID": well,
            "Ligand": ligand,
            "Replicate": rep,
            "Ratio": float(ratio),
            "dRatio": float(ratio - 1.0),
            "Reference Fluorescence": float(ref_signal),
            "Response Fluorescence": float(resp_signal),
            "Initial Fluorescence": float(init_signal),
            "Flag": category_to_flag(category),
        })

    def add_replicates(ligand, category, count):
        for rep in range(1, count + 1):
            well = next_well()
            # Ratios are clamped from below at 0.2.
            ratio = max(sample_ratio(category, screening_idx, ligand), 0.2)
            record(well, ligand, ratio, category, rep)

    add_replicates(NEG_CTRL, "Control", REPS_NEG_CTRL)
    add_replicates(POS_CTRL, "Control", REPS_POS_CTRL)
    for ligand in sorted(tested_ligands):
        add_replicates(ligand, ligand_to_cat[ligand], REPS_PER_LIGAND)

    for well in sorted(excluded_wells):
        if well in assigned:
            # The well already holds a regular replicate; no extra row.
            continue
        assigned.add(well)
        if tested_ligands:
            ligand = rng.choice(sorted(tested_ligands))
        else:
            ligand = NEG_CTRL
        category = ligand_to_cat.get(ligand, "Non-binder")
        base_ratio = sample_ratio(category, screening_idx, ligand)
        ratio = max(base_ratio + rng.normal(0, 0.03), 0.2)
        record(well, ligand, ratio, "Excluded", 99)

    table = pd.DataFrame(records)
    ratio_by_ligand = table.groupby("Ligand", as_index=False)["Ratio"]
    summary = ratio_by_ligand.agg(Avg="mean", Std="std", N="count").sort_values("Ligand")
    return table, summary

def main():
    """Generate the synthetic screening workbooks for every screening round.

    For each name in ``SCREENINGS`` two files are written to the current
    working directory:

    * ``"<name> screening overview.xlsx"`` - the overview sheet.
    * ``"table <name> screening.xlsx"`` - sheets "Table" (per-well rows)
      and "Stats" (per-ligand ratio statistics).

    ``<name>`` is the lower-cased screening name. The first round tests the
    core ligands X1-X7 plus up to eight randomly chosen others. Each later
    round tests the ligands carried over from the previous round (those
    whose previous category is in ``RESCREEN_CATS``) plus a random batch of
    ligands that have not been tested yet.
    """
    trajectories = {
        "X1": ["Hit", "Hit", "Hit"],
        "X2": ["Hit", "Hit", "Hit"],
        "X3": ["Non-binder", "Hit", "Hit"],
        "X4": ["Inconclusive", "Hit", "Hit"],
        "X5": ["Scan Anomaly", "Inhomogeneous Signal", "Non-binder"],
        "X6": ["Non-binder", "Non-binder", "Hit"],
        "X7": ["Non-binder", "Non-binder", "Non-binder"],
    }

    for i in range(8, 13):
        trajectories[f"X{i}"] = ["Non-binder", "Non-binder", "Non-binder"]

    extras = {
        "HIT_A": ["Hit", "Hit", "Hit"],
        "HIT_B": ["Hit", "Hit", "Non-binder"],
        "NOISE_A": ["Inhomogeneous Signal", "Inhomogeneous Signal", "Non-binder"],
        "NOISE_B": ["Scan Anomaly", "Non-binder", "Non-binder"],
        "INC_A": ["Inconclusive", "Inconclusive", "Non-binder"],
        "NB_A": ["Non-binder", "Non-binder", "Non-binder"],
        "NB_B": ["Non-binder", "Non-binder", "Non-binder"],
    }

    # Fix: `extras` used to be built and then ignored (`all_ligs` was just
    # `trajectories`). With only 12 ligands, all of them were tested in the
    # first round and later rounds never received any new ligand.
    all_ligs = {**trajectories, **extras}
    ligand_names = sorted(all_ligs)

    core = ["X1", "X2", "X3", "X4", "X5", "X6", "X7"]
    remaining = [lig for lig in ligand_names if lig not in core]
    rng.shuffle(remaining)

    tested_first = set(core + remaining[:8])
    tested_so_far = set(tested_first)

    # Per-round excluded wells and overview column style, indexed like SCREENINGS.
    excluded_sets = [
        {"B3", "C7", "F11"},
        {"A2", "D8", "H12"},
        {"E5", "G9"},
    ]
    unnamed_flags = [True, False, True]

    tested_sets = [tested_first]

    for s_idx in range(1, len(SCREENINGS)):
        prev_tested = tested_sets[s_idx - 1]
        # Re-screen ligands whose previous result was not conclusive.
        carry = {
            lig for lig in prev_tested
            if all_ligs[lig][s_idx - 1] in RESCREEN_CATS
        }

        not_tested_yet = [lig for lig in ligand_names if lig not in tested_so_far]
        rng.shuffle(not_tested_yet)
        # The slice yields fewer ligands when the untested pool is smaller.
        new_count = 8 if s_idx == 1 else 6
        new_ligs = set(not_tested_yet[:new_count])

        tested = carry | new_ligs
        tested_sets.append(tested)
        tested_so_far |= tested

    for i, scr in enumerate(SCREENINGS):
        tested = tested_sets[i]
        ligand_to_cat = {NEG_CTRL: "Control", POS_CTRL: "Control"}
        for lig in tested:
            ligand_to_cat[lig] = all_ligs[lig][i]

        overview = build_overview(ligand_to_cat, excluded_sets[i], use_unnamed_cols=unnamed_flags[i])
        table_df, stats_df = build_table(
            tested,
            ligand_to_cat,
            excluded_sets[i],
            screening_idx=i,
            plate_name=f"{scr}_Plate",
        )

        overview.to_excel(f"{scr.lower()} screening overview.xlsx", index=False)
        with pd.ExcelWriter(f"table {scr.lower()} screening.xlsx", engine="openpyxl") as writer:
            table_df.to_excel(writer, sheet_name="Table", index=False)
            stats_df.to_excel(writer, sheet_name="Stats", index=False)

    print("Created staged synthetic screening Excel files.")

# Generate the files only when this code is run as a script, not on import.
if __name__ == "__main__":
    main()

Created staged synthetic screening Excel files.
