In [2]:
import numpy as np
import pandas as pd
from pathlib import Path
import os

# Work from the project root: when the notebook is launched from inside a
# "notebooks" folder, step up one level so relative paths resolve the same way.
ROOT = Path.cwd().resolve()
ROOT = ROOT.parent if ROOT.name == "notebooks" else ROOT
os.chdir(ROOT)

# Destination folder for the generated CSV files (relative to ROOT after chdir).
OUTDIR = Path("synthetic-data", "affinity")
OUTDIR.mkdir(parents=True, exist_ok=True)

def four_param_logistic(conc, A, B, C, D):
    """Evaluate the four-parameter logistic ``D + (A - D) / (1 + (conc / C)**B)``.

    With a positive exponent ``B`` the value equals ``A`` at ``conc == 0``,
    equals the midpoint ``(A + D) / 2`` at ``conc == C`` and approaches ``D``
    as ``conc`` grows.

    Args:
        conc: Concentration(s); a scalar or anything supporting elementwise
            arithmetic (e.g. a NumPy array).
        A: Response at zero concentration.
        B: Exponent applied to ``conc / C``.
        C: Concentration at which the response is halfway between A and D.
        D: Response approached at high concentration.

    Returns:
        The response, with the same shape as ``conc``.
    """
    dose_term = (conc / C) ** B
    span = A - D
    return D + span / (1 + dose_term)

# Single seeded generator shared by all curve draws in this cell.
rng = np.random.default_rng(42)

# 16-point two-fold dilution series: 50, 25, 12.5, ... down to 50 / 2**15.
# Every value is 50 divided by a power of two, so the computed floats are
# exactly the same as the written-out decimal literals.
CONCS = 50.0 / 2.0 ** np.arange(16)

def make_curve(params, noise=0.015, flat=False, outliers=0):
    """Simulate one noisy response curve sampled at every value in ``CONCS``.

    Args:
        params: Mapping with the keys ``A``, ``B``, ``C`` and ``D`` accepted by
            ``four_param_logistic``. Only ``D`` is read when ``flat`` is true.
        noise: Base standard deviation of the Gaussian noise.
        flat: If true, ignore the logistic shape and emit a constant baseline
            at ``params["D"]`` with uniform noise of width ``noise``.
        outliers: Number of distinct points that receive an additional
            Gaussian perturbation with standard deviation ``6 * noise``.

    Returns:
        A float array with one response per concentration, floored at 1e-6.
    """
    n_points = len(CONCS)

    if flat:
        baseline = np.full_like(CONCS, params["D"])
        y = baseline + rng.normal(0, noise, n_points)
    else:
        y = four_param_logistic(CONCS, **params)
        # Noise grows with the signal: 60% of `noise` at the curve minimum,
        # rising linearly to 100% at the maximum. The 1e-12 guards against a
        # zero span when the curve is constant.
        lowest = y.min()
        spread = y.max() - lowest + 1e-12
        weight = 0.6 + 0.4 * (y - lowest) / spread
        y = y + rng.normal(0, noise * weight, n_points)

    if outliers:
        # Sample without replacement so each outlier hits a different point.
        hit = rng.choice(n_points, outliers, replace=False)
        y[hit] = y[hit] + rng.normal(0, noise * 6, outliers)

    # Keep every response strictly positive.
    return np.clip(y, 1e-6, None)

COMPOUNDS = {
    "X1": {
        "t0": dict(A=1.00, B=1.1, C=0.07, D=0.10),
        "+Condition t0": dict(A=1.05, B=1.2, C=0.04, D=0.10),
        "t_x": dict(A=1.00, B=1.1, C=0.10, D=0.10),
        "+Condition t_x": dict(A=1.10, B=1.3, C=0.035, D=0.10),
    },
    "X2": {
        "t0": dict(A=0.95, B=1.0, C=0.40, D=0.12),
        "+Condition t0": dict(A=0.98, B=1.0, C=0.25, D=0.12),
        "t_x": dict(A=0.92, B=1.0, C=0.65, D=0.12),
        "+Condition t_x": dict(A=1.00, B=1.0, C=0.30, D=0.12),
    },
    "X3": {
        "t0": dict(A=0.90, B=0.9, C=5.0, D=0.15),
        "+Condition t0": dict(A=0.92, B=1.0, C=3.5, D=0.15),
        "t_x": dict(A=0.88, B=1.0, C=8.0, D=0.15),
        "+Condition t_x": dict(A=0.94, B=1.1, C=4.0, D=0.15),
    },
    "X4": {
        "t0": dict(A=0.50, B=1.1, C=0.30, D=0.20),
        "+Condition t0": dict(A=0.60, B=1.0, C=0.20, D=0.20),
        "t_x": dict(A=0.48, B=1.1, C=0.40, D=0.20),
        "+Condition t_x": dict(A=0.65, B=1.0, C=0.22, D=0.20),
    },
    "X5": {
        "t0": dict(A=1.00, B=2.8, C=0.12, D=0.08),
        "+Condition t0": dict(A=1.05, B=3.0, C=0.09, D=0.08),
        "t_x": dict(A=1.00, B=2.5, C=0.18, D=0.08),
        "+Condition t_x": dict(A=1.08, B=3.2, C=0.07, D=0.08),
    },
    "X6": {
        "t0": dict(A=0.30, B=1.0, C=1.0, D=0.28),
        "+Condition t0": dict(A=0.32, B=1.0, C=1.0, D=0.29),
        "t_x": dict(A=0.95, B=1.1, C=0.50, D=0.12),
        "+Condition t_x": dict(A=1.05, B=1.2, C=0.30, D=0.12),
    },
    "X7": {
        "t0": dict(A=0.31, B=1.0, C=1.0, D=0.30),
        "+Condition t0": dict(A=0.32, B=1.0, C=1.0, D=0.31),
        "t_x": dict(A=0.33, B=1.0, C=1.0, D=0.31),
        "+Condition t_x": dict(A=0.32, B=1.0, C=1.0, D=0.30),
    },
}

def write_compound(name, spec):
    """Simulate every condition of one compound and save it as a CSV file.

    The file is written to ``OUTDIR / "synth_<name>.csv"`` in long format with
    the columns ``X`` (concentration), ``Y`` (response) and ``Condition``.

    Args:
        name: Compound identifier; also selects the noise profile ("X7" and
            the two t0 conditions of "X6" are generated as flat baselines).
        spec: Mapping of condition label to the parameter dict handed to
            ``make_curve``.
    """
    records = []
    for cond, params in spec.items():
        # Pick the noise profile for this compound/condition.
        if name == "X7":
            curve_kwargs = dict(noise=0.02, flat=True, outliers=3)
        elif name == "X6" and cond in ("t0", "+Condition t0"):
            curve_kwargs = dict(noise=0.01, flat=True)
        else:
            curve_kwargs = dict(noise=0.015, outliers=1)

        responses = make_curve(params, **curve_kwargs)
        records.extend(
            {"X": float(conc), "Y": float(resp), "Condition": cond}
            for conc, resp in zip(CONCS, responses)
        )

    out = OUTDIR / f"synth_{name}.csv"
    pd.DataFrame(records).to_csv(out, index=False)
    print(f"Created {out.as_posix()}")

# Generate one CSV per compound defined in COMPOUNDS, in definition order.
for name, spec in COMPOUNDS.items():
    write_compound(name, spec)

Created synthetic-data/affinity/synth_X1.csv
Created synthetic-data/affinity/synth_X2.csv
Created synthetic-data/affinity/synth_X3.csv
Created synthetic-data/affinity/synth_X4.csv
Created synthetic-data/affinity/synth_X5.csv
Created synthetic-data/affinity/synth_X6.csv
Created synthetic-data/affinity/synth_X7.csv
