# Clinical trial design optimization

## Loading dependencies

In [None]:
# Cookbook specifics imports
import jinko_helpers as jinko
import json
import io
import math
import matplotlib.pyplot as plt
from modAL.models import BayesianOptimizer
from modAL.acquisition import max_EI
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import scipy.stats as st
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern
from sklearn.neighbors import KNeighborsRegressor
import zipfile
import textwrap

## Defining general parameters

In [None]:
# --- Trial identification ---
# Jinko trial short ID, URL pattern is "https://jinko.ai/{trial_sid}"
trial_sid = "tr-HLRF-b0zW"
# ID of the outcome used throughout the cookbook
outcome_name = "tumorBurdenChangeFromBaseline.tend"
# IDs of the control and treated arms
control_arm_id, treated_arm_id = "sc-1-10", "iv-1-10"

# --- Sample size parameters ---
# Both values are passed to sample_size_continuous_outcome
alpha, beta = 0.05, 0.2

# --- Features ---
# Sorted by descending order of importance (as per tornado or Random Forest
# analysis for instance). See "sensitivity_analysis.ipynb"
all_features = [
    "initialTumorBurden.tmin",
    "ec50Drug.tmin",
    "Blood.tmin",
    "Tissue.tmin",
    "kClearanceDrug.tmin",
]

# --- Minimal size of the filtered responder vpop ---
# Number of times the required sample size should be included in the responder
# vpop, in terms of number of patients.
# Evaluated for each set of eligibility criteria.
min_resp_vp_size = 10

# --- Objective function weights ---
# weight of gross efficacy
efficacy_wt = 1
# weight of standard deviation of gross efficacy
efficacy_sd_wt = 2

# --- Bootstrapping parameters ---
# Number of bootstraps for gross efficacy dispersion estimation
num_bootstraps = 50
# Seed for all random processes
cookbook_seed = 2025

## Defining functions
### Sample size computation

In [None]:
# Sample size formula for two independent samples with a continuous outcome
def sample_size_continuous_outcome(alpha, beta, diff_btw_groups, sd_outcome, dropout=0):
    """Return the (non-rounded) sample size for a two-sample comparison.

    Args:
        alpha: value used for the two-sided normal quantile, ppf(1 - alpha / 2).
        beta: value used for the one-sided normal quantile, ppf(1 - beta).
        diff_btw_groups: difference between the groups; only its absolute
            value is used.
        sd_outcome: standard deviation of the outcome.
        dropout: fraction by which the result is inflated, through a division
            by (1 - dropout).

    Returns:
        2 * ((z_alpha + z_beta) / effect_size) ** 2 / (1 - dropout), where
        effect_size = |diff_btw_groups| / sd_outcome. The value is not rounded.
    """
    quantile_sum = st.norm.ppf(1 - (alpha / 2)) + st.norm.ppf(1 - beta)
    effect_size = abs(diff_btw_groups) / sd_outcome
    size_without_dropout = 2 * (quantile_sum / effect_size) ** 2
    return size_without_dropout / (1 - dropout)

### Objective function

In [None]:
# Objective function to be maximized
def objective_function(
    efficacy,
    efficacy_sd,
):
    """Return the weighted trade-off between efficacy and its dispersion.

    The efficacy is rewarded with weight efficacy_wt, its standard deviation is
    penalized with weight efficacy_sd_wt, and the result is normalized by the
    sum of both weights (both weights are module-level parameters).
    """
    total_weight = efficacy_wt + efficacy_sd_wt
    weighted_difference = efficacy * efficacy_wt - efficacy_sd * efficacy_sd_wt
    return weighted_difference / total_weight

## Loading trial results from Jinko

In [None]:
# Set up the jinko helper before issuing any request
jinko.initialize()

# Convert short id to core item id
trial_core_item_id = jinko.get_core_item_id(trial_sid, 1)

# List all trial versions
# https://doc.jinko.ai/api/#/paths/core-v2-trial_manager-trial-trialId--status/get
response = jinko.make_request(
    f'/core/v2/trial_manager/trial/{trial_core_item_id["id"]}/status'
)
versions = response.json()

# Pick the first version whose status is "completed"
# NOTE(review): this is the latest completed version only if the API lists
# versions from newest to oldest; confirm against the API documentation
try:
    latest_completed_version = next(
        (item for item in versions if item["status"] == "completed"), None
    )
    if latest_completed_version is None:
        raise Exception("No completed trial version found")
    else:
        simulation_id = latest_completed_version["simulationId"]
        # From here on trial_core_item_id is rebound from the dictionary returned
        # by get_core_item_id to the core item id of the selected simulation
        trial_core_item_id = simulation_id["coreItemId"]
        trial_snapshot_id = simulation_id["snapshotId"]
except Exception as e:
    # Report the problem, then let the error propagate and stop the cell
    print(f"Error processing trial versions: {e}")
    raise

# https://doc.jinko.ai/api/#/paths/core-v2-trial_manager-trial-trialId--snapshots--trialIdSnapshot--results_summary/get
response = jinko.make_request(
    f"/core/v2/trial_manager/trial/{trial_core_item_id}/snapshots/{trial_snapshot_id}/results_summary",
    method="GET",
)
# NOTE(review): response_summary is not used anywhere else in this notebook
response_summary = json.loads(response.content)

# Retrieving scalar results
json_data = {
    "trialId": {"coreItemId": trial_core_item_id, "snapshotId": trial_snapshot_id}
}

# NOTE(review): this request targets trial_visualization (not scalars_summary)
# and its response is overwritten by the next request without being read;
# confirm whether this call is needed at all
response = jinko.make_request(
    path="/core/v2/result_manager/trial_visualization",
    method="POST",
    json=json_data,
)

# Download the scalars of the selected features and of the outcome
# https://doc.jinko.ai/api/#/paths/core-v2-result_manager-scalars_summary/post
response = jinko.make_request(
    path="/core/v2/result_manager/scalars_summary",
    method="POST",
    json={
        "select": all_features + [outcome_name],
        "trialId": latest_completed_version["simulationId"],
    },
)
# The response body is read as a zip archive; only its first member is used
archive = zipfile.ZipFile(io.BytesIO(response.content))
filename = archive.namelist()[0]

csv_scalars = archive.read(filename).decode("utf-8")

# Parse the CSV content into a dataframe
scalars_dtf = pd.read_csv(io.StringIO(csv_scalars))
print("Number of rows in the initial table:", len(scalars_dtf))

In [None]:
# Assembling the dataframes of interest: one wide table per arm, with one row
# per patient and one column per scalar
def _pivot_arm(arm_id):
    """Return the rows of scalars_dtf with the given armId, pivoted per patient."""
    arm_rows = scalars_dtf.loc[scalars_dtf["armId"] == arm_id]
    return arm_rows.pivot(index="patientId", columns="scalarId", values="value")


def _with_cross_arm_scalars(arm_scalars):
    """Left-join an arm table onto the "crossArms" table, on patientId."""
    return pd.merge(
        left=cross_arm_scalars, right=arm_scalars, how="left", on="patientId"
    )


cross_arm_scalars = _pivot_arm("crossArms")
control_arm_scalars = _with_cross_arm_scalars(_pivot_arm(control_arm_id))
treated_arm_scalars = _with_cross_arm_scalars(_pivot_arm(treated_arm_id))

for table_name, table in (
    ("cross_arm_scalars", cross_arm_scalars),
    ("control_arm_scalars", control_arm_scalars),
    ("treated_arm_scalars", treated_arm_scalars),
):
    print(f"{table_name} table has {len(table)} rows")

## Trial net efficacy and sample size

In [None]:
# Outcome column of each arm
control_outcome = control_arm_scalars[outcome_name]
treated_outcome = treated_arm_scalars[outcome_name]

mean_control, std_control = control_outcome.mean(), control_outcome.std()
mean_treated, std_treated = treated_outcome.mean(), treated_outcome.std()

# Net efficacy: mean control outcome minus mean treated outcome
initial_net_efficacy = mean_control - mean_treated
print(f"This trial has a net efficacy of {initial_net_efficacy:.3g}")

# Only the control-arm standard deviation is fed to the sample size formula
required_size = sample_size_continuous_outcome(
    alpha, beta, initial_net_efficacy, std_control
)
sample_size = math.ceil(required_size)
print(f"With beta = {beta}, this means a sample size of {sample_size}")

## Plotting absolute benefit vs features

In [None]:
num_features = len(all_features)
# Wrap long feature names so that they fit in the subplot titles
features_wrapped = ["<br>".join(textwrap.wrap(t, width=30)) for t in all_features]

# Pair the control and treated outcome of each patient; the merge suffixes the
# shared outcome column with _x (control, left table) and _y (treated)
efficacy_df = pd.merge(
    control_arm_scalars, treated_arm_scalars[outcome_name], how="inner", on="patientId"
)
efficacy_df["absolute_benefit"] = (
    efficacy_df[f"{outcome_name}_x"] - efficacy_df[f"{outcome_name}_y"]
)
efficacy_df = efficacy_df.dropna()
max_absolute_benefit = efficacy_df["absolute_benefit"].max()
print(f"Maximum absolute benefit = {max_absolute_benefit:.2g}")

# Sorting makes the patients with the largest benefit be drawn last (on top)
efficacy_df = efficacy_df.sort_values(by="absolute_benefit")
# Plotly only accepts opacities within [0, 1]: clip the ratio so that patients
# with a negative absolute benefit do not make the figure construction fail
marker_opacity = (efficacy_df["absolute_benefit"] / max_absolute_benefit).clip(
    lower=0, upper=1
)
fig = make_subplots(
    num_features,
    num_features,
    shared_xaxes=True,
    shared_yaxes=False,
    horizontal_spacing=0.03,
    vertical_spacing=0.01,
    column_titles=features_wrapped,
    row_titles=features_wrapped,
)

for i in range(num_features):  # iterating over rows
    for j in range(num_features):  # iterating over columns
        if i == j:
            # Diagonal: mean absolute benefit per bin of the feature
            x = efficacy_df[all_features[i]]
            nx = 10
            xs = np.linspace(x.min(), x.max(), nx + 1)
            y = []
            for k in range(nx):
                in_bin = x >= xs[k]
                if k == nx - 1:
                    # the last bin is closed on the right, otherwise the
                    # patient(s) holding the maximum value belong to no bin
                    in_bin = in_bin & (x <= xs[k + 1])
                else:
                    in_bin = in_bin & (x < xs[k + 1])
                y.append(efficacy_df.loc[in_bin, "absolute_benefit"].mean())
            fig.add_trace(
                # the mean of each bin is plotted at the bin center
                go.Scatter(x=xs[:-1] + 0.5 * (xs[1] - xs[0]), y=y, mode="lines"),
                row=i + 1,
                col=j + 1,
            )
        else:
            # Off-diagonal: feature j vs feature i, colored by absolute benefit
            fig.add_trace(
                go.Scattergl(
                    x=efficacy_df[all_features[j]],
                    y=efficacy_df[all_features[i]],
                    mode="markers",
                    marker=dict(
                        size=5,
                        color=efficacy_df["absolute_benefit"],
                        opacity=marker_opacity,
                        coloraxis="coloraxis1",
                    ),
                    hoverinfo="none",
                ),
                row=i + 1,
                col=j + 1,
            )
fig.update_annotations(font_size=12)
fig.update_coloraxes(
    colorbar_title="absolute benefit",
    cmin=0.2 * max_absolute_benefit,
    cmax=0.7 * max_absolute_benefit,
    colorscale="rainbow",
    colorbar_thickness=20,
    colorbar_title_side="right",
)
fig.update_layout(
    font=dict(size=12),
    showlegend=False,
    width=1000,
    height=900,
    template="plotly_white",
)
fig.show()

## Locally averaged absolute benefit

Using nearest-neighbor regression, we compute the locally averaged absolute benefit.
With sample_size as the number of neighbors, the Chebyshev norm and uniform weighting, this is equivalent
to computing the mean of the absolute benefit in a hypercube centered around each point in the feature space,
and therefore serves as a good proxy for the net efficacy.

In [None]:
# Nearest-neighbor regression of the absolute benefit over the feature space,
# averaging sample_size neighbors with uniform weights and the Chebyshev metric
neigh = KNeighborsRegressor(
    n_neighbors=sample_size, weights="uniform", metric="chebyshev"
)
neigh.fit(
    efficacy_df[all_features].to_numpy(), efficacy_df["absolute_benefit"].to_numpy()
)
# The regressor is evaluated at the very points it was fitted on
efficacy_df["locally_averaged_absolute_benefit"] = neigh.predict(
    efficacy_df[all_features].to_numpy()
)
min_locally_averaged_absolute_benefit, max_locally_averaged_absolute_benefit = (
    efficacy_df["locally_averaged_absolute_benefit"].min(),
    efficacy_df["locally_averaged_absolute_benefit"].max(),
)


# Sorting makes the patients with the largest local benefit be drawn last
efficacy_df = efficacy_df.sort_values(by="locally_averaged_absolute_benefit")
# Plotly only accepts opacities within [0, 1]: clip the ratio so that negative
# locally averaged benefits do not make the figure construction fail
marker_opacity = (
    efficacy_df["locally_averaged_absolute_benefit"]
    / max_locally_averaged_absolute_benefit
).clip(lower=0, upper=1)
fig = make_subplots(
    num_features,
    num_features,
    shared_xaxes=True,
    shared_yaxes=False,
    horizontal_spacing=0.03,
    vertical_spacing=0.01,
    column_titles=features_wrapped,
    row_titles=features_wrapped,
)

for i in range(num_features):  # iterating over rows
    for j in range(num_features):  # iterating over columns
        if i == j:
            # Diagonal: mean locally averaged benefit per bin of the feature
            x = efficacy_df[all_features[i]]
            nx = 10
            xs = np.linspace(x.min(), x.max(), nx + 1)
            y = []
            for k in range(nx):
                in_bin = x >= xs[k]
                if k == nx - 1:
                    # the last bin is closed on the right, otherwise the
                    # patient(s) holding the maximum value belong to no bin
                    in_bin = in_bin & (x <= xs[k + 1])
                else:
                    in_bin = in_bin & (x < xs[k + 1])
                y.append(
                    efficacy_df.loc[in_bin, "locally_averaged_absolute_benefit"].mean()
                )
            fig.add_trace(
                # the mean of each bin is plotted at the bin center
                go.Scatter(x=xs[:-1] + 0.5 * (xs[1] - xs[0]), y=y, mode="lines"),
                row=i + 1,
                col=j + 1,
            )
        else:
            # Off-diagonal: feature j vs feature i, colored by the local benefit
            fig.add_trace(
                go.Scattergl(
                    x=efficacy_df[all_features[j]],
                    y=efficacy_df[all_features[i]],
                    mode="markers",
                    marker=dict(
                        size=5,
                        color=efficacy_df["locally_averaged_absolute_benefit"],
                        opacity=marker_opacity,
                        coloraxis="coloraxis1",
                    ),
                    hoverinfo="none",
                ),
                row=i + 1,
                col=j + 1,
            )
fig.update_annotations(font_size=12)
fig.update_coloraxes(
    colorbar_title="locally averaged absolute benefit",
    cmin=min_locally_averaged_absolute_benefit,
    cmax=max_locally_averaged_absolute_benefit,
    colorscale="rainbow",
    colorbar_thickness=15,
    colorbar_title_side="right",
)
fig.update_layout(
    font=dict(size=14),
    showlegend=False,
    width=1000,
    height=900,
    template="plotly_white",
)
fig.show()

## Selecting features

In [None]:
# Keep only the first num_features entries of all_features
# (num_features is rebound here; it previously held len(all_features))
num_features = 3
features = all_features[0:num_features]
print("Selected features: {}".format(features))

## Gross efficacy tooling

In [None]:
def to_row(x_vec, columns):
    """Map each value of x_vec to the column name found at the same position.

    Returns a dictionary {columns[i]: x_vec[i]}; columns must hold at least as
    many names as x_vec holds values.
    """
    row = {}
    for position, value in enumerate(x_vec):
        row[columns[position]] = value
    return row


def lower_bound(d):
    """Return the name of the design column holding the lower bound of feature d."""
    return "{}_lower_bound".format(d)


def width(d):
    """Return the name of the design column holding the interval width of feature d."""
    return "{}_width".format(d)


# Column-wise extrema of the "crossArms" table, computed once for all features
# (they were previously recomputed over the whole table for every feature)
feature_minima = cross_arm_scalars.min(axis=0)
feature_maxima = cross_arm_scalars.max(axis=0)
# (min, max) observed for each selected feature
feature_bounds = {d: (feature_minima[d], feature_maxima[d]) for d in features}

# Each feature contributes two design columns: the lower bound of the
# eligibility interval and its width
column_bounds = {}
for d in features:
    (d_min, d_max) = feature_bounds[d]
    # the lower bound spans the feature range minus its last 10%
    column_bounds[lower_bound(d)] = (d_min, d_max - 0.1 * (d_max - d_min))
    # the width spans from 10% to 100% of the feature range
    column_bounds[width(d)] = (0.1 * (d_max - d_min), d_max - d_min)

# Dimension of the design space and ordered names of its columns
dim = len(column_bounds)
column_keys = list(column_bounds.keys())


def filter_conditions(row):
    """Return the mask of cross_arm_scalars rows satisfying the criteria of row.

    A patient is kept when, for every selected feature, its value lies in the
    closed interval [lower bound, lower bound + width] read from row.
    """
    per_feature_masks = []
    for d in features:
        lower = row[lower_bound(d)]
        upper = lower + row[width(d)]
        values = cross_arm_scalars[d]
        per_feature_masks.append((values >= lower) & (values <= upper))
    # logical AND of the per-feature masks
    return np.logical_and.reduce(per_feature_masks)


def group_size(row):
    """Return the number of cross_arm_scalars rows matching the criteria of row."""
    selected_patients = cross_arm_scalars[filter_conditions(row)]
    return len(selected_patients)


def filter(row):
    """Tell whether the criteria of row keep enough patients to be admissible.

    Returns True only when the filtered group is strictly larger than
    sample_size * min_resp_vp_size.
    NOTE(review): this function shadows the `filter` builtin; the name is kept
    because other cells call it.
    """
    minimal_group_size = sample_size * min_resp_vp_size
    return not (group_size(row) <= minimal_group_size)


def score(row, seed=cookbook_seed, n_boot=num_bootstraps, verbose=False):
    """Return the objective function value of the eligibility criteria in row.

    Args:
        row: dictionary of design columns (lower bound and width per feature).
        seed: seed forwarded to gross_efficacy.
        n_boot: number of bootstraps forwarded to gross_efficacy.
        verbose: when True, print the mean and standard deviation found.

    Returns:
        The value of objective_function for the bootstrapped gross efficacy,
        or None when gross_efficacy could not be estimated (it returns
        (None, None) when the filtered group is too small).
    """
    mean_efficacy, sd_efficacy = gross_efficacy(row, seed=seed, n_boot=n_boot)
    # Test for None explicitly: a mean or a standard deviation equal to 0.0 is
    # a valid estimate and must not be discarded by a truthiness test
    if mean_efficacy is None or sd_efficacy is None:
        if verbose:
            # formatting None with a float format would raise a TypeError
            print("mean, std of gross efficacy could not be estimated")
        return None
    if verbose:
        print(f"mean, std of gross efficacy = {mean_efficacy:.3g}, {sd_efficacy:.2g}")
    return objective_function(
        efficacy=mean_efficacy,
        efficacy_sd=sd_efficacy,
    )


def gross_efficacy(row, seed=cookbook_seed, n_boot=num_bootstraps):
    """Bootstrap the gross efficacy of the population selected by row.

    At each bootstrap, 2 * sample_size distinct patients are drawn from the
    filtered population: the first half provides the control outcomes, the
    second half the treated outcomes, and the difference of the two means
    (control minus treated) is recorded.

    Args:
        row: dictionary of design columns (lower bound and width per feature).
        seed: top-level seed combined with a fingerprint of row.
        n_boot: number of bootstraps.

    Returns:
        (mean, standard deviation with ddof=1) of the bootstrapped gross
        efficacies, or (None, None) when the filtered population does not
        exceed sample_size * min_resp_vp_size patients.
    """
    import hashlib

    # A reproducible random generator whose seed depends on the top-level
    # cookbook seed AND the evaluated design row. The row is fingerprinted with
    # a cryptographic digest rather than hash(): Python salts the hash of
    # strings per process, so hash()-based seeds change between sessions.
    row_fingerprint = ";".join(
        f"{key}={float(value)!r}" for key, value in sorted(row.items())
    )
    row_digest = hashlib.sha256(row_fingerprint.encode("utf-8")).digest()
    rng = np.random.default_rng([seed, int.from_bytes(row_digest[:8], "big")])
    # Creating the corresponding control and treated filtered dataset
    # (the mask is computed once and shared by both arms)
    mask = filter_conditions(row)
    control_filtered = control_arm_scalars[mask][outcome_name]
    treated_filtered = treated_arm_scalars[mask][outcome_name]
    if len(control_filtered) <= sample_size * min_resp_vp_size:
        return None, None
    bootstrap_gross_efficacies = []
    all_indices = np.arange(len(control_filtered))
    for _ in range(n_boot):
        # pick 2 * sample_size distinct indices at random from the filtered dataset
        shuffled_indices = rng.choice(all_indices, size=2 * sample_size, replace=False)
        # first sample_size indices for the control sub-group
        ctrl_mean = control_filtered.iloc[shuffled_indices[:sample_size]].mean()
        # next sample_size indices for the treated sub-group
        trtd_mean = treated_filtered.iloc[
            shuffled_indices[sample_size : 2 * sample_size]
        ].mean()
        bootstrap_gross_efficacies.append(ctrl_mean - trtd_mean)

    return np.mean(bootstrap_gross_efficacies), np.std(
        bootstrap_gross_efficacies, ddof=1
    )


def net_efficacy(row):
    """Return (mean, std) of the per-patient control-minus-treated outcome.

    Only the patients satisfying the eligibility criteria of row are used; the
    control and treated outcomes are paired on patientId.
    """
    paired = pd.merge(
        control_arm_scalars[filter_conditions(row)],
        treated_arm_scalars[filter_conditions(row)][outcome_name],
        how="inner",
        on="patientId",
    )
    # the merge suffixes the shared outcome column with _x (left) and _y (right)
    per_patient = paired[f"{outcome_name}_x"] - paired[f"{outcome_name}_y"]
    return (per_patient.mean(), per_patient.std())


def format_eligibility_criteria(row):
    """Return a text with one "lower <= feature <= upper" line per selected feature."""
    criteria_lines = [
        f"\n {row[lower_bound(f)]:.3g} <= {f} <= {row[lower_bound(f)] + row[width(f)]:.3g}"
        for f in features
    ]
    return "".join(criteria_lines)

## Generating design of experiment

In [None]:
# Unscrambled Sobol sequence over the unit hypercube of the design space
m = 14
sampler = st.qmc.Sobol(dim, scramble=False)
print(f"Full design size = {2**m}")
scaled_samples = sampler.random_base2(m)  # generates 2**m points

# Map the unit hypercube onto the bounds of each design column
lower_limits = [column_bounds[c][0] for c in column_keys]
upper_limits = [column_bounds[c][1] for c in column_keys]
samples = st.qmc.scale(scaled_samples, lower_limits, upper_limits)

# Keep only the points whose eligibility criteria select enough patients
filtered_indices = [
    i for i, x in enumerate(samples) if filter(to_row(x, columns=column_keys))
]
print(f"Number of admissible points = {len(filtered_indices)} / {samples.shape[0]} ")
filtered_samples = samples[filtered_indices, :]

## Exhaustive search (turned off by default)

In [None]:
# Set this flag to True to score every admissible point of the design
# Warning: this may take a lot of time to run, approx. 15s per 1000 admissible points
# NOTE(review): the flag is currently True although the section title announces
# that the exhaustive search is turned off by default; confirm the intended default
exhaustive_search = True
# Stays None unless the exhaustive search is run
exhaustive_best_criteria = None
if exhaustive_search:
    y = [score(to_row(x, columns=column_keys)) for x in filtered_samples]
    exhaustive_i_max, exhaustive_y_max = np.argmax(y), np.amax(y)
    best_point = filtered_samples[exhaustive_i_max]
    exhaustive_best_criteria = to_row(best_point, columns=column_keys)
    print(
        f"Exhaustive search: best row index = {exhaustive_i_max}, best score = {exhaustive_y_max:.3g}"
    )
    print(
        "Exhaustive search best eligibility criteria:\n"
        + format_eligibility_criteria(exhaustive_best_criteria)
    )

## Bayesian Optimization

In [None]:
# full design length
N = filtered_samples.shape[0]

# training size, capped by the number of admissible points: indexing
# filtered_samples beyond N would raise an IndexError
n = min(128, N)
lower_limits = [column_bounds[c][0] for c in column_keys]
upper_limits = [column_bounds[c][1] for c in column_keys]
# Map the admissible points back onto the unit hypercube for the regressor
scaled_filtered = st.qmc.scale(
    filtered_samples,
    lower_limits,
    upper_limits,
    reverse=True,
)
# The first n admissible points and their scores form the training set
X_training = scaled_filtered[:n, :]
y_training = [
    score(to_row(filtered_samples[i, :], columns=column_keys)) for i in range(n)
]
print(f"Training best score: {np.amax(y_training):.3g}")

# Gaussian process with a Matern kernel, queried with the max_EI strategy
kernel = Matern(length_scale=1.0)
regressor = GaussianProcessRegressor(kernel=kernel)
optimizer = BayesianOptimizer(
    estimator=regressor,
    X_training=X_training,
    y_training=y_training,
    query_strategy=max_EI,
)
X_max, y_max = optimizer.get_max()

# number of Bayesian Optimization iterations
num_queries = 50
queried = []
y_queried = []
for n_query in range(num_queries):
    # ask the optimizer which admissible point to evaluate next
    (query_idx,), query_inst = optimizer.query(scaled_filtered)
    queried.append(query_idx)
    y = score(to_row(filtered_samples[query_idx, :], columns=column_keys))
    y_queried.append(y)
    # feed the new observation back to the optimizer
    optimizer.teach(scaled_filtered[query_idx, :].reshape(1, -1), np.array([y]))
    X_max, y_max = optimizer.get_max()
    if n_query % 10 == 0:
        print(
            f"iteration = {n_query}, queried index: {query_idx}, score = {y:.2g}, best ever score = {y_max:.3g}"
        )
    if exhaustive_search:
        # stop early once the exhaustive search maximum has been reached
        if abs(y_max - exhaustive_y_max) < 1e-3:
            print(
                f"iteration = {n_query}, queried index: {query_idx}, score = {y:.3g} is close enough to exhaustive search maximum, let's stop"
            )
            break
# Map the best point from the unit hypercube back to the design column bounds
bo_best_x = st.qmc.scale(
    np.array([X_max]),
    lower_limits,
    upper_limits,
)[0, :]
bo_best_criteria = to_row(bo_best_x, columns=column_keys)
bo_best_score = score(bo_best_criteria, verbose=True)
print(f"Bayesian Optimization best score = {bo_best_score:.3g}")
print(
    "Bayesian Optimization best eligibility criteria:\n"
    + format_eligibility_criteria(bo_best_criteria)
)

## Score landscape around Bayesian Optimization best estimate

In [None]:
# Multiplicative factors applied to one design parameter at a time
nx = 21
xs = np.linspace(0.8, 1.2, nx)
plt.figure(figsize=(10, 6))
for j, best_value in enumerate(bo_best_x):
    # start from the best point and perturb only its j-th coordinate
    u = bo_best_x.copy()
    ys = []
    for factor in xs:
        u[j] = best_value * factor
        ys.append(score(to_row(u, columns=column_keys)))
    plt.plot(xs, ys, label=column_keys[j])
plt.xlabel("parameter multiplicative factor")
plt.ylabel("score")
plt.legend()

## Are we lucky? Pure Monte Carlo
### Does a simple Monte Carlo fare better than Bayesian Optimization?

In [None]:
# How many samples to draw
num_mc = 10000
rng = np.random.default_rng([cookbook_seed])
# Uniform draws in the unit hypercube, rescaled to the bounds of each design column
lows = np.array([column_bounds[key][0] for key in column_keys])
highs = np.array([column_bounds[key][1] for key in column_keys])
X_mc = lows + (highs - lows) * rng.uniform(low=0, high=1, size=(num_mc, dim))

# Keep only the points whose eligibility criteria select enough patients
filtered_indices = [
    i for i, x in enumerate(X_mc) if filter(to_row(x, columns=column_keys))
]

print(f"Number of admissible points = {len(filtered_indices)} / {X_mc.shape[0]} ")
X_mc_filtered = X_mc[filtered_indices, :]
y_mc = np.array([score(to_row(x, columns=column_keys)) for x in X_mc_filtered])
# Best admissible point and its score
mc_i_max = np.argmax(y_mc)
mc_best_criteria = to_row(X_mc_filtered[mc_i_max], columns=column_keys)
mc_best_score = score(mc_best_criteria, verbose=True)

print(f"MC best score = {mc_best_score:.3g}")
print("MC best eligibility criteria:\n" + format_eligibility_criteria(mc_best_criteria))

## Potential gains

In [None]:
def print_report(criteria, n_boot):
    """Print the efficacy and sample size gains obtained with criteria.

    Args:
        criteria: dictionary of design columns (lower bound and width per feature).
        n_boot: number of bootstraps used to estimate the gross efficacy.

    The report compares the net efficacy and the required sample size of the
    filtered population with those of the initial population
    (initial_net_efficacy and sample_size).
    """
    num_responding = group_size(criteria)
    print(f"  Number of unique best responding patients: {num_responding}")
    mean_gross_efficacy, sd_gross_efficacy = gross_efficacy(criteria, n_boot=n_boot)
    if mean_gross_efficacy is None or sd_gross_efficacy is None:
        # gross_efficacy returns (None, None) when the filtered group is too
        # small; formatting None with a float format would raise a TypeError
        print(
            f"  Gross efficacy (with {n_boot} bootstraps) could not be estimated: filtered group is too small"
        )
    else:
        print(
            f"  Mean of gross efficacy (with {n_boot} bootstraps) = {mean_gross_efficacy:.3g}"
        )
        print(
            f"  Standard deviation of gross efficacy (with {n_boot} bootstraps) = {sd_gross_efficacy:.2g}"
        )
    # the mask is computed once and shared by both arms
    mask = filter_conditions(criteria)
    control_filtered = control_arm_scalars[mask][outcome_name]
    treated_filtered = treated_arm_scalars[mask][outcome_name]
    best_responders_net_efficacy = control_filtered.mean() - treated_filtered.mean()
    print(
        f"  Net efficacy in the population of best responders = {best_responders_net_efficacy:.3g}"
    )
    net_efficacy_gain = best_responders_net_efficacy - initial_net_efficacy
    # the sign comes from the format specification, so that a loss is printed
    # as "-5%" rather than "+-5%"
    print(
        f"    --> gain in net efficacy = {net_efficacy_gain:.3g} ({net_efficacy_gain / initial_net_efficacy:+.0%})"
    )
    control_outcome_std = control_filtered.std()
    best_sample_size = math.ceil(
        sample_size_continuous_outcome(
            alpha, beta, best_responders_net_efficacy, control_outcome_std
        )
    )
    print(
        f"  Required sample size in population of best responders = {best_sample_size}"
    )
    sample_size_gain = sample_size - best_sample_size
    # a gain is a reduction of the sample size, hence the negated ratio; an
    # increase is printed as "+5%" rather than "--5%"
    print(
        f"    --> gain in required sample size = {sample_size_gain} ({-(sample_size_gain / sample_size):+.0%})"
    )


# Reference values of the initial (unfiltered) population
print(f"Net efficacy in initial population = {initial_net_efficacy:.3g}")
print(f"Required sample size in initial population = {sample_size}")

# One report per search method; a method without criteria (None or empty) is skipped
for title, criteria in (
    ("\nExhaustive search:", exhaustive_best_criteria),
    ("\nBayesian Optimization:", bo_best_criteria),
    ("\nMonte Carlo:", mc_best_criteria),
):
    if criteria:
        print(title)
        print_report(criteria, n_boot=50)