# Quantifying uncertainty in simulation results

## Introduction

The purpose of this cookbook is to demonstrate how one can compute and plot the 95% percentile prediction interval (PPI) of the mean for multiple time series at once.

To compute a PPI for a given summary metric (e.g. the mean), a given number of random samples of size n are drawn from a larger population. The metric of interest is computed for each sample, which makes it possible to estimate the empirical distribution of that metric. The 2.5% and 97.5% percentiles of this distribution are then estimated; they are the lower and upper bounds of the 95% PPI.

The PPI is a relevant metric to assess the degree of uncertainty embedded in the model and eventually to compare it to the uncertainty observed in a real-life setting. Indeed, standard confidence intervals are not well suited to the in silico context, as they tend to get very narrow as the Virtual Population (VP) size increases. The PPI, on the other hand, lets one choose the sample size (when the VP is much larger than a real-life clinical trial, one can use the same sample size as the one used for the real-life observations).


In [None]:
# Jinko specifics imports & initialization
# Please fold this section and do not edit it

import sys

# Put "../lib" first on the module search path before importing jinko_helpers
# (presumably this is where the helper module lives — confirm against the repo layout)
sys.path.insert(0, "../lib")
import jinko_helpers as jinko

# Connect to Jinko (see README.md for more options)

jinko.initialize()

In [None]:
# Cookbook specifics imports
import io
import json
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import zipfile
import numpy as np
from multiprocessing import Pool

# Cookbook specifics constants:
# put here the constants that are specific to your cookbook like
# the reference to the Jinko items, the name of the model, etc.

# @param {"name":"trialId", "type": "string"}
# trial short id can be retrieved in the url, pattern is `https://jinko.ai/<trial_sid>`
trial_sid = "tr-9Bid-BL1I"

## Step 1: Loading the trial and getting the last completed version


In [None]:
# Resolve the trial short id into its core item descriptor
trial_core_item_id = jinko.get_core_item_id(trial_sid, 1)

# Ask the trial manager for the status of every version of this trial
# https://doc.jinko.ai/api/#/paths/core-v2-trial_manager-trial-trialId--status/get
response = jinko.make_request(
    "/core/v2/trial_manager/trial/{}/status".format(trial_core_item_id["id"])
)
versions = response.json()

# Keep the first version reported as completed and remember which simulation
# (core item id + snapshot id) it points to. Any failure is reported and re-raised.
try:
    latest_completed_version = next(
        (version for version in versions if version["status"] == "completed"),
        None,
    )
    # Guard clause: nothing to work with if no version has completed
    if latest_completed_version is None:
        raise Exception("No completed trial version found")

    print(
        "Successfully fetched this simulation:\n",
        json.dumps(latest_completed_version, indent=1),
    )
    simulation_id = latest_completed_version["simulationId"]
    # From here on, trial_core_item_id holds the simulation's core item id
    # (a plain value), no longer the descriptor returned by get_core_item_id
    trial_core_item_id = simulation_id["coreItemId"]
    trial_snapshot_id = simulation_id["snapshotId"]
except Exception as e:
    print(f"Error processing trial versions: {e}")
    raise

## Step 2: Displaying a summary of the data content


In [None]:
# Fetch (and print) the summary of the trial results
response_summary = jinko.get_trial_scalars_summary(
    trial_core_item_id,
    trial_snapshot_id,
    print_summary=True,
)

# Names of the trial arms
arm_names = response_summary["arms"]

# Ids of the scalars and categoricals that are labelled as scenario overrides
scenario_descriptors = [
    descriptor["id"]
    for descriptor in [
        *response_summary["scalars"],
        *response_summary["categoricals"],
    ]
    if "ScenarioOverride" in descriptor["type"]["labels"]
]
print("List of scenario overrides:\n", scenario_descriptors, "\n")

## Step 3: Retrieving time series


In [None]:
# Listing the time series to retrieve
time_series_ids = ["Blood.Drug", "Tumor.CancerCell"]

# Download the requested time series (for every arm) as a zip archive and
# decode its first member into `csv_time_series`.
# Any failure is reported and re-raised so that the following cells never run
# with a missing or stale `csv_time_series`.
try:
    print("Retrieving time series data...")
    response = jinko.make_request(
        f"/core/v2/result_manager/trial/{trial_core_item_id}"
        f"/snapshots/{trial_snapshot_id}/timeseries/download",
        method="POST",
        json={
            # Request each time series for all the arms of the trial
            "timeseries": {ts: arm_names for ts in time_series_ids},
        },
    )
    if response.status_code != 200:
        print(
            f"Failed to retrieve time series data: {response.status_code} - {response.reason}"
        )
        response.raise_for_status()
        # NOTE(review): assuming a requests-style response, raise_for_status()
        # only raises for 4xx/5xx codes. Fail explicitly for any other non-200
        # status instead of silently leaving `csv_time_series` undefined.
        raise RuntimeError(
            f"Unexpected HTTP status while retrieving time series: {response.status_code}"
        )

    print("Time series data retrieved successfully.")
    # The context manager closes the archive once its content has been read
    with zipfile.ZipFile(io.BytesIO(response.content)) as archive:
        # Only the first member of the archive is read
        filename = archive.namelist()[0]
        print(f"Extracted time series file: {filename}")
        csv_time_series = archive.read(filename).decode("utf-8")
except Exception as e:
    print(f"Error during time series retrieval or processing: {e}")
    raise

## Step 4: Post-processing the time series


In [None]:
# Parse the downloaded CSV text into a dataframe and show its first rows
df_time_series = pd.read_csv(io.StringIO(csv_time_series))
print("Raw timeseries data (first rows): \n")
display(df_time_series.head())

# Number of rows recorded at each time point
counts = df_time_series["Time"].value_counts()

# A single distinct count means that every time point has as many observations
all_equal = counts.nunique() == 1

# Report either the good news or the distribution of the observation counts
print(
    "All time points have the same number of observations."
    if all_equal
    else f"Time points have varying numbers of observations:\n{counts.value_counts()}"
)

# Number of distinct patient ids found in the data
n_patients = len(pd.unique(df_time_series["Patient Id"]))
print(f"Successfully loaded {n_patients} patients.")

## Step 5: Computing mean value by time point, for each arm and each descriptor


In [None]:
# Mean of "Value" over all rows sharing the same arm, descriptor and time point.
# The resulting series is named "Mean" before the group keys are turned back
# into regular columns.
df_means_grouped = (
    df_time_series.groupby(["Arm", "Descriptor", "Time"])["Value"]
    .mean()
    .rename("Mean")
    .reset_index()
)
display(df_means_grouped)

## Step 6: Computing the 95% percentiles


### Defining useful functions


In [None]:
# Functions for each quantile of interest
def q_0025(x):
    """Return the 2.5% quantile of ``x`` (lower bound of a 95% interval).

    ``x`` must expose a ``quantile`` method (e.g. a pandas Series).
    """
    lower_probability = 0.025
    return x.quantile(lower_probability)

def q_975(x):
    """Return the 97.5% quantile of ``x`` (upper bound of a 95% interval).

    ``x`` must expose a ``quantile`` method (e.g. a pandas Series).
    """
    upper_probability = 0.975
    return x.quantile(upper_probability)

# Function to sample the bootstrapping groups (sampling over individual patients)
def generate_subsample_groups(data, num_subsamples, sample_size, replace=True, seed=None):
    """Draw random groups of patient ids from ``data``.

    Args:
        data: dataframe with a "Patient Id" column; only its distinct values
            are used.
        num_subsamples: number of groups to draw.
        sample_size: number of patient ids in each group.
        replace: when True (default, historical behaviour) ids are drawn with
            replacement, so an id may appear several times within a group.
            When False every group is made of distinct ids.
        seed: optional seed making the draw reproducible. When None (default)
            numpy's global random state is used, as before.

    Returns:
        A numpy array of shape (num_subsamples, sample_size) holding patient
        ids, one group per row.

    Raises:
        ValueError: if ``replace`` is False and ``sample_size`` exceeds the
            number of distinct patient ids.
    """
    # Series.unique() may return a pandas extension array (e.g. for the
    # "string" dtype), which does not support the 2-D indexing used below.
    patient_ids = np.asarray(data["Patient Id"].unique())
    n_patients = len(patient_ids)

    # The module-level functions of np.random share the global random state
    rng = np.random if seed is None else np.random.RandomState(seed)

    if replace:
        picks = rng.randint(n_patients, size=(num_subsamples, sample_size))
    else:
        if sample_size > n_patients:
            raise ValueError(
                f"Cannot draw {sample_size} distinct patients out of {n_patients}"
            )
        # One shuffled prefix per group: distinct ids within each group
        picks = np.array(
            [rng.permutation(n_patients)[:sample_size] for _ in range(num_subsamples)],
            dtype=np.intp,
        ).reshape(num_subsamples, sample_size)

    return patient_ids[picks]

# A function to compute the mean over one group (subsample) of patients.
# Kept as a top-level function so that it can be handed to `Pool.map`,
# which has to pickle the callable it dispatches.
def compute_group_mean(group):
    """Return the mean "Value" per (Descriptor, Arm, Time) for one subsample.

    Reads the global ``df_time_series`` dataframe.

    Args:
        group: 1-D sequence of patient ids forming the subsample. An id that
            appears k times contributes k times to the mean, so a group drawn
            with replacement always weighs exactly ``len(group)`` patients.
            (Filtering with ``isin`` would silently collapse repeated ids.)

    Returns:
        A dataframe with the columns "Descriptor", "Arm", "Time" and
        "Subsample_Mean".
    """
    sampled = pd.DataFrame({"Patient Id": np.asarray(group)})
    # Inner merge: every occurrence of an id pulls in all the rows of that patient
    rows = sampled.merge(df_time_series, on="Patient Id", how="inner")
    temp_df = (
        rows.groupby(["Descriptor", "Arm", "Time"])["Value"]
        .mean()
        .rename("Subsample_Mean")
        .reset_index()
    )
    return temp_df

### Running the bootstrapping


In [None]:
# Defining the number of subsamples and sample size
num_subsamples = 500
sample_size = 50

# Define the groups
groups = generate_subsample_groups(df_time_series, num_subsamples, sample_size)

# For each group, compute the mean over the filtered and grouped data frame
# This computation is parallelized using the multiprocessing library (https://docs.python.org/3/library/multiprocessing.html)
# The context manager shuts the worker processes down once the (blocking) map
# call has returned, instead of leaving them alive for the rest of the session.
with Pool() as pool:
    dfs = pool.map(compute_group_mean, groups)
df_subsample_means = pd.concat(dfs)


# Computing percentiles: for every arm / descriptor / time point, the 2.5% and
# 97.5% quantiles of the subsample means are the bounds of the 95% PPI
df_percentiles_grouped = (
    df_subsample_means.groupby(["Arm", "Descriptor", "Time"])
    .agg(LoBound=("Subsample_Mean", q_0025), HiBound=("Subsample_Mean", q_975))
    .reset_index()
)

# Merging the two data frames together (overall mean + interval bounds)
df_ppi = pd.merge(
    df_means_grouped, df_percentiles_grouped, on=["Arm", "Descriptor", "Time"]
)
display(df_ppi.head())

## Step 7: Plotting the outputs


In [None]:
## Creating subplots: one column per descriptor
unique_variables = df_ppi["Descriptor"].unique()
fig = make_subplots(
    rows=1,
    cols=len(unique_variables),
    shared_yaxes=False,
    # make_subplots documents subplot_titles as a list of strings
    subplot_titles=list(unique_variables),
)

## Defining colors for different arms
palette = px.colors.qualitative.Plotly

## Creating a dictionary to map each arm to a color
## (colors are reused cyclically when there are more arms than palette entries)
unique_arm = df_ppi["Arm"].unique()
color_map = {
    category: palette[i % len(palette)] for i, category in enumerate(unique_arm)
}


## Looping through each descriptor and adding traces for mean, lower bound, and upper bound stratified by arm
for i, group in enumerate(unique_variables):
    group_df = df_ppi[df_ppi["Descriptor"] == group]

    for arm in unique_arm:
        subset = group_df[group_df["Arm"] == arm]
        # The ribbon is drawn as a closed polygon: the upper bound from left to
        # right, followed by the lower bound from right to left
        x = subset["Time"]
        xrev = x[::-1]
        yHigh = subset["HiBound"]
        yLowRev = subset["LoBound"][::-1]

        # Add the mean line (plain line)
        fig.add_trace(
            go.Scatter(
                x=x,
                y=subset["Mean"],
                mode="lines",
                name=f"{group} {arm} Mean",
                line=dict(color=color_map[arm]),
            ),
            row=1,
            col=i + 1,
        )

        # Add the ribbon plot of the percentile prediction interval
        fig.add_trace(
            go.Scatter(
                x=pd.concat([x, xrev]),
                y=pd.concat([yHigh, yLowRev]),
                mode="lines",
                fill="toself",
                opacity=0.5,
                name=f"{group} {arm} 95% PPI",
                line=dict(color=color_map[arm], width=0),
            ),
            row=1,
            col=i + 1,
        )

## Updating the layout
fig.update_layout(
    title="Mean and Bootstrapped 95% Prediction Interval Stratified by Variable and Arm",
    legend_title="Legend",
)
## Axis titles are set through update_xaxes / update_yaxes so that they apply to
## every subplot (layout-level xaxis_title / yaxis_title only label the first one).
## The x values plotted above come from the "Time" column.
fig.update_xaxes(title_text="Time")
fig.update_yaxes(title_text="Values")

## Show the plot
fig.show()