# Producing a summary table for simulated data
## Introduction

Summary tables are used to describe the characteristics of the patients included in a trial. This cookbook will explain how to generate one.

In [None]:
# Jinko specifics imports & initialization
# Please fold this section and do not change
import jinko_helpers as jinko

# Connect to Jinko (see README.md for more options)
jinko.initialize()

In [None]:
# Cookbook specific imports
from IPython.display import HTML
import io
import json
import numpy as np
import pandas as pd
import zipfile

# Cookbook specific constants:
# Put here the constants that are specific to your cookbook like
# the reference to the Jinko items, the name of the model, etc.

# @param {"name":"trialId", "type": "string"}
# The trial's short id can be retrieved in the url, pattern is `https://jinko.ai/<trial_sid>`
trial_sid = "tr-9Bid-BL1I"

## Step 1 : Loading the trial
### Getting the latest completed version

In [None]:
# Resolve the trial short id into its core item id
trial_core_item_id = jinko.get_core_item_id(trial_sid, 1)

# Retrieve the status of every version of the trial
# https://doc.jinko.ai/api/#/paths/core-v2-trial_manager-trial-trialId--status/get
response = jinko.make_request(
    path=f'/core/v2/trial_manager/trial/{trial_core_item_id["id"]}/status'
)
versions = response.json()

# Keep the first version flagged as "completed"
# NOTE(review): this is the latest one only if the API lists versions newest first - confirm
try:
    latest_completed_version = next(
        filter(lambda version: version["status"] == "completed", versions), None
    )
    if latest_completed_version is None:
        raise Exception("No completed trial version found")

    print(
        "Successfully fetched this simulation:\n",
        json.dumps(latest_completed_version, indent=1),
    )
    # From here on, trial_core_item_id holds the core item id of the completed
    # simulation (a plain value, no longer the dictionary returned above)
    simulation_id = latest_completed_version["simulationId"]
    trial_core_item_id = simulation_id["coreItemId"]
    trial_snapshot_id = simulation_id["snapshotId"]
except Exception as e:
    # Report the problem in the notebook output, then let the error propagate
    print(f"Error processing trial versions: {e}")
    raise

## Step 2 : Getting and post processing the trial results
### Displaying a summary of imported simulated data

In [None]:
# Print and keep the summary of the scalars available for this trial version
response_summary = jinko.get_trial_scalars_summary(
    trial_core_item_id,
    trial_snapshot_id,
    print_summary=True,
)

# Store the ids of the scalars and categoricals labelled as scenario overrides
scenario_descriptors = [
    descriptor["id"]
    for descriptor in [*response_summary["scalars"], *response_summary["categoricals"]]
    if "ScenarioOverride" in descriptor["type"]["labels"]
]
print("List of scenario overrides:\n", scenario_descriptors, "\n")

### Retrieving scalar results

In [None]:
# Payload identifying the trial version (core item id + snapshot id)
json_data = {
    "trialId": dict(coreItemId=trial_core_item_id, snapshotId=trial_snapshot_id)
}

# https://doc.jinko.ai/api/#/paths/core-v2-result_manager-scalars_summary/post
# NOTE(review): the link above points to `scalars_summary` whereas the request
# targets `trial_visualization` - confirm which one is intended. The response
# is not reused by the cells visible below.
response = jinko.make_request(
    "/core/v2/result_manager/trial_visualization", method="POST", json=json_data
)

### Loading scalars into a dataframe

In [None]:
# Downloading the scalars of interest; the steps below rely on the
# "armId", "patientId", "scalarId" and "value" columns of the returned table
df_scalars = jinko.get_trial_scalars_as_dataframe(
    trial_core_item_id,
    trial_snapshot_id,
    scalar_ids=[
        "Blood.Drug.max",
        "bloodFlowRate.tmin",
        "initialCountCancerCells.tmin",
        "initialTumorBurden.tmin",
        "kccCancerCell.tmin",
        "lymphaticFlowRate.tmin",
        "lymphDrainingRate.tmin",
        "tumorBurdenChangeFromBaseline.tend",
        "Tumor.CancerCell.tmin",
        "vmaxCancerCellDeath.tmin",
    ],
)
print("Raw scalar data (first rows):\n")
display(df_scalars.head())
# NOTE(review): len() counts the rows of the table, which is presumably in long
# format at this point (several rows per patient) - confirm the wording
print("\nNumber of patients in the original table:", len(df_scalars))

# Filtering patients (keeping only cross arm baseline values and IV 10mg dose)
df_scalars = df_scalars.loc[df_scalars["armId"].isin(["crossArms", "iv-1-10"])]
print("\nNumber of patients in the table after filtering:", len(df_scalars))

# Pivoting to a wide format: one row per patient, one column per scalar
df_scalars = df_scalars.drop(columns="armId").pivot(
    index="patientId", columns="scalarId", values="value"
)

# Checking the result
print("\nPivoted scalar table (first rows):")
display(df_scalars.head())

# Flagging the patients having at least one NaN value in the table
nan_rows = df_scalars.isna().any(axis="columns")
id_to_remove = nan_rows.loc[nan_rows].index.values
print(
    "\n",
    len(id_to_remove),
    "patient(s) containing NaN values in the table will be removed:",
)
# Uncomment to inspect the rows about to be removed
# display(df_scalars[df_scalars.index.isin(id_to_remove)])

# Removing corresponding row(s)
df_scalars = df_scalars.drop(id_to_remove, axis="index")

In [None]:
# Creating a grouping column: the first half of the rows goes to "Group1",
# the remaining rows (one more when the row count is odd) go to "Group2"
df_scalars["Group"] = (
    ["Group1"] * (len(df_scalars) // 2)
    + ["Group2"] * (len(df_scalars) - len(df_scalars) // 2)
)

# Creating an additional fake categorical column, drawn at random for each row
# (no seed is set, so the values differ from one run to the next)
df_scalars["TumorType"] = np.random.choice(
    ["Type1", "Type2", "Type3"], size=len(df_scalars)
)

display(df_scalars.head())

## Step 3 : Producing the summary table
### Defining columns to summarize

In [None]:
# Column used to split the table into groups
stratify_by = "Group"

# Every other column of the table gets summarized
columns_to_summarize = df_scalars.columns.values.tolist()
columns_to_summarize.remove(stratify_by)

### Creating functions to compute summarized statistics (mean and std, or category counts)

In [None]:
def agg_fun(col):
    """Summarize a numeric column as "mean (std)".

    Both the mean and the standard deviation of `col` are formatted in
    scientific notation with two decimals, e.g. "2.00e+00 (1.00e+00)".
    """
    return f"{col.mean():.2e} ({col.std():.2e})"


def category_count_fun(x, categories):
    """Summarize a categorical column as one "category: count" line per category.

    Args:
        x: pandas Series holding the categorical values to count.
        categories: categories to report, in display order. Categories that
            do not occur in `x` are reported with a count of 0.

    Returns:
        A string with one "category: count" entry per category, separated by
        newlines and without a trailing newline (an empty string when
        `categories` is empty).
    """
    counts = x.value_counts().reindex(categories, fill_value=0)
    # Joining with the separator avoids having to trim a trailing newline
    # afterwards, which previously cut into the last category's count
    return "\n".join(f"{category}: {count}" for category, count in counts.items())

In [None]:
# Previewing the categorical summary on the "TumorType" column:
# one "category: count" block per group
df_scalars.groupby(stratify_by)["TumorType"].apply(
    category_count_fun, categories=sorted(df_scalars["TumorType"].unique())
)

### Computing summary statistics

In [None]:
# Dictionary to store the results (one entry per summarized column)
summary_dict = {}

# Looping through the columns and applying the summary matching each column's type
for col in columns_to_summarize:
    if pd.api.types.is_numeric_dtype(df_scalars[col]):
        # Numeric column: "mean (std)" per group
        summary_dict[col] = df_scalars.groupby(stratify_by)[col].apply(agg_fun)
    elif (
        # The dtype of the column being processed decides, not the dtype of
        # one specific column
        df_scalars[col].dtype.name in ("category", "object")
        # Also accept dedicated string dtypes, whose name is not "object"
        or pd.api.types.is_string_dtype(df_scalars[col])
    ):
        # Categorical column: count of each category per group
        summary_dict[col] = df_scalars.groupby(stratify_by)[col].apply(
            category_count_fun, categories=sorted(df_scalars[col].unique())
        )
    # Columns of any other type are left out of the summary

# Converting the dictionary to a data frame and transposing it
# (one row per summarized column, one column per group)
summary_df = pd.DataFrame(summary_dict).transpose()

# Naming the columns axis
summary_df.columns.name = "Descriptors"

# Display the summarized DataFrame, turning the literal "\n" sequences of the
# rendered HTML into line breaks
display(HTML(summary_df.to_html().replace("\\n", "<br>")))