# Visualizing time series

## Introduction

This cookbook guides you through the creation of a simple visualization from an existing trial in Jinko.  
In particular, you will retrieve time series from the trial and plot them using Plotly.  


Linked resources: [Jinko](https://jinko.ai/project/e0fbb5bb-8929-439a-bad6-9e12d19d9ae4?labels=24574ece-6bde-4d76-896a-187426965a51).

In [None]:
# Jinko specifics imports & initialization
# Please fold this section and do not change
# The `jinko` alias is the entry point used by every later cell for API calls.
import jinko_helpers as jinko

# Connect to Jinko (see README.md for more options)
# NOTE(review): presumably picks up credentials / project settings from the
# environment — confirm against README.md.
jinko.initialize()

In [None]:
# Cookbook specifics imports

import io
import json
import pandas as pd
import plotly.express as px
import plotly.io as pio
import zipfile

# Cookbook specifics constants

# @param {"name":"trialId", "type": "string"}
# Fill the short Id of your Trial (ex: tr-EKRx-3HRt)
# NOTE: the "Load the trial" cell rebinds `trialId` to the trial's core item Id,
# so re-run this cell before re-running that one.
trialId = "tr-OxkW-mB8I"

## Let's use the API and plot the data

### Load the trial

In [None]:
# Resolve the user-supplied short Id into the identifiers needed by the API:
#   - trialId         : core item Id of the trial (rebinds the short Id!)
#   - trialSnapshotId : snapshot Id of the selected completed version

# An empty string is just as unusable as None, so reject any falsy value.
if not trialId:
    raise Exception("Please specify a Trial Id")
print(f"Using Trial ID: {trialId}")

# Convert short Id to coreItemId
try:
    coreItemId = jinko.get_core_item_id(trialId, 1)
except Exception:
    print("Failed to find corresponding trial, check the trialId")
    raise

# List all Trial versions (https://doc.jinko.ai/api/#/paths/core-v2-trial_manager-trial-status/post)
try:
    trialVersions = jinko.make_request(
        f'/core/v2/trial_manager/trial/{coreItemId["id"]}/status'
    ).json()
    print(f"Fetched {len(trialVersions)} versions for the trial.")
except Exception as e:
    print(f"Error fetching trial versions: {e}")
    raise

# Get the latest completed version
# NOTE(review): this takes the FIRST completed entry of the list; it is the
# latest one only if the API returns versions newest first — TODO confirm.
try:
    latestCompletedVersion = next(
        (item for item in trialVersions if item["status"] == "completed"), None
    )
    if latestCompletedVersion is None:
        raise Exception("No completed Trial version found")
    print(
        "Successfully fetched this simulation:\n",
        json.dumps(latestCompletedVersion, indent=1),
    )
    # Store the trial Id and the snapshot Id to use in the API requests.
    # `trialId` is deliberately rebound from the short Id to the core item Id
    # because the following cells pass it straight to the API.
    simulationId = latestCompletedVersion["simulationId"]
    trialId = simulationId["coreItemId"]
    trialSnapshotId = simulationId["snapshotId"]
except Exception as e:
    print(f"Error processing trial versions: {e}")
    raise

### Display a results summary

In [None]:
# Fetch (and print) the scalar results summary of the selected trial snapshot
responseSummary = jinko.get_trial_scalars_summary(
    trialId, trialSnapshotId, print_summary=True
)

# Arm names are reused later when requesting the time series
armNames = responseSummary["arms"]

# Collect the ids of every scalar / categorical result labelled as a scenario override
scenarioDescriptors = []
for descriptor in responseSummary["scalars"] + responseSummary["categoricals"]:
    if "ScenarioOverride" in descriptor["type"]["labels"]:
        scenarioDescriptors.append(descriptor["id"])
print("List of scenario overrides:\n", scenarioDescriptors, "\n")

### Download time series and scalars results data

In [None]:
# Retrieve time series ids (https://doc.jinko.ai/api/#/paths/core-v2-trial_manager-trial-trialId--snapshots--trialIdSnapshot--output_ids/get)

outputIdsUrl = (
    f"/core/v2/trial_manager/trial/{trialId}/snapshots/{trialSnapshotId}/output_ids"
)
response = jinko.make_request(outputIdsUrl, method="GET")

# NOTE: `responseSummary` is rebound here — from now on it holds the decoded
# JSON payload describing the available outputs, not the scalars summary.
rawPayload = response.content.decode("utf-8")
responseSummary = json.loads(rawPayload)
print("Available time series:\n", responseSummary, "\n")

In [None]:
# Retrieve time series (https://doc.jinko.ai/api/#/paths/core-v2-result_manager-timeseries_summary/post)
# Downloads a zip archive of time series and stores the decoded content of its
# first member in `csvTimeSeries`.

# replace here by the time series ids list you want
idsForTimeSeries = [x["id"] for x in responseSummary]

try:
    print("Retrieving time series data...")
    response = jinko.make_request(
        f"/core/v2/result_manager/trial/{trialId}/snapshots/{trialSnapshotId}/timeseries/download",
        method="POST",
        json={
            # Request every selected time series for all arms
            "timeseries": {ts: armNames for ts in idsForTimeSeries},
        },
    )
    if response.status_code != 200:
        print(
            f"Failed to retrieve time series data: {response.status_code} - {response.reason}"
        )
        response.raise_for_status()
        # Assuming a requests-style response, raise_for_status() only raises
        # for 4xx/5xx codes. Fail explicitly for any other non-200 status so
        # that `csvTimeSeries` is never silently left undefined.
        raise Exception(
            f"Unexpected status code while retrieving time series: {response.status_code}"
        )

    print("Time series data retrieved successfully.")
    # The context manager guarantees the archive is closed even on error
    with zipfile.ZipFile(io.BytesIO(response.content)) as archive:
        archiveMembers = archive.namelist()
        if not archiveMembers:
            raise Exception("The downloaded time series archive is empty")
        # Only the first file of the archive is read
        filename = archiveMembers[0]
        print(f"Extracted time series file: {filename}")
        csvTimeSeries = archive.read(filename).decode("utf-8")
except Exception as e:
    print(f"Error during time series retrieval or processing: {e}")
    raise

### Postprocess the data in a pandas dataframe

The data is post-processed using the pandas library and transformed into a table that can easily be plotted.

In [None]:
# Build the table to plot: time series rows enriched with the scenario
# override values of the corresponding (arm, patient) pair.

# Load timeseries into a dataframe
dfTimeSeries = pd.read_csv(io.StringIO(csvTimeSeries))
print("Raw timeseries data (first rows): \n")
display(dfTimeSeries.head())

# Load scalars into a dataframe (restricted to the scenario overrides).
# Uses the same trial / snapshot ids as the other API calls of this cookbook.
dfScalars = jinko.get_trial_scalars_as_dataframe(
    trialId, trialSnapshotId, scalar_ids=scenarioDescriptors
)
print("\nRaw scalar data (first rows):\n")
display(dfScalars.head())

# Pivot to a wide format to obtain protocol overrides in columns
# (one row per arm/patient, one column per scalar id)
dfScalars = dfScalars.pivot(
    index=["armId", "patientId"], columns="scalarId", values="value"
)

print("\nPivotted scalar table (first rows): \n")
display(dfScalars.head())

# Merge both tables together to obtain protocol descriptors per arm.
# The right-hand keys refer to the index levels created by the pivot above.
df = dfTimeSeries.merge(
    right=dfScalars, left_on=["Patient Id", "Arm"], right_on=["patientId", "armId"]
)

print("\nMerged table (first rows): \n")
display(df.head())

# Convert time to days: divide the whole column at once by the number of
# seconds in a day (vectorized instead of a per-element lambda).
SECONDS_PER_DAY = 60 * 60 * 24
df["Time"] = df["Time"] / SECONDS_PER_DAY

# Filter the data set to keep only the variable we would like to plot
dfToPlot = df.query('Descriptor=="Blood.Drug"')

### Plot the data

Finally, we plot the time series data, faceting over the scenario overrides. 

In [None]:
# Replace with the renderer best suited to your notebook
# (run print(pio.renderers) to get the list of available ones)
pio.renderers.default = "vscode"  # for running in vs code
# pio.renderers.default = 'iframe' # for running in jupyter lab

# Human-readable titles for the axes, facets and legend.
# NOTE(review): the ".tmin" columns are labelled as doses in mg — confirm that
# these labels match what the overrides actually represent.
axisLabels = {
    "Time": "Time (days)",
    "Value": "Concentration (µg/mL)",
    "fullDose.tmin": "Full dose (mg)",
    "primingDose.tmin": "Priming dose (mg)",
    "administrationMode": "Administration",
}

# Adapt the column names below to your own ids:
# one line per administration mode, one facet per combination of overrides.
fig = px.line(
    dfToPlot,
    x="Time",
    y="Value",
    color="administrationMode",
    facet_row="primingDose.tmin",
    facet_col="fullDose.tmin",
    labels=axisLabels,
    log_y=True,
    height=600,
)
fig.show()