In [None]:
# Echo the value of a cell's last expression or of its last plain assignment,
# so a cell ending in `x = f()` displays `x` without repeating the name.
%config InteractiveShell.ast_node_interactivity='last_expr_or_assign'  # always print last expr.
# Render inline figures as SVG.
%config InlineBackend.figure_format = 'svg'
# Draw matplotlib figures inline, below the cell that produces them.
%matplotlib inline

In [None]:
import warnings

from torchinfo import summary

# Ignore UserWarnings issued from modules whose name starts with "tsdm".
# The filter is installed before the `import tsdm` cell, so it is already active at import time.
warnings.filterwarnings(action="ignore", category=UserWarning, module="tsdm")

# Load 𝗧ime 𝗦eries 𝗗atasets & 𝗠odels (𝚝𝚜𝚍𝚖)

In [None]:
import tsdm

## Time Series Datasets

### A **time series** is a tuple $D = (𝐓𝐒, M)$

- Time-indexed data $𝐓𝐒=\{(tᵢ, vᵢ)∣i=1…n\}$
  - timestamps $t∈𝓣$, values $v∈𝓥$
- Time-independent metadata $M∈𝓜$


### A **time series collection** is a tuple $C = (I,S,G)$ consisting of

- Index $I⊆𝓘$ (of IDs)
- Collection of time series $S=\{Dᵢ=(𝐓𝐒ᵢ, Mᵢ) ∣ i∈I,\ 𝐓𝐒ᵢ∈(𝓣ᵢ×𝓥ᵢ)^*,\ Mᵢ∈𝓜ᵢ\}$
- Index-independent Global data $G∈𝓖$
- If $𝓣ᵢ=𝓣$, $𝓥ᵢ=𝓥$ and $𝓜ᵢ=𝓜$ for all $i∈I$, the collection is called **equimodal**

## Examples

1. Clinical data:
    - index $𝓘$: patient / admission id
    - metadata $M_i$: patient metadata (age, sex, preconditions, ...)
    - values $V_i$: measured data (heart rate, blood pressure, etc.) 
    - globals $G$: units of measurement, measurement devices used, etc.
    
2. Bioreactor data
    - index $𝓘$: experiment / run id
    - metadata $M_i$: bacterial strain used, reactor size, reactor type
    - values $V_i$: measured data (O₂-, Glucose-, Acetate- concentration, stirring speed) 
    - globals $G$: units of measurement, measurement devices used, etc. 

# Load the dataset

In [None]:
from tsdm.datasets import KIWI_RUNS

# Instantiate the KIWI_RUNS dataset; the following cells read its
# `timeseries`, `metadata` and `units` attributes.
dataset = KIWI_RUNS()

In [None]:
# Time series part of the dataset; a later cell picks a single series
# out of it via `ts.loc[(run_id, experiment_id)]`.
ts = dataset.timeseries

In [None]:
# Metadata shipped with the dataset.
md = dataset.metadata

In [None]:
# Show the units information attached to the dataset.
dataset.units

# Load the model & encoder

Preliminary API, ideally should be replaced with a database lookup.

⇝ The encoder depends on both the model and the dataset.

tags: model, dataset, fold, epoch, hyperparameters

In [None]:
import tsdm

In [None]:
from torchinfo import summary

from tsdm.models.pretrained import LinODEnet

# Instantiate the LinODEnet from `tsdm.models.pretrained` and show its torchinfo summary.
model = LinODEnet()
summary(model)

## Make a prediction

In [None]:
# Display the time series table again for reference before selecting one series from it.
ts

In [None]:
# Key of the single time series used below: a (run_id, experiment_id) pair.
key = (510, 16871)
# Unpack the pair so both components are also available under their own names.
run_id, experiment_id = key

In [None]:
# Select the series identified by `key` and cast every column to float.
s = ts.loc[key].astype(float)
s

In [None]:
# Import exactly the encoders used in this cell instead of `import *`, so the
# origin of every name is visible and the notebook namespace is not polluted.
from tsdm.encoders import (
    FloatEncoder,
    Frame2Tensor,
    FrameEncoder,
    MinMaxScaler,
    Standardizer,
    TimeDeltaEncoder,
)

# Encoders are chained with `@`.
# NOTE(review): presumably the columns go through FloatEncoder + Standardizer and the
# index through TimeDeltaEncoder + MinMaxScaler before Frame2Tensor converts the frame
# to tensors — confirm the composition order against the tsdm encoder documentation.
encoder = Frame2Tensor() @ FrameEncoder(
    Standardizer() @ FloatEncoder(), index_encoders=MinMaxScaler() @ TimeDeltaEncoder()
)
# Alternative kept for reference (needs `TensorEncoder` imported from tsdm.encoders):
# encoder = TensorEncoder() @ FloatEncoder()

# Fit the encoder on the selected series `s`.
encoder.fit(s)

In [None]:
# Columns treated as observables; a later cell blanks these out inside the forecasting window.
observables = [
    "Base",
    "DOT",
    "Glucose",
    "OD600",
    "Acetate",
    "Fluo_GFP",
    "Volume",
    "pH",
]
# Columns treated as controls; these are left untouched by the masking step.
controls = [
    "Cumulated_feed_volume_glucose",
    "Cumulated_feed_volume_medium",
    "InducerConcentration",
    "StirringSpeed",
    "Flow_Air",
    "Temperature",
    "Probe_Volume",
]

In [None]:
# `np` is first needed here, but the notebook's only other `import numpy as np`
# sits in a later cell; importing it in this cell keeps "Restart & Run All" working.
import numpy as np

# Time windows expressed as label slices (pandas `.loc` label slices include both endpoints).
# Everything up to the 6 h mark.
total_horizon = slice(None, np.timedelta64(6, "h"))
# NOTE(review): this window ends at 6 h and therefore overlaps the forecasting
# horizon below — confirm whether an end of 3 h was intended.
observation_horizon = slice(np.timedelta64(0, "h"), np.timedelta64(6, "h"))
# Window from 3 h to 6 h in which the observables get masked.
forecasting_horizon = slice(np.timedelta64(3, "h"), np.timedelta64(6, "h"))

In [None]:
# Take everything up to the end of the total horizon; `.copy()` keeps the
# masking below from modifying the original series `s`.
inputs = s.loc[total_horizon].copy()
# Replace the observables inside the forecasting window with NaN; the control
# columns are not touched.
inputs.loc[forecasting_horizon, observables] = float("nan")

inputs

In [None]:
# Encode the masked frame with the fitted encoder.
# NOTE(review): T and X are presumably the encoded time index and the encoded
# values — confirm against the encoder's output. `T` is rebound in the plotting cell below.
T, X = encoder.encode(inputs)

# Make a Prediction with the model

In [None]:
import matplotlib.pyplot as plt

In [None]:
# NOTE(review): the inline backend is already enabled in the first cell; this repeat looks redundant.
%matplotlib inline

In [None]:
# Peek at the raw index values as a NumPy array; the plotting cell below
# divides them by a one-hour timedelta to get hours.
s.index.to_numpy()

In [None]:
import datetime

import matplotlib
import numpy as np


def timeTicks(x, pos):
    """Format a tick value, read as a number of minutes, as a timedelta string.

    Args:
        x: Tick value; interpreted as minutes.
        pos: Tick position. Unused; present only because tick-formatter
            callbacks are called with ``(value, position)``.

    Returns:
        ``str(datetime.timedelta(minutes=x))``, e.g. ``"1:30:00"`` for ``x=90``.
    """
    # NOTE(review): the plotting cell puts hours on the x-axis, so enabling this
    # formatter there as-is would mislabel the ticks — confirm the intended unit.
    return str(datetime.timedelta(minutes=x))


# `import matplotlib` alone does not guarantee that the `ticker` submodule is
# loaded; import it explicitly so this line does not depend on other imports.
import matplotlib.ticker

# Tick formatter that labels ticks via `timeTicks`.
formatter = matplotlib.ticker.FuncFormatter(timeTicks)

In [None]:
fig, ax = plt.subplots(constrained_layout=True)

# Express the timedelta index in hours for the x-axis.
# NOTE: this rebinds `T`, which an earlier cell assigned from `encoder.encode(inputs)`.
T = s.index.to_numpy() / np.timedelta64(1, "h")

ax.plot(T, s["DOT"], ".b", label="DOT")
# ax.xaxis.set_major_formatter(formatter)

# Label the figure so it can be understood without reading the code.
ax.set_xlabel("time [h]")
ax.set_ylabel("DOT")
ax.set_title(f"run {run_id}, experiment {experiment_id}")
# Trailing semicolon suppresses the `Legend` repr in the cell output.
ax.legend();

# What can models do?

In [None]:
Level 1: Simple forecasting model
Level 2: Probabilistic forecasting model
Level 3: Forecast across datasets
Level 4:

## What would be nice to have?

- Data Collection
    - Delayed measurements should come with two timestamps (when the sample was collected and when the value was recorded).
- Data Validation
    - Categories instead of strings
    - Nullable datatypes for missing values (avoid using e.g. -999 as a representation for missing values)
    - Timedeltas and Datetimes
- Data Annotation - Adding Expert Knowledge
    - Biologically plausible minimum / maximum values
- Model database

# Types of temporal data

In [None]:
%%latex
\begin{table}[H]
\centering
\begin{tabular}{lccc}
   data       & obs.-horizon & pred.-horizon & predicted %
\\ \midrule
   $X$        & {\color{DarkGreen} ✔} & {\color{Crimson} ✘}   & {\color{Crimson} ✘}    %
\\ $U$        & {\color{DarkGreen} ✔} & {\color{DarkGreen} ✔} & {\color{Crimson} ✘}    %
\\ $Y^{auto}$ & {\color{DarkGreen} ✔} & {\color{Crimson} ✘}   & {\color{DarkGreen} ✔}  %
\\ $Y^{pred}$ & {\color{Crimson} ✘}   & {\color{Crimson} ✘}   & {\color{DarkGreen} ✔}  %
\end{tabular}
\end{table}