### Ad hoc evaluation
This notebook is an example/template for ad hoc explorations/evaluations tracked on MLflow.
It can be used in the early stages of developing an ML task, when designing a production-ready Kedro pipeline might still be overkill/over-engineering.

Run from within a `poetry shell`, in the root directory of this example project:
```bash
NB=adhoc_evaluation.ipynb

papermill notebooks/$NB logs/$NB --cwd notebooks \
        -p tags '{}' \
        -p params '{}' \
        -p tracking '{}'
```
Contents for `tags`, `params` and `tracking` are JSON-encoded.

An interrupted run can be resumed by adding `"run_id": "..."` to the `tracking` argument.

See more options with `papermill --help`

In [None]:
%env KEDRO_LOGGING_CONFIG ../conf/logging.yml

import contextlib
import functools
import json
import sys
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Iterator, Tuple

import matplotlib.pyplot as plt
import mlopus
import numpy as np
import pandas as pd
from devtools import pprint
from kedro.framework.session import KedroSession
from kedro.framework.startup import bootstrap_project
from mlopus.utils import dicts, iter_utils
from pydantic.v1 import BaseModel

from mlopus_kedro_example.nodes import EvalModel

### Reuse the default Kedro session
Useful for inferring some project attributes (e.g.: Git details).

In [None]:
# Bootstrap the Kedro project located at `../` (relative to the working dir)
bootstrap_project("../")
# Create the default Kedro session; its `store` is read below to fill the MLflow tags
session = KedroSession.create()

### MLflow tags
Evaluated by combining the defaults with the command-line argument `-p tags '{}'`

In [None]:
# A `tags` value injected from the command line arrives as a JSON-encoded
# string; without it, there is nothing to combine with the defaults.
tag_overrides = locals().get("tags", {})
if isinstance(tag_overrides, str):
    tag_overrides = json.loads(tag_overrides)

# Default tags, read from the Kedro session store
default_tags = {
    "mlflow": {
        "user": session.store["username"],
        "source": {"commit": session.store["git"]["commit_sha"]},
    },
}

# Combine the defaults with the command-line tags
tags = dicts.deep_merge(default_tags, tag_overrides)

pprint(tags)

### MLflow params
Evaluated by combining the defaults with the command-line argument `-p params '{}'`

In [None]:
class Params(BaseModel):
    """Parameters of this evaluation, logged as params of the MLflow runs."""

    # K values handed to `EvalModel`
    k_values: list[int] = [3, 10, 20, 50]
    # Versions of the model to be evaluated (one child run per version)
    model_versions: set[str] = {"1", "2"}
    # Name under which the model is looked up in MLflow
    model_name: str = "mlopus_kedro_example"


# A `params` value injected from the command line arrives as a JSON-encoded
# string; without it, the defaults declared above apply.
params_arg = locals().get("params", {})
if isinstance(params_arg, str):
    params_arg = json.loads(params_arg)

params = Params.parse_obj(params_arg)
pprint(params)

### Experiment tracking settings
Evaluated by combining the defaults with the command-line argument `-p tracking '{}'`

In [None]:
class Tracking(BaseModel):
    """Experiment tracking settings."""

    # Keyword arguments forwarded to `mlopus.mlflow.get_api`
    mlflow: dict = {}
    # ID of an existing parent run to resume (a new run is started when unset)
    run_id: str | None = None
    # Name given to a newly started parent run
    run_name: str | None = None
    # Experiment in which a new parent run is started
    exp_name: str = "mlopus_kedro_example"


# A `tracking` value injected from the command line arrives as a JSON-encoded
# string; without it, the defaults declared above apply.
tracking_arg = locals().get("tracking", {})
if isinstance(tracking_arg, str):
    tracking_arg = json.loads(tracking_arg)

tracking = Tracking.parse_obj(tracking_arg)
pprint(tracking)

### Experiment logic
Yield metrics and artifacts for each evaluated model version.

In [None]:
@contextlib.contextmanager
def experiment_logic(model_version: str) -> Iterator[Tuple[dict, dict]]:
    """Evaluate one model version and yield its metrics and artifacts.

    Meant to be used in a `with` statement: file paths among the yielded
    artifacts point into a temporary directory that is removed when the
    context exits, so artifacts must be consumed inside the `with` body.

    Reads the notebook globals `params` and `mlflow`, which must be defined
    by the time the context is entered.

    Args:
        model_version: Version of the model `params.model_name` to evaluate.

    Yields:
        A `(metrics, artifacts)` tuple, where `artifacts` maps each file name
        to either a file path or a callable that writes the file to the path
        it receives.
    """
    evaluator = EvalModel(k_values=params.k_values)

    # Load the model using the `default` schema
    model = mlopus.artschema.load_artifact(
        schema="default",
        subject=mlflow.get_model(params.model_name).get_version(model_version),
    )

    # Evaluate the model to get metrics
    metrics = evaluator(model)

    # Save some temporary artifacts
    with TemporaryDirectory() as tmp:
        vectors = Path(tmp) / "vectors.npy"
        np.save(file=vectors, arr=model.get_vectors_by_labels(model.labels))

        # Define a mapping of `file_name` to `file_path_or_dumper`
        artifacts = {
            vectors.name: vectors,
            "metrics.json": lambda path: path.write_text(json.dumps(metrics)),
        }

        # Yield inside the `with` so that the temporary files still exist
        # while the caller is logging them
        yield metrics, artifacts

### Experiment loop
Run the experiment logic with different model versions while recording tags, params, metrics and artifacts.

In [None]:
mlflow = mlopus.mlflow.get_api(**tracking.mlflow)  # Get MLflow API

with (  # Resume parent run by ID or start a new one
    mlflow.resume_run(tracking.run_id) if tracking.run_id else
    mlflow.get_or_create_exp(tracking.exp_name).start_run(tracking.run_name)
) as parent_run:

    print(parent_run.url)  # Run URL
    parent_run.set_tags(tags)  # Set tags
    parent_run.log_params(params.dict())  # Log params

    # In case this parent run has been resumed by ID, find out which model versions
    # have already been evaluated by successful child runs, so we can skip them.
    skip_versions = set()
    for child_run in parent_run.children:
        if child_run.status == mlopus.mlflow.RunStatus.FINISHED:
            skip_versions.update(
                mlopus.lineage.of(child_run).inputs.models[params.model_name]
            )

    # Iterate model versions that haven't been evaluated yet.
    # Sorted so that child runs are created in the same order on every execution
    # (plain set iteration order is not stable across interpreter sessions).
    for model_version in sorted(params.model_versions.difference(skip_versions)):

        # Evaluate the model version in a child run to obtain metrics and artifacts
        with (
            parent_run.start_child(f"eval_v{model_version}") as child_run,
            experiment_logic(model_version) as (metrics, artifacts),
        ):

            print(child_run.url)  # Run URL
            child_run.set_tags(tags)  # Set tags

            # Log the same params as the parent, narrowed down to this single version
            child_run.log_params(params.copy(update={"model_versions": {model_version}}).dict())

            # Register the evaluated model version as an input of the child run
            # (this is what the skip logic above and the summary step read back)
            mlopus.lineage.of(child_run) \
                .with_input_model(params.model_name, model_version) \
                .register()

            # Iterate artifacts
            for file_name, file_path_or_dumper in artifacts.items():
                child_run.log_artifact(file_path_or_dumper, file_name)  # Log artifact

            child_run.log_metrics(metrics)  # Log metrics

### Experiment summary
Aggregate metrics from child runs into the parent run and publish reports

In [None]:
metrics_by_vector_size = {}

for child_run in parent_run.children:
    # Only finished child runs contribute to the summary
    if child_run.status == mlopus.mlflow.RunStatus.FINISHED:

        # The lineage of the child run tells which model version it evaluated
        evaluated_versions = mlopus.lineage.of(child_run).inputs.models[params.model_name]
        evaluated_version = iter_utils.get_one(evaluated_versions)

        # Look up the run that produced this model version and read its vector size
        producer_run = mlflow.get_model(params.model_name).get_version(evaluated_version).run
        vector_size = producer_run.params["vectors"]["D"]

        # Keep the average ANN distances at K for this vector size
        metrics_by_vector_size[vector_size] = child_run.metrics["avg_dist_at"]

# Dataframe of average ANN distances at K, with one column per vector size
df = (
    pd.DataFrame(metrics_by_vector_size)
    .sort_index(axis=1)
    .rename_axis("k")
)
print(df)

# Store the dataframe as a CSV file in the parent run artifacts
parent_run.log_artifact(df.to_csv, "avg_dist_at_k_by_vector_size.csv")

# Plot one line of mean distances at K per vector size
for vector_size, avg_dists in df.items():
    plt.plot(df.index, avg_dists, marker='o', label=f'D={vector_size}')

plt.title('Mean ANN distances at K by vector size')
plt.xlabel('K-values (ANN max neighbours)')
plt.ylabel('Mean neighbour distance')
plt.legend()

# Store the plot as a PNG file in the parent run artifacts
save_png = functools.partial(plt.savefig, format="png")
parent_run.log_artifact(
    path_in_run="avg_dist_at_k_by_vector_size.png",
    source=save_png,
)

plt.show()

Here's a compact version of the loop from the previous step, using a dict comprehension:
```python
metrics_by_vector_size = {
    mlflow \
        .get_model(params.model_name) \
        .get_version(
            iter_utils.get_one(
                mlopus.lineage.of(child_run).inputs.models[params.model_name]
            )
        ) \
        .run.params["vectors"]["D"]: child_run.metrics["avg_dist_at"]
    
    for child_run in parent_run.children
    if child_run.status == mlopus.mlflow.RunStatus.FINISHED
}
```