# Demo for Tensorboard FlowCept Adapter

Please note that this notebook demonstrates the linkage between a previous workflow run with Dask and this TensorFlow training script. 
Thus, this demo expects that you have executed the `dask.ipynb` notebook beforehand.

In [None]:
# ! pip install flowcept[tensorboard]

In [None]:
# Sleeps are used because these notebooks are being tested automatically as part of the CI/CD. 
# In a normal user interaction, these sleeps would not be necessary.
from time import sleep

import os
import shutil
from uuid import uuid4
from time import sleep

In [None]:
def run_tensorboard_hparam_tuning(tensorboard_events_dir, epochs=2):
    """Run a small Fashion-MNIST hyperparameter sweep that logs to TensorBoard.

    A reduced slice of the Fashion-MNIST dataset is trained once for every
    combination of number of units, dropout rate, optimizer and batch size.
    Each trial writes its hyperparameters and final accuracy under
    ``<tensorboard_events_dir>/wf_id_<workflow id>_<trial number>``.

    Parameters
    ----------
    tensorboard_events_dir : str
        Directory where the TensorBoard event files are written.
    epochs : int, optional
        Number of training epochs used for every trial (default: 2).

    Returns
    -------
    str
        The workflow id (a freshly generated UUID4 string) that is recorded
        in the hyperparameters of every trial of this sweep.
    """
    # Code based on: https://www.tensorflow.org/tensorboard/hyperparameter_tuning_with_hparams
    import tensorflow as tf
    from tensorboard.plugins.hparams import api as hp

    fashion_mnist = tf.keras.datasets.fashion_mnist

    (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
    x_train, x_test = x_train / 255.0, x_test / 255.0

    # Reduce the dataset size for faster debugging
    DEBUG_SAMPLES_TRAIN = 100  # Number of training samples to keep
    DEBUG_SAMPLES_TEST = 20  # Number of test samples to keep

    x_train, y_train = x_train[:DEBUG_SAMPLES_TRAIN], y_train[:DEBUG_SAMPLES_TRAIN]
    x_test, y_test = x_test[:DEBUG_SAMPLES_TEST], y_test[:DEBUG_SAMPLES_TEST]

    HP_NUM_UNITS = hp.HParam("num_units", hp.Discrete([16, 32]))
    HP_DROPOUT = hp.HParam("dropout", hp.RealInterval(0.1, 0.2))
    HP_OPTIMIZER = hp.HParam("optimizer", hp.Discrete(["adam", "sgd"]))
    HP_BATCHSIZES = hp.HParam("batch_size", hp.Discrete([32, 64]))

    HP_MODEL_CONFIG = hp.HParam("model_config")
    HP_OPTIMIZER_CONFIG = hp.HParam("optimizer_config")

    METRIC_ACCURACY = "accuracy"

    # Declare the experiment-level hyperparameters and metric once, at the
    # root of the events directory.
    with tf.summary.create_file_writer(tensorboard_events_dir).as_default():
        hp.hparams_config(
            hparams=[
                HP_NUM_UNITS,
                HP_DROPOUT,
                HP_OPTIMIZER,
                HP_BATCHSIZES,
                HP_MODEL_CONFIG,
                HP_OPTIMIZER_CONFIG,
            ],
            metrics=[hp.Metric(METRIC_ACCURACY, display_name="Accuracy")],
        )

    def train_test_model(hparams, tensorboard_events_dir):
        """Build, train and evaluate one model; return its test accuracy."""
        model = tf.keras.models.Sequential(
            [
                tf.keras.layers.Flatten(),
                tf.keras.layers.Dense(
                    hparams[HP_NUM_UNITS], activation=tf.nn.relu
                ),
                tf.keras.layers.Dropout(hparams[HP_DROPOUT]),
                tf.keras.layers.Dense(10, activation=tf.nn.softmax),
            ]
        )
        model.compile(
            optimizer=hparams[HP_OPTIMIZER],
            loss="sparse_categorical_crossentropy",
            metrics=["accuracy"],
        )

        # The number of epochs comes from the enclosing function's argument.
        model.fit(
            x_train,
            y_train,
            epochs=epochs,
            callbacks=[
                tf.keras.callbacks.TensorBoard(tensorboard_events_dir),
                # log metrics
                hp.KerasCallback(tensorboard_events_dir, hparams),  # log hparams
            ],
            batch_size=hparams[HP_BATCHSIZES],
        )
        _, accuracy = model.evaluate(x_test, y_test)
        return accuracy

    def run(run_dir, hparams):
        """Execute one trial, logging its hparams and accuracy under run_dir."""
        with tf.summary.create_file_writer(run_dir).as_default():
            hp.hparams(hparams)  # record the values used in this trial
            accuracy = train_test_model(hparams, tensorboard_events_dir)
            tf.summary.scalar(METRIC_ACCURACY, accuracy, step=1)

    session_num = 0
    # One id shared by every trial of this sweep; it names the run
    # directories, is logged with the hparams, and is returned to the caller.
    wf_id = str(uuid4())

    for num_units in HP_NUM_UNITS.domain.values:
        for dropout_rate in (
            HP_DROPOUT.domain.min_value,
            HP_DROPOUT.domain.max_value,
        ):
            for optimizer in HP_OPTIMIZER.domain.values:
                for batch_size in HP_BATCHSIZES.domain.values:
                    # These two added ids below are optional and useful
                    # just to contextualize this run.
                    hparams = {
                        "workflow_id": wf_id,
                        "activity_id": "hyperparam_evaluation",
                        "epochs": epochs,
                        HP_NUM_UNITS: num_units,
                        HP_DROPOUT: dropout_rate,
                        HP_OPTIMIZER: optimizer,
                        HP_BATCHSIZES: batch_size,
                    }
                    run_name = f"wf_id_{wf_id}_{session_num}"
                    print("--- Starting trial: %s" % run_name)
                    print(f"{hparams}")
                    run(f"{tensorboard_events_dir}/" + run_name, hparams)
                    session_num += 1

    return wf_id

    

In [None]:
# Optionally set up env vars to control Flowcept's log level
%env LOG_STREAM_LEVEL="error"
%env LOG_FILE_LEVEL="debug"

### Set the env var pointing to the conf file where the ports, hostnames, and other conf variables are read from.

There is an example conf file available in the `resources` directory of the FlowCept repository. You can use it as is if you are running this notebook on your local laptop.

## Set up tensorboard events directory

In [None]:
from flowcept.configs import settings

# For convenience in these tests, the events directory is read from the
# yaml settings file.
# To use a custom location instead: tensorboard_events_dir = "my_tb_dir"
tensorboard_adapter_settings = settings["adapters"]["tensorboard"]
tensorboard_events_dir = tensorboard_adapter_settings["file_path"]

In [None]:
# Optional: Delete old tensorboard directories
if os.path.exists(tensorboard_events_dir):
    shutil.rmtree(tensorboard_events_dir)
    sleep(0.5)
# Always leave an empty events directory in place, including on a fresh
# machine where it did not exist before this cell ran.
os.makedirs(tensorboard_events_dir, exist_ok=True)

## Get training parameters from previous Dask workflow run

In [None]:
# `query_api` is used by later cells to run filtered queries
# (`query_api.query(filter=...)`); `get_utc_minutes_ago` is used to build
# the time-window part of such a filter.
from flowcept import TaskQueryAPI
from flowcept.commons.utils import get_utc_minutes_ago
query_api = TaskQueryAPI()

In [None]:
# Select documents timestamped within the last 60 minutes that carry a
# non-negative `generated.epochs` value, and collect the distinct values.
_filter = {
    "utc_timestamp": {"$gte": get_utc_minutes_ago(60)},
    "generated.epochs": {"$gte": 0},
}
docs = query_api.query(filter=_filter)
epochs_params = set()
for doc in docs:
    task_id = doc["task_id"]
    generated_epochs = doc["generated"]["epochs"]
    print(f"task={task_id}, generated epochs={generated_epochs}")
    epochs_params.add(generated_epochs)
epochs_params

## Initialize Tensorboard's interceptor

In [None]:
from flowcept import Flowcept
# Create the Flowcept object for the "tensorboard" adapter and start it
# before any training runs.
# NOTE(review): presumably this begins observing the TensorBoard events
# directory — confirm against the Flowcept documentation.
flowcept = Flowcept("tensorboard")
flowcept.start()

### Now start a training run using the `epochs_params` generated by the Dask workflow.

This example assumes that you have run the Dask notebook example before. If you haven't, `epochs_params` will be empty and the next cell falls back to a single run with 1 epoch.

In [None]:
# Fall back to a single run with 1 epoch when no values were collected.
epochs_params = epochs_params or {1}

In [None]:
# Run one hyperparameter sweep per collected epochs value, passing the value
# through so that it is actually used instead of the function's default.
for epochs in epochs_params:
    workflow_id = run_tensorboard_hparam_tuning(tensorboard_events_dir, epochs=epochs)
    print(f"{epochs}, {workflow_id}")


In [None]:
# Wait before stopping; sleeps in this notebook exist for the automated
# CI/CD runs and would not be needed in a normal interactive session.
# NOTE(review): presumably this gives the interceptor time to process the
# events written during training — confirm.
sleep(10)
flowcept.stop()

## Get the training metadata stored from this workflow

In [None]:
# Query by the workflow id assigned in the training loop; after the loop it
# holds the id of the last run.
_filter = {"workflow_id": workflow_id}
docs = query_api.query(filter=_filter)
docs