In [None]:
#@markdown <b>Declare parameters</b><br/>   {display-mode: "form"}
# Run-level settings. `params` is merged with the sampled hyperparameters and
# written out with every exported model; `dataset` also selects the GCS folder
# that is downloaded further down.
params = dict(
    dataset="nygaardsgata__50_25_25__q4_q1__15_16_17_18_19",
    train_split=0.5,
    val_split=0.25,
    test_split=0.25,
)

# Name of the model family; used in log/model directory names
MODEL_TYPE = "rnn_updated"
# Verbosity passed to the TensorFlow logger
LOG_LEVEL = "ERROR"
# Root directory for the TensorBoard logs of this model family
LOG_DIR = f"/content/logs/{MODEL_TYPE}"

In [None]:
#@markdown <b>Import modules</b><br/>   {display-mode: "form"}
import seaborn as sns
import plotly.graph_objects as go

# Filesystem utilities
import os

# Standard library modules
import io, sys, glob, time
from datetime import datetime
from importlib import reload

# External modules
!pip install -q tensorflow-io
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_io as tfio
from tensorflow.keras.callbacks import TensorBoard, EarlyStopping, LearningRateScheduler
from tensorboard.plugins.hparams import api as hp

# Colab modules
from IPython import display

# Seed NumPy and TensorFlow so that repeated runs draw the same random numbers
np.random.seed(1)
tf.random.set_seed(1)

# Make sure the log directory exists, then set the TensorFlow logger verbosity
!mkdir -p {LOG_DIR}
tf.get_logger().setLevel(LOG_LEVEL)

# Load the Colab `data_table` extension (presumably for interactive DataFrame
# rendering - confirm), then clear the output produced by this cell (e.g. pip)
%load_ext google.colab.data_table
display.clear_output(wait=False)

In [None]:
#@markdown <b>Run me to import underscore module</b><br/>   {display-mode: "form"}
!pip install -q tensorflowjs
!pip install -q githubrelease
import numpy as np
import os, glob, re, time, json
import github_release
import tensorflow.keras.backend as K
from IPython import display
from contextlib import redirect_stdout

# Names of log sub-directories that `_log_to_gh` has already compressed; it
# skips these on later calls. Lives for the lifetime of the kernel.
compressed_dirs = set()


def _compress(source_path, target_path, target_dir=None):
    if target_dir:
        !mkdir -p {target_dir}
    if target_path.endswith('.tar.gz'):
        !tar -czf {target_path} -C {source_path} .
    elif target_path.endswith('.tar'):
        !tar -cf {target_path} -C {source_path} .
    elif target_path.endswith('.zip'):
        !(cd {source_path} && zip -q -r {target_path} .)


def _extract(source_path, target_path):
    !mkdir -p {target_path}
    if source_path.endswith('.tar.gz'):
        !tar -xzf {source_path} -C {target_path}
    elif source_path.endswith('.tar'):
        !tar -xf {source_path} -C {target_path}
    elif source_path.endswith('.zip'):
        !unzip -qq {source_path} -d {target_path}


def _under(source_path, target_path, copy=True, auth_on_upload=True):
    """
    Copy, move, compress or extract `source_path` into `target_path`.

    Paths starting with "gs://" are treated as GCS objects, everything else as
    local. The operation is inferred from the "extension" of each name, i.e.
    the text after the FIRST "." in the last path component; a name without a
    "." is treated as a directory:
        - equal extensions                      -> plain copy / move
        - directory -> zip / tar / tar.gz name  -> compression
        - zip / tar / tar.gz name -> directory  -> extraction
        - archive -> archive of another format  -> extraction & compression
    Any other combination does nothing apart from temp-dir handling.

    Use cases:
        Movement:
            - GCS -> GCS
            - GCS -> LOCAL
            - LOCAL -> GCS
            - LOCAL -> LOCAL

        Compression (e.g. from dir to .tar.gz):
            - GCS -> GCS
            - GCS -> LOCAL
            - LOCAL -> GCS
            - LOCAL -> LOCAL

        Extraction (e.g. from .zip to dir):
            - GCS -> GCS
            - GCS -> LOCAL
            - LOCAL -> GCS
            - LOCAL -> LOCAL

        Extraction & compression (e.g. from .zip to .tar.gz):
            - GCS -> GCS
            - GCS -> LOCAL
            - LOCAL -> GCS
            - LOCAL -> LOCAL

    Args:
        source_path: Local path or "gs://" URI to read from. A trailing "/"
            is stripped; local paths are made absolute.
        target_path: Local path or "gs://" URI to write to.
        copy: Only consulted for GCS -> GCS and LOCAL -> LOCAL movement;
            True copies, False moves. All other branches always copy.
        auth_on_upload: If True and the target is on GCS, run the Colab
            user authentication before doing anything else.

    Returns:
        None.

    Side effects:
        Progress lines are printed. Non-movement operations use "/tmp_" as
        scratch space and remove the whole directory when they finish.
    """
    COMPRESSION_FORMATS = ('zip', 'tar', 'tar.gz')
    TEMP_DIR = "/tmp_"
    LOG_TEMPLATE = "{}    from    {}    to    {}"

    # Source
    if source_path.endswith("/"):
        source_path = source_path[:-1]
    source_dir, _, source_name = source_path.rpartition('/')
    source_isgcs = source_path.startswith("gs://")
    source_islocal = not source_isgcs
    if source_islocal:
        source_path = os.path.abspath(source_path)
    # str.partition returns (head, separator, tail). Despite the names,
    # `source_isprefix` is the name before the first ".", and `source_isfile`
    # is the "." separator itself (truthy only when the name contains a dot).
    source_isprefix, source_isfile, source_ext = source_name.partition('.')
    source_isdir = not source_isfile
    source_iscompression = source_ext in COMPRESSION_FORMATS

    # Target
    target_dir, _, target_name = target_path.rpartition('/')
    target_isgcs = target_path.startswith("gs://")
    target_islocal = not target_isgcs
    # Same partition trick as for the source: `target_isfile` is "." or ""
    target_prefix, target_isfile, target_ext = target_name.partition('.')
    target_isdir = not target_isfile
    target_iscompression = target_ext in COMPRESSION_FORMATS

    # Flags
    MOVE_ONLY = source_ext == target_ext
    GCS_ONLY = source_isgcs and target_isgcs
    RENAME = source_isprefix != target_prefix
    COMPRESSION = source_isdir and target_iscompression
    EXTRACTION = source_iscompression and target_isdir
    EXTRACTION_COMPRESSION = source_iscompression and target_iscompression and source_ext != target_ext

    # Add forward slash if file is at root level
    source_dir = "/" if not source_dir else source_dir
    target_dir = "/" if not target_dir else target_dir

    # Authenticate if writing to GCS
    if target_isgcs and auth_on_upload:
        from google.colab import auth
        auth.authenticate_user()

    # Assert that subdirectories exist if target is local
    if target_islocal:
        !mkdir -p {target_dir}

    # Movement commands
    if MOVE_ONLY:
        # GCS -> GCS
        if source_isgcs and target_isgcs:
            action = "COPYING" if copy else "MOVING"
            print(LOG_TEMPLATE.format(f"{action} (1/1)", source_path, target_path))
            if copy:
                !gsutil -m -q cp -r {source_path} {target_path}
            else:
                !gsutil -m -q mv {source_path} {target_path}
        
        # LOCAL -> LOCAL
        elif source_islocal and target_islocal:
            action = "COPYING" if copy else "MOVING"
            print(LOG_TEMPLATE.format(f"{action} (1/1)", source_path, target_path))
            if copy:
                !cp -r {source_path} {target_path}
            else:
                !mv {source_path} {target_path}
        
        # GCS -> LOCAL
        elif source_isgcs and target_islocal:
            if source_isdir:
                # The directory is downloaded into the target's parent under
                # its source name and renamed afterwards if the names differ
                print(LOG_TEMPLATE.format("DOWNLOADING DIR (1/1)", source_path, target_dir))
                !gsutil -m -q cp -r {source_path} {target_dir}
                if RENAME:
                    print(LOG_TEMPLATE.format("\tRENAMING DIR", source_isprefix, target_prefix))
                    !mv {target_dir}/{source_isprefix} {target_dir}/{target_prefix}
            else:
                print(LOG_TEMPLATE.format("DOWNLOADING FILE (1/1)", source_path, target_path))
                !gsutil -m -q cp {source_path} {target_path}
        
        # LOCAL -> GCS
        # NOTE(review): this is a plain `if`, not `elif`; it is mutually
        # exclusive with the three branches above, so behavior is unaffected.
        if source_islocal and target_isgcs:
            if source_isdir:
                print(LOG_TEMPLATE.format("UPLOADING DIR (1/1)", source_path, target_path))
                !gsutil -m -q cp -r {source_path} {target_path}
            else:
                print(LOG_TEMPLATE.format("UPLOADING FILE (1/1)", source_path, target_path))
                !gsutil -m -q cp {source_path} {target_path}
        return


    # Create directory for intermediate storage if required
    if source_isgcs or target_isgcs or EXTRACTION_COMPRESSION:
        !mkdir -p {TEMP_DIR}
    

    # For remaining operations, download GCS source to temp and treat as local
    if source_isgcs:
        if source_isdir:
            print(LOG_TEMPLATE.format("\tDOWNLOADING DIR", source_path, TEMP_DIR))
            !gsutil -m -q cp -r {source_path} {TEMP_DIR}
        else:
            print(LOG_TEMPLATE.format("\tDOWNLOADING FILE", source_path, f"{TEMP_DIR}/{source_name}"))
            !gsutil -m -q cp {source_path} {TEMP_DIR}/{source_name}
        # From here on the downloaded copy is the source
        source_path = f"{TEMP_DIR}/{source_name}"
        source_dir = TEMP_DIR

    # Compression
    if COMPRESSION:
        if target_islocal:
            print(LOG_TEMPLATE.format("COMPRESSING (1/1)", source_path, target_path))
            _compress(source_path, target_path, target_dir=target_dir)
        else:
            # GCS target: build the archive in the temp dir, then upload it
            print(LOG_TEMPLATE.format("COMPRESSING (1/2)", source_path, f"{TEMP_DIR}/{target_name}"))
            _compress(source_path, f"{TEMP_DIR}/{target_name}")
            print(LOG_TEMPLATE.format("UPLOADING FILE (2/2)", f"{TEMP_DIR}/{target_name}", target_path))
            !gsutil -m -q cp {TEMP_DIR}/{target_name} {target_path}

    # Extraction
    elif EXTRACTION:
        if target_islocal:
            print(LOG_TEMPLATE.format("EXTRACTING (1/1)", source_path, target_path))
            _extract(source_path, target_path)
        else:
            # GCS target: extract into the temp dir, then upload the directory
            print(LOG_TEMPLATE.format("EXTRACTING (1/2)", source_path, f"{TEMP_DIR}/{target_name}"))
            _extract(source_path, f"{TEMP_DIR}/{target_name}")
            print(LOG_TEMPLATE.format("UPLOADING DIR (2/2)", f"{TEMP_DIR}/{target_name}", target_path))
            !gsutil -m -q cp -r {TEMP_DIR}/{target_name} {target_path}

    # Extraction & compression
    elif EXTRACTION_COMPRESSION:
        # Format conversion: unpack into the temp dir, then re-pack as target
        if target_islocal:
            print(LOG_TEMPLATE.format("EXTRACTING (1/2)", source_path, f"{TEMP_DIR}/{target_prefix}"))
            _extract(source_path, f"{TEMP_DIR}/{target_prefix}")
            print(LOG_TEMPLATE.format("COMPRESSING (2/2)", f"{TEMP_DIR}/{target_prefix}", target_path))
            _compress(f"{TEMP_DIR}/{target_prefix}", target_path, target_dir=target_dir)
        else:
            print(LOG_TEMPLATE.format("EXTRACTING (1/3)", source_path, f"{TEMP_DIR}/{target_prefix}"))
            _extract(source_path, f"{TEMP_DIR}/{target_prefix}")
            print(LOG_TEMPLATE.format("COMPRESSING (2/3)", f"{TEMP_DIR}/{target_prefix}", f"{TEMP_DIR}/{target_name}"))
            _compress(f"{TEMP_DIR}/{target_prefix}", f"{TEMP_DIR}/{target_name}")
            print(LOG_TEMPLATE.format("UPLOADING FILE (3/3)", f"{TEMP_DIR}/{target_name}", target_path))
            !gsutil -m -q cp {TEMP_DIR}/{target_name} {target_path}
    
    # Cleanup intermediate storage
    # NOTE(review): this removes all of "/tmp_", the same directory the GitHub
    # helpers in this cell stage their files in.
    !rm -rf {TEMP_DIR}

def _set_gh_token(token):
    os.environ["GITHUB_TOKEN"] = token


def _export_array(array, release_name, prefix="", splits=3):
    dir_path = f"/tmp_/{release_name}"
    !mkdir -p {dir_path}
    n_digits = len(str(splits - 1))
    subarrays = np.array_split(array, splits)
    for i, subarray in enumerate(subarrays):
        filename = f"{prefix}__{str(i).zfill(n_digits)}.npy"
        np.save(f"{dir_path}/{filename}", subarray)


def _concat_arrays(paths):
    return np.concatenate([np.load(path, allow_pickle=True) for path in sorted(paths)])


def _to_gh(user_name, repo_name, release_name, split_size=600, **arr_kwargs):
    # Assert that GitHub Auth token is set
    if "GITHUB_TOKEN" not in os.environ:
        print("GitHub authentication token is not set.")
        print("Set token using the '_set_gh_token(token_string)' method.")
        print("Minimal required auth scope is 'repo/public_repo' for public repositories.")
        print("URL: https://github.com/settings/tokens/new")
        return

    # Split arrays
    for prefix, array in arr_kwargs.items():
        splits = int((array.nbytes/1_000_000) // split_size) + 1
        _export_array(array, release_name, prefix=prefix, splits=splits)

    # Upload arrays
    github_release.gh_release_create(
        f"{user_name}/{repo_name}", 
        release_name, 
        publish=True, 
        name=release_name, 
        asset_pattern=f"/tmp_/{release_name}/*"
    )
    !rm -rf /tmp_/*


def _from_gh(user_name, repo_name, release_name):
    # Download release to temporary directory
    print("Downloading dataset in parallell ... ", end='\t')
    t0 = time.perf_counter()
    assets = github_release.get_assets(f"{user_name}/{repo_name}", tag_name=release_name)
    download_urls = [asset['browser_download_url'] for asset in assets]
    urls_str = " ".join(download_urls)
    !echo {urls_str} | xargs -n 1 -P 8 wget -q -P /tmp_/{release_name}_dl/
    t1 = time.perf_counter()
    print(f"done! ({t1 - t0:.3f} seconds)")

    # Load data into numpy arrays
    paths = glob.glob(f"/tmp_/{release_name}_dl/*.npy")
    groups = {}
    for path in paths:
        match = re.match(r".*/(.*)__[0-9]*\.npy", path)
        if match:
            prefix = match.group(1)
            groups[prefix] = groups.get(prefix, []) + [path]
    arrays_dict = {name: _concat_arrays(paths) for name, paths in groups.items()}
    !rm -rf /tmp_/*
    return arrays_dict
    

def _log_to_gh(user, repo, tag, log_dir="/tmp/logs"):
    # Create temporary directory for compressed logs
    !mkdir -p /tmp/compressed_logs
    
    # Compress all directories in log dir
    for dirname in os.listdir(log_dir):
        # Skip files
        if "." in dirname or dirname in compressed_dirs:
            continue

        # Compress
        _under(f"{log_dir}/{dirname}", f"/tmp/compressed_logs/{dirname}.tar.gz")
        compressed_dirs.add(dirname)

    # Upload compressed logs to GitHub
    github_release.gh_asset_upload(f"{user}/{repo}", tag, f"/tmp/compressed_logs/*.tar.gz")

    # Cleanup compressed logs
    !rm -rf /tmp/compressed_logs/*

def timeit(method):
    """Decorator that prints how long each call to `method` took.

    The wrapper forwards all arguments, returns the wrapped function's result
    and prints `"<name>: <seconds> s"` after a successful call (nothing is
    printed if the call raises). `functools.wraps` keeps the wrapped
    function's `__name__` and docstring, so decorated functions remain
    identifiable in tracebacks and help output.
    """
    from functools import wraps

    @wraps(method)
    def timed(*args, **kw):
        ts = time.perf_counter()
        result = method(*args, **kw)
        elapsed = time.perf_counter() - ts
        print(f"{method.__name__}: {elapsed:.8f} s")
        return result
    return timed

class NpEncoder(json.JSONEncoder):
    """JSON encoder that understands NumPy scalars and arrays.

    NumPy booleans, integers and floating-point scalars are converted to the
    matching Python built-ins and arrays to (nested) lists. Anything else is
    passed to the base class, which raises `TypeError`.
    """

    def default(self, obj):
        # np.bool_ is not an np.integer, so it needs its own branch; without
        # it a boolean metric or flag would make json.dump raise TypeError
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)


# Baseline model
class BaselineModel(tf.keras.Model):
    """Naive baseline without trainable weights.

    For every sample it takes feature 0 (NOX) at the last input time step and
    repeats that value `output_sequence_len` times as the forecast.
    """

    def __init__(self, params, hparams, **kwargs):
        super().__init__(**kwargs)
        # hparams take precedence over params when both define a key
        merged = dict(params)
        merged.update(hparams)
        self.output_sequence_len = merged['output_sequence_len']

    def call(self, inputs):
        # Feature 0 is NOX (moved to the first dataframe column); index -1 on
        # the second-to-last axis selects the most recent time step
        last_nox = inputs[..., -1, 0]
        return tf.repeat(last_nox[..., None], self.output_sequence_len, axis=1)


@timeit
def _export_model(model, model_name, model_type, train_dataset, val_dataset, test_dataset, params, hparams, history, log_dir, loss, metrics):
    """Evaluate a trained model, stage all of its artifacts and upload them to GCS.

    Everything is written to `/tmp/models/{model_type}/{model_name}`:
    `export_logs.txt` (stdout captured during the export), a per-epoch
    `{model_name}.parquet`, `model_details.json`, a `model/` sub-directory
    (text and image summary, Keras .h5 file, SavedModel) and the compressed
    TensorBoard logs. The directory is then copied to
    `gs://telenor-data-science/models/{model_type}/{model_name}`.

    Args:
        model: Trained Keras model.
        model_name: Run name; also the name of the run's TensorBoard log
            sub-directory inside `log_dir`.
        model_type: Model family, used in the local and GCS paths.
        train_dataset, val_dataset, test_dataset: Datasets to evaluate on.
        params, hparams: Run settings; both are stored with the results and
            used to configure the baseline model.
        history: Keras `History` object returned by `model.fit`.
        log_dir: Root directory of the TensorBoard logs.
        loss, metrics: Loss and metrics the baseline model is compiled with.

    Returns:
        DataFrame with one row per epoch (training history) and every logged
        parameter / final metric repeated as a constant column.
    """
    # Local staging directory for everything that gets uploaded
    target_dir = f"/tmp/models/{model_type}/{model_name}"
    os.makedirs(target_dir, exist_ok=True)

    def _prefixed_metrics(evaluated_model, dataset, prefix):
        # Evaluate on one dataset and namespace the metric names with `prefix`
        results = evaluated_model.evaluate(dataset, return_dict=True)
        return {f"{prefix}_{k}": v for k, v in results.items()}

    def _convert_model(model, subdir="model"):
        # Write the summaries and both serialized forms of `model` to `subdir`
        subdir_path = os.path.join(target_dir, subdir)
        os.makedirs(subdir_path, exist_ok=True)

        # Write model summary to file
        model_summary_path = os.path.join(subdir_path, "model_summary.txt")
        with open(model_summary_path, 'w') as model_summary:
            with redirect_stdout(model_summary):
                model.summary()

        # Export model summary as image
        model_summary_img_path = os.path.join(subdir_path, "model_summary.png")
        tf.keras.utils.plot_model(model, to_file=model_summary_img_path, show_shapes=True)

        # Generate model paths
        keras_model_path = os.path.join(subdir_path, "keras_model.h5")
        saved_model_path = os.path.join(subdir_path, "saved_model")

        # Save and convert model
        model.save(keras_model_path)
        tf.saved_model.save(model, saved_model_path)

    splits = (("train", train_dataset), ("val", val_dataset), ("test", test_dataset))

    # Write export logs to file
    export_logs_path = os.path.join(target_dir, "export_logs.txt")
    with open(export_logs_path, 'w') as export_logs:
        with redirect_stdout(export_logs):
            # Get number of parameters
            params_counts = {
                "trainable_params": np.sum([K.count_params(w) for w in model.trainable_weights]),
                "non_trainable_params": np.sum([K.count_params(w) for w in model.non_trainable_weights])
            }
            params_counts["total_params"] = params_counts["trainable_params"] + params_counts["non_trainable_params"]

            # Generate baselines
            baseline_model = BaselineModel(params, hparams)
            baseline_model.compile(loss=loss, metrics=metrics)

            # Evaluation metrics of the trained model on train, validation and test set
            final_metrics = {}
            for split_name, dataset in splits:
                final_metrics.update(_prefixed_metrics(model, dataset, f"final_{split_name}"))

            # Baseline metrics on train, validation and test set
            baseline_metrics = {}
            for split_name, dataset in splits:
                baseline_metrics.update(_prefixed_metrics(baseline_model, dataset, f"baseline_{split_name}"))

            # Generate Dataframe and export to parquet
            logs_params = {
                "num_epochs": len(history.epoch),
                **params,
                **hparams,
                **history.params,
                **params_counts,
                **final_metrics,
                **baseline_metrics
            }
            logs_df = pd.DataFrame({**history.history, "epoch": history.epoch})
            for param, value in logs_params.items():
                logs_df[param] = value
            logs_df.to_parquet(os.path.join(target_dir, f"{model_name}.parquet"))

            # Dump all parameters and metadata to .json file
            with open(os.path.join(target_dir, 'model_details.json'), 'w') as f:
                json.dump(logs_params, f, cls=NpEncoder, indent=4)

            # Convert full model
            _convert_model(model, subdir="model")

            # Compress TensorBoard logs. Uses the `log_dir` argument rather
            # than the LOG_DIR global, so the function honors its own parameter.
            model_log_dir = os.path.join(log_dir, model_name)
            tensorboard_logs_path = os.path.join(target_dir, f"{model_name}.tar.gz")
            _under(model_log_dir, tensorboard_logs_path)

    # Upload the staged directory to GCS (no interactive authentication here)
    _under(target_dir, f"gs://telenor-data-science/models/{model_type}/{model_name}", auth_on_upload=False)
    return logs_df

# Clear the output produced while running this cell (e.g. pip install logs)
display.clear_output(wait=False)

In [None]:
#@markdown <b>Download dataset from GCS</b><br/>   {display-mode: "form"}
# Download the dataset directory from GCS to local disk
SOURCE_PATH = f"gs://telenor-data-science/datasets/final_datasets/{params['dataset']}"
TARGET_PATH = "/content/nygaardsgata_datasetssssssssssss"
_under(SOURCE_PATH, TARGET_PATH)

# Read every .parquet file below the target directory, keyed by the file
# name up to its first "."
dfs = {}
for path in glob.glob(f"{TARGET_PATH}/**/*.parquet", recursive=True):
    df_name = os.path.basename(path).split('.')[0]
    df = pd.read_parquet(path)
    dfs[df_name] = df

# Pick out the three splits
train_df, val_df, test_df = (dfs[split] for split in ('train', 'val', 'test'))

# Model inputs: all columns as float32
train_data, val_data, test_data = (
    frame.values.astype('float32') for frame in (train_df, val_df, test_df)
)

# Targets: the 'nox' column as float32
train_targets, val_targets, test_targets = (
    frame['nox'].values.astype('float32') for frame in (train_df, val_df, test_df)
)

data_arrays = train_data, val_data, test_data
target_arrays = train_targets, val_targets, test_targets
display.clear_output(wait=False)

In [None]:
#@markdown <b>Build time series dataset</b><br/>   {display-mode: "form"}
def build_datasets(data_arrays, target_arrays, params, hparams):
    """Build zipped (input window, target window) datasets, one per split.

    For each (data, targets) pair an input window of `input_sequence_len`
    consecutive rows is paired with the `output_sequence_len` target values
    that immediately follow it. The arrays passed in are used directly; the
    function does not read the notebook-level `train_data` etc. globals.

    Args:
        data_arrays: Iterable of feature arrays, one per split, indexed by
            time step along the first axis.
        target_arrays: Iterable of target arrays in the same split order.
        params, hparams: Settings dicts (hparams win on duplicate keys);
            must provide 'batch_size', 'input_sequence_len' and
            'output_sequence_len'.

    Returns:
        Tuple of `tf.data.Dataset` objects in the order of `data_arrays`
        (train, val, test for the callers in this notebook).
    """
    all_params = {**params, **hparams}
    batch_size = all_params['batch_size']
    input_len = all_params['input_sequence_len']
    output_len = all_params['output_sequence_len']

    def build_dataset(data, sequence_length=24, batch_size=32):
        return tf.keras.preprocessing.timeseries_dataset_from_array(
            data, None, sequence_length=sequence_length,
            sampling_rate=1, batch_size=batch_size
        )

    def build_split(data, targets):
        # Drop the last `output_len` rows from the inputs and the first
        # `input_len` values from the targets, so window i of the inputs is
        # followed directly by window i of the targets and both datasets
        # contain the same number of windows
        inputs = build_dataset(data[:-output_len, ...], sequence_length=input_len, batch_size=batch_size)
        labels = build_dataset(targets[input_len:], sequence_length=output_len, batch_size=batch_size)
        return tf.data.Dataset.zip((inputs, labels))

    return tuple(build_split(data, targets) for data, targets in zip(data_arrays, target_arrays))
# Clear any output produced while running this cell
display.clear_output(wait=False)

In [None]:
#@markdown <b>Build TensorFlow Graph</b><br/>   {display-mode: "form"}
def _bidirectional(layer_cls):
    """Return a factory that builds `layer_cls(*args, **kwargs)` wrapped in Bidirectional."""
    def factory(*args, **kwargs):
        return tf.keras.layers.Bidirectional(layer_cls(*args, **kwargs))
    return factory


# Maps the 'recurrent_cell' hyperparameter to a layer factory
cell_types = {
    'gru': tf.keras.layers.GRU,
    'lstm': tf.keras.layers.LSTM,
    'bi-gru': _bidirectional(tf.keras.layers.GRU),
    'bi-lstm': _bidirectional(tf.keras.layers.LSTM),
}

# Maps the 'optimizer' hyperparameter to a Keras optimizer class
optimizers = {
    'adam': tf.keras.optimizers.Adam,
    'rmsprop': tf.keras.optimizers.RMSprop,
    'sgd': tf.keras.optimizers.SGD,
}

def build_model(params, hparams):
    """Assemble the (uncompiled) recurrent forecasting model.

    The network is a stack of `recurrent_layers` recurrent layers of type
    `recurrent_cell` with `recurrent_units` units and L2 kernel
    regularization. All but the last return full sequences and are each
    followed by Dropout(0.1). A Dense layer with `output_sequence_len` units
    produces the forecast.

    Returns:
        A `tf.keras.Sequential` model.
    """
    all_params = {**params, **hparams}
    n_stacked = hparams['recurrent_layers'] - 1
    make_cell = cell_types[hparams['recurrent_cell']]
    units = hparams['recurrent_units']

    model = tf.keras.Sequential()

    # Intermediate recurrent layers pass the whole sequence on to the next one
    for _ in range(n_stacked):
        model.add(make_cell(units, return_sequences=True, kernel_regularizer=tf.keras.regularizers.L2()))
        model.add(tf.keras.layers.Dropout(0.1))

    # The last recurrent layer only emits its final state
    model.add(make_cell(units, kernel_regularizer=tf.keras.regularizers.L2()))

    # One output unit per forecast step
    model.add(tf.keras.layers.Dense(all_params['output_sequence_len']))
    return model

def train_model(params, hparams, metrics, log_dir, model_type, **fit_kwargs):
    """Build, compile and fit a model, logging the run to TensorBoard.

    The run is named `{model_type}_{YYYYmmdd_HHMMSS}` and logged to
    `{log_dir}/{run name}`. Training uses early stopping (patience 10, best
    weights restored).

    Note: `hparams` is modified in place - a "__timestamp__" entry holding
    the run's start time as an integer is added before it is logged.

    Args:
        params, hparams: Settings passed on to `build_model`; `hparams` also
            selects the optimizer, learning rate and loss.
        metrics: Metrics to compile the model with.
        log_dir: Root directory for TensorBoard logs.
        model_type: Prefix of the run name.
        **fit_kwargs: Forwarded to `model.fit` (e.g. x, validation_data, epochs).

    Returns:
        Tuple `(model, history, dir_name)`.
    """
    # Name the run after its start time
    timestamp = datetime.now()
    dir_name = f"{model_type}_{timestamp:%Y%m%d_%H%M%S}"
    filename = f"{log_dir}/{dir_name}"
    hparams["__timestamp__"] = int(timestamp.strftime("%Y%m%d%H%M%S"))

    # Callbacks: scalar logging, hyperparameter logging, early stopping
    callbacks = [
        TensorBoard(filename, write_graph=False, histogram_freq=0, write_images=False),
        hp.KerasCallback(filename, hparams, trial_id=dir_name),
        EarlyStopping(patience=10, restore_best_weights=True, verbose=1),
    ]

    # Build model and run
    model = build_model(params, hparams)
    optimizer_cls = optimizers[hparams['optimizer']]
    model.compile(
        optimizer=optimizer_cls(learning_rate=hparams['learning_rate']),
        loss=hparams['loss'],
        metrics=metrics
    )
    history = model.fit(callbacks=callbacks, **fit_kwargs)
    return model, history, dir_name

In [None]:
#@markdown <b>Define hyperparameter domains</b><br/>   {display-mode: "form"}
# Evaluation metrics computed during training and evaluation
metrics = [
    tf.keras.metrics.MeanSquaredError(name='mse'),
    tf.keras.metrics.MeanAbsoluteError(name='mae'),
    tf.keras.metrics.MeanSquaredLogarithmicError(name='msle'),
]

# The same metrics as declared for the TensorBoard HParams plugin
hp_metrics = [
    hp.Metric('mse', display_name='mse'),
    hp.Metric('mae', display_name='mae'),
    hp.Metric('msle', display_name='msle'),
]

# Search space: one domain per hyperparameter, sampled uniformly by the
# random search
hparams_refs = {
    'recurrent_cell': hp.HParam('recurrent_cell', hp.Discrete(['gru', 'lstm'])),
    'recurrent_layers': hp.HParam('recurrent_layers', hp.IntInterval(2, 4)),
    'recurrent_units': hp.HParam('recurrent_units', hp.Discrete([16, 32, 64, 128])),

    'input_sequence_len': hp.HParam('input_sequence_len', hp.Discrete([24, 48])),
    'output_sequence_len': hp.HParam('output_sequence_len', hp.Discrete([12, 24, 48])),

    'batch_size': hp.HParam('batch_size', hp.Discrete([32, 64, 128, 256])),
    'optimizer': hp.HParam('optimizer', hp.Discrete(['adam', 'sgd'])),
    'loss': hp.HParam('loss', hp.Discrete(['mse', 'mae'])),
    # Half-decade grid around 1e-4. The middle value was written `10e-4`,
    # which is 1e-3 (10 * 10**-4) and falls outside the two neighbours.
    'learning_rate': hp.HParam('learning_rate', hp.Discrete([10**-3.5, 10**-4, 10**-4.5])),
}

In [None]:
#@markdown <b>Train model &#128515;&#128515;&#128515;</b><br/>   {display-mode: "form"}
# Random search with parameter lock: hyperparameters listed in
# `hparams_locked` are fixed, all others are sampled from `hparams_refs`
import traceback

hparams_locked = {
    'recurrent_cell': 'lstm',
    # 'recurrent_layers': 3,
    # 'recurrent_units': 64,

    # 'input_sequence_len': 168,
    'output_sequence_len': 48,

    # 'batch_size': 64,
    # 'optimizer': 'adam',
    # 'loss': 'mse',
    # 'learning_rate': 10e-4, 
}

NUM_ITERATIONS = 300
for i in range(NUM_ITERATIONS):
    hparams = {k: v.domain.sample_uniform() for k, v in hparams_refs.items() if k not in hparams_locked}
    hparams.update(hparams_locked)

    try:
        # Build datasets
        datasets = build_datasets(data_arrays, target_arrays, params, hparams)
        train_dataset, val_dataset, test_dataset = datasets
        print(hparams)

        fit_kwargs = {
            "x": train_dataset,
            "validation_data": val_dataset,
            "epochs": 300,
            "verbose": 0
        }
        model, history, model_name = train_model(params, hparams, metrics, log_dir=LOG_DIR, model_type=MODEL_TYPE, **fit_kwargs)
        _export_model(model, model_name, MODEL_TYPE, train_dataset, val_dataset, test_dataset, params, hparams, history, LOG_DIR, hparams['loss'], metrics)
    except Exception:
        # Best effort: one failed trial must not stop the search, but report
        # which trial failed and the full traceback so it can be diagnosed
        print(f"Trial {i + 1}/{NUM_ITERATIONS} failed with hparams: {hparams}")
        traceback.print_exc()

In [None]:
#@markdown <b>Single run &#128515;&#128515;&#128515;</b><br/>   {display-mode: "form"}
# Single training run with a fixed, hand-picked set of hyperparameters
import traceback

hparams = {
    'recurrent_cell': 'lstm',
    'recurrent_layers': 2,
    'recurrent_units': 64,

    'input_sequence_len': 168,
    'output_sequence_len': 24,

    'batch_size': 32,
    'optimizer': 'sgd',
    'loss': 'mse',
    # NOTE(review): 10e-4 evaluates to 1e-3, not 1e-4 - confirm the intended rate
    'learning_rate': 10e-4, 
}


try:
    # Build datasets
    datasets = build_datasets(data_arrays, target_arrays, params, hparams)
    train_dataset, val_dataset, test_dataset = datasets
    print(hparams)

    fit_kwargs = {
        "x": train_dataset,
        "validation_data": val_dataset,
        "epochs": 300,
        "verbose": 1
    }
    model, history, model_name = train_model(params, hparams, metrics, log_dir=LOG_DIR, model_type=MODEL_TYPE, **fit_kwargs)
    _export_model(model, model_name, MODEL_TYPE, train_dataset, val_dataset, test_dataset, params, hparams, history, LOG_DIR, hparams['loss'], metrics)
except Exception:
    # Keep the cell from aborting, but show the full traceback instead of
    # only the exception message
    print(f"Single run failed with hparams: {hparams}")
    traceback.print_exc()

In [None]:
#@title TensorBoard { run: "auto", vertical-output: true, display-mode: "form" }
%reload_ext tensorboard
%tensorboard --logdir /tmp/tb_files/rnn_updated --port 6006
display.clear_output(wait=False)
tb.notebook.display(height=1400)