In [59]:
%load_ext autoreload
%autoreload 2

from __future__ import annotations

# Add parent directory to path.
from pathlib import Path
import os
import sys
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # https://stackoverflow.com/a/64438413
fdir = Path(os.path.abspath('')).resolve() # Directory of current file.
path = fdir/'..'
if path not in sys.path:
    sys.path.append(str(path))

# Complete imports.
from functools import partial
import makassar_ml as ml
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow_datasets as tfds
import seaborn as sns
from sklearn.model_selection import ParameterGrid
sns.set() # Use seaborn themes.

# List all GPUs visible to TensorFlow.
gpus = tf.config.list_physical_devices('GPU')
print(f"Num GPUs Available: {len(gpus)}")
for gpu in gpus:
    print(f"Name: {gpu.name}, Type: {gpu.device_type}")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Num GPUs Available: 1
Name: /physical_device:GPU:0, Type: GPU


In [60]:
config_roots = dict(
    hp_tuning_root=str(Path('~/research/makassar/hp_tuning').expanduser()),
)
config_dataset = dict(
    parameters=dict(
        path=str(Path('~/research/makassar/datasets/beijing_pm25').expanduser()),
        in_feat=['day_of_year','TEMP','Iws','Is','Ir'],
        out_feat=['DEWP','PRES'],
        in_seq_len=24*30, # hours
        out_seq_len=1,
        shift=1,
        split=[0.7,0.2,0.1],
        shuffle=False,
    ),
)
config_model=dict(
    name='FoT',
    parameters=dict(
        in_seq_len=config_dataset['parameters']['in_seq_len'],
        in_feat=len(config_dataset['parameters']['in_feat']),
        out_feat=len(config_dataset['parameters']['out_feat']),
        embed_dim=dict(
            values=[
                16,
                32,
            ],
        ),
        n_heads=dict(
            values=[
                8,
            ],
        ),
        ff_dim=dict(
            values=[
                256,
                512,
            ],
        ),
        dropout=dict(
            values=[
                0.1,
                0.3,
            ],
        ),
        n_encoders=dict(
            values=[
                3,
                6,
            ],
        ),
    )
)
config_train=dict(
    batch_size=128,
    epochs=30,
    optimizer=dict(
        name='adam',
        parameters=dict(
            lr=0.001,
        ),
    ),
    compile=dict(
        loss='mse',
        metrics=['mae','mape'],
    ),
    callbacks=dict(
        EarlyStopping=dict(
            monitor='val_loss',
            mode='auto',
            patience=50,
            restore_best_weights=True,
        ),
    ),
)

config=dict(
    roots=config_roots,
    dataset=config_dataset,
    model=config_model,
    train=config_train,
)

In [61]:
# Convert config into parameter dictionary.
parameterdict = ml.tuning.config2parameterdict(config)

# Count number of grid combinations.
grid = ParameterGrid(ml.tuning.config2parameterdict(config))
print(f"Parameter grid size: {len(grid)}")

Parameter grid size: 16


In [62]:
def dataset_loader_func(
    batch_size: int,
    ) -> tuple[tf.data.Dataset, tf.data.Dataset, tf.data.Dataset]:
        return ml.datasets.beijingpm25.load_beijingpm25_ds(
            **config['dataset']['parameters'],
            batch_size=batch_size,
        )

In [63]:
def build_model_func(params: dict) -> keras.Model:
    # Create copy of parameter dictionary.
    model_params = dict(**params)

    # Configure optimizer.
    optimizer_config = dict()
    if 'lr' in model_params:
        optimizer_config['lr'] = model_params.pop('lr')
    optimizer_class_name = model_params.pop('optimizer')
    optim = keras.optimizers.get({
    'class_name': optimizer_class_name,
        'config': optimizer_config,
    })

    # Get build function for specific model.
    build_model = getattr(getattr(
        ml.models,
        config['model']['name'].lower(),
        ),
        config['model']['name'],
        )
    model = build_model(
        **model_params,
    )

    # Compile the model.
    model.compile(
        optimizer=optim,
        **config['train']['compile'],
    )
    return model

In [64]:
# Set training strategy.
strategy = tf.distribute.get_strategy()

In [65]:
# Create callback list if any were specified.
callbacks = []
if 'callbacks' in config['train']:
    for key, cb_params in config['train']['callbacks'].items():
        if hasattr(ml.callbacks, key):
            callbacks.append(getattr(ml.callbacks, key)(**cb_params))
        elif hasattr(keras.callbacks, key):
            if key == 'LearningRateScheduler':
                callbacks.append(
                    keras.callbacks.LearningRateScheduler(
                        partial(
                            getattr(ml.schedules, cb_params['schedule']),
                            **cb_params.get('parameters', {})
                        )
                    )
                )
            else:
                callbacks.append(
                    getattr(keras.callbacks, key)(**cb_params)
                )

In [66]:
# Train and evaluate the model.
model, hist, met, params, df = ml.tuning.hp_gridsearch(
    model_name=config['model']['name'].lower(),
    params=parameterdict,
    build_model_func=build_model_func,
    dataset_loader_func=dataset_loader_func,
    metric_list=config['train']['compile']['metrics'],
    batch_size=config['train']['batch_size'],
    strategy=strategy,
    epochs=config['train']['epochs'],
    tuning_root=config['roots']['hp_tuning_root'],
    callbacks=callbacks,
)

Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/usr/local/Caskroom/miniforge/base/envs/ml-metal/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3398, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/var/folders/5k/47hxnf0d2v1cvrggbx8z9ts40000gn/T/ipykernel_4444/76650699.py", line 2, in <cell line: 2>
    model, hist, met, params, df = ml.tuning.hp_gridsearch(
  File "/Volumes/GoogleDrive/My Drive/Virginia Tech/graduate/research/makassar/repos/makassar-ml/jupyter/../makassar_ml/tuning.py", line 211, in hp_gridsearch
    model = build_model_func(p)
  File "/var/folders/5k/47hxnf0d2v1cvrggbx8z9ts40000gn/T/ipykernel_4444/913067817.py", line 16, in build_model_func
    build_model = getattr(
AttributeError: module 'makassar_ml.models' has no attribute 'fot.FoT'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/Caskroom/miniforge/base/envs/ml-metal/lib/python3.8/site-packages/I

In [67]:
# Display best model information.
model.summary()

NameError: name 'model' is not defined

In [None]:
print(f"Tuning Results:")
# Build the resulting table header.
table_header = ['model']
for m in ['loss']+config['train']['compile']['metrics']:
    table_header.append(f"{m}")
    table_header.append(f"val_{m}")
    table_header.append(f"test_{m}")
table_header.extend(list(parameterdict))
# Log results as CSV to console.
csv_df = df[table_header].sort_values(by='val_loss', ascending=True)
print(csv_df.to_string(index=False))
# Log results as CSV to file.
csv_df.to_csv(
    Path(config['roots']['hp_tuning_root'])/config['model']['name'].lower()/f"tuning_results.csv",
    index=False,
)