## LSTM Model Tuning using Keras Tuner

This notebook uses Keras Tuner to optimize an LSTM model. The same model is also used in the federated learning notebook that is part of this project.

The data used is from the paper: *Framework for Creating Forest Fire Ignition Prediction Datasets.* Each row represents meteorological data at a geographical location at a specific time. TODO: Add table example.

Much of the code used in this notebook is based on the Keras tuner code examples located [here](https://www.tensorflow.org/tutorials/keras/keras_tuner) and the Keras timeseries tutorials located [here](https://keras.io/examples/timeseries/).
The code below is very much a work in progress.

In [1]:
# Set to 1 when this notebook runs in Google Colab, otherwise 0.
# The Colab-only cells (Drive mount, RAM check) test this flag.
using_colab = 1

In [2]:
# Colab only: mount Google Drive so the data stored there can be read.
if using_colab == 1:
    from google.colab import drive
    drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Colab only: report the total RAM of the runtime and whether it counts as
# a high-RAM runtime (20 GB or more).
if using_colab == 1:
    from psutil import virtual_memory
    ram_gb = virtual_memory().total / 1e9
    print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

    print('You are using a high-RAM runtime!' if ram_gb >= 20
          else 'Not using a high-RAM runtime')

Your runtime has 13.6 gigabytes of available RAM

Not using a high-RAM runtime


In [4]:
# install Keras tuner library if on Colab
!pip install -q keras_tuner

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/176.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m176.1/176.1 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [20]:
# load libraries
# TODO: get rid of libraries that aren't needed
import math
import os
import glob
import gc
import datetime

import random

import pandas as pd
import numpy as np

import tensorflow as tf
from tensorflow import keras
from keras import layers
from keras import Model

import keras_tuner as kt

In [21]:
# overall environment settings

# Make TensorFlow logs less verbose
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

# Training on GPU or CPU?
# tf.config.set_visible_devices([], 'GPU')
python_version = !python --version
print(
    f"Training on {'GPU' if tf.config.get_visible_devices('GPU') else 'CPU'}\
    using TensorFlow {tf.__version__}, Keras tuner {kt.__version__}, and {python_version[0]}"
)

Training on CPU    using TensorFlow 2.12.0, Keras tuner 1.3.5, and Python 3.10.12


In [22]:
# global variables

# learning mode: 0 = classic ML, 1 = federated ML w/ centralized evaluation,
# 2 = federated ML w/ federated eval
ml_type = 0

cid = "0"  # preliminary client id

# directory layout: everything lives below master_path
master_path = "/content/drive/MyDrive/Colab Notebooks/FF/"
federated_path = f"{master_path}data/24_clients/"
centralized_path = f"{master_path}data/01_clients/"
results_path = f"{master_path}history/"
temp_path = f"{master_path}history_temp/"
# one log directory per client id and start time
log_dir = f"{master_path}tensorflow_logs/{cid}_{datetime.datetime.now():%Y%m%d-%H%M%S}"

# template for pointing the paths at your own locations:
#master_path = <path to your master directory for this simulation>
#federated_path = <path to your client datasets>
#centralized_path = <path to your server dataset>
#results_path = <path to where you want to store results>
#log_dir = <path to where you want to store Tensorflow logs> + cid + "_" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

downsample_test_set = 0  # 0 if test set not downsampled, 1 otherwise

# time series windowing
past_len = sequence_len = 120  # 5 days * 24 hours
future_len = 24  # 1*24 hours
sampling_rate = 1  # in time series conversion use every row (hour) in loaded datasets
sequence_stride = 1  # in time series conversion each series is this far apart




## Data loading functions
The data loading functions are taken from the federated learning portion of this project and are not optimized for this notebook.

In [23]:
def normalize_data(x):
    """Normalizes an array with a Keras ``Normalization`` layer.

    A fresh layer is adapted to ``x`` and then applied to that same data,
    so the statistics used for scaling come from ``x`` itself. The intent
    is a distribution centered around 0 with standard deviation 1 for
    each column.

    Parameters
    ----------
    x: NDarray
        An array of feature values.

    Returns
    -------
    features_normalized
        The output of the adapted normalization layer applied to ``x``.
    """
    normalizer = layers.Normalization()
    normalizer.adapt(x)
    return normalizer(x)

In [24]:
def mask_create(x):
    """Builds a selection mask that balances the two classes of a label array.

    Every positive (ignition, label 1) entry is selected, and an equal number
    of negative (label 0) entries is picked uniformly at random without
    replacement. If there are fewer negatives than positives, all negatives
    are selected. Randomness comes from the ``random`` module.

    Parameters
    ----------
    x: NDarray or Tensor
        Binary labels (0 or 1), shaped (n,) or (n, 1).

    Returns
    -------
    mask : NDarray
        A 1-D array of length n with the dtype of ``x``: 1 marks a row to
        keep, 0 a row to drop.
    """
    # np.array copies, so the caller's labels are never modified in place
    mask = np.array(x).reshape(-1)

    # count by value; the order in which the classes first appear is irrelevant
    ignition_count = int(np.count_nonzero(mask == 1))
    negative_indices = np.flatnonzero(mask == 0)

    # sampling without replacement always terminates and can pick any index,
    # including index 0; capping handles splits with too few negatives
    keep_count = min(ignition_count, len(negative_indices))
    for index in random.sample(negative_indices.tolist(), keep_count):
        mask[index] = 1
    return mask

In [25]:
def _windows_from_split(df_split, feature_columns):
    """Normalizes one data split and returns its time series windows as one batch."""
    # NOTE(review): each split is normalized with its own statistics, not
    # with the statistics of the training split -- confirm this is intended.
    split_features = normalize_data(df_split[feature_columns].values)

    #we want to predict at a future point
    #so we clip the length of the features plus the hours till the future point
    start = past_len + future_len
    split_labels = df_split[["ignition"]].iloc[start:].values

    # NOTE(review): only the first batch is returned, so a split with more
    # windows than batch_size would be truncated.
    batch_size = 107856 #factor of 5136 (321 * 16)
    dataset = keras.utils.timeseries_dataset_from_array(
        split_features,
        split_labels,
        sampling_rate=sampling_rate,
        sequence_length=sequence_len,
        sequence_stride=sequence_stride,
        shuffle=False,
        batch_size=batch_size)

    for window_features, window_labels in dataset:
        return window_features, window_labels
    # fail loudly instead of continuing with arrays that were never windowed
    raise ValueError("data split produced no time series windows")


def _downsample_split(split_features, split_labels):
    """Randomly downsamples one split so that both classes are equally frequent."""
    mask = mask_create(split_labels)
    return tf.boolean_mask(split_features, mask), tf.boolean_mask(split_labels, mask)


def load_datasets(path: str):
    """Loads all the csv datasets in a folder.
    Each file is divided into train, validation, and test sets by year.
    Every split is normalized and turned into time series windows.
    Train and validation sets are downsampled; the test set is downsampled
    only when the global ``downsample_test_set`` is 1.

    Parameters
    ----------
    path: string
        The path to the dataset folder.

    Returns
    -------
    train_x, train_y, val_x, val_y, test_x, test_y : lists
        Six lists with one entry per csv file (files in sorted name order).
        The ``*_x`` entries hold the feature windows and the ``*_y`` entries
        the matching labels.

    Raises
    ------
    ValueError
        If a split of a file yields no time series windows.
    """
    train_x, train_y = [], []
    val_x, val_y = [], []
    test_x, test_y = [], []

    features = ['stl2',
                't2m',
                'stl1',
                'stl3',
                'skt',
                'swvl1',
                'd2m',
                'swvl2'
                ] #this shouldn't be hard coded

    #load data; sorted() makes the order of the returned lists deterministic
    for filename in sorted(glob.glob(os.path.join(path, '*.csv'))):
        print("\nnow reading " + filename + "\n")
        #read file
        df = pd.read_csv(filename, index_col=[0])

        # NOTE(review): rows with year == 2001 end up in neither the train
        # nor the validation split -- confirm the gap is intended.
        df_train = df[(df['year'] < 2001)]
        df_val = df[(df['year'] > 2001) & (df['year'] < 2012)]
        df_test = df[(df['year'] >= 2012)]

        #normalize and convert to time series data
        train_features, train_labels = _windows_from_split(df_train, features)
        val_features, val_labels = _windows_from_split(df_val, features)
        test_features, test_labels = _windows_from_split(df_test, features)

        #for bookkeeping print out the shapes of the datasets
        print("train_dataset features shape:", train_features.shape)
        print("targets_dataset labels shape:", train_labels.shape)

        print("\nval_dataset features shape:", val_features.shape)
        print("val_dataset labels shape:", val_labels.shape)

        print("\ntest_dataset features shape:", test_features.shape)
        print("test_dataset labels shape:", test_labels.shape)

        # randomly downsample the data using masks
        train_features_masked, train_labels_masked = _downsample_split(train_features, train_labels)
        val_features_masked, val_labels_masked = _downsample_split(val_features, val_labels)

        train_x.append(train_features_masked)
        train_y.append(train_labels_masked)
        val_x.append(val_features_masked)
        val_y.append(val_labels_masked)
        if (downsample_test_set == 1):
            # the test mask is only built when it is actually used
            test_features_masked, test_labels_masked = _downsample_split(test_features, test_labels)
            test_x.append(test_features_masked)
            test_y.append(test_labels_masked)
        else:
            test_x.append(test_features)
            test_y.append(test_labels)

    print("\nDone loading data.\n")
    return train_x, train_y, val_x, val_y, test_x, test_y



In [26]:
def get_value_count(x):
    """A helper function that returns the count of class labels.

    Labels are counted by value, so the result does not depend on which
    class appears first, and a class that is absent is reported as 0.

    Parameters
    ----------
    x: sequence of NDArrays or Tensors
        Label arrays (for example one per loaded file); all of them are
        counted together.

    Returns
    -------
    non_ignition_count, ignition_count : int
        The number of labels equal to 0 and the number of labels equal to 1.
    """
    parts = [np.asarray(part).reshape(-1) for part in x]
    if not parts:
        return 0, 0
    labels = np.concatenate(parts)
    non_ignition_count = int(np.count_nonzero(labels == 0))
    ignition_count = int(np.count_nonzero(labels == 1))
    return non_ignition_count, ignition_count


# Load data

In [27]:
# load the dataset for centralized evaluation (for classic ml training and testing)
(trainloaders_x, trainloaders_y,
 valloaders_x, valloaders_y,
 testloaders_x, testloaders_y) = load_datasets(centralized_path)


now reading /content/drive/MyDrive/Colab Notebooks/FF/data/01_clients/dly_avg_1of1_50.csv

train_dataset features shape: (107712, 120, 8)
targets_dataset labels shape: (107712, 1)

val_dataset features shape: (51216, 120, 8)
val_dataset labels shape: (51216, 1)

test_dataset features shape: (46080, 120, 8)
test_dataset labels shape: (46080, 1)

Done loading data.



In [28]:
# a quick check of the label count for each split
for message, labels in (
    ("Train set nonignitions and ignitions are:", trainloaders_y),
    ("Validation set nonignitions and ignitions are:", valloaders_y),
    ("Test set nonignitions and ignitions are:", testloaders_y),
):
    count = get_value_count(labels)
    print(message, count)

Train set nonignitions and ignitions are: (2811, 2811)
Validation set nonignitions and ignitions are: (2534, 2534)
Test set nonignitions and ignitions are: (44719, 1361)


In [29]:
# define the metrics to be used
# The `name` given to each metric is the key it is reported under; the tuner
# objective "val_accuracy" refers to the metric named 'accuracy'.

METRICS = [
    # raw confusion-matrix counts
    keras.metrics.TruePositives(name='tp'),
    keras.metrics.FalsePositives(name='fp'),
    keras.metrics.TrueNegatives(name='tn'),
    keras.metrics.FalseNegatives(name='fn'),
    # summary classification metrics
    keras.metrics.BinaryAccuracy(name='accuracy'),
    keras.metrics.Precision(name='precision'),
    keras.metrics.Recall(name='recall'),
    keras.metrics.AUC(name='auc'),
    keras.metrics.AUC(name='prc', curve='PR'), # precision-recall curve
    # sensitivity at specificity 0.5, and specificity at sensitivity 0.5
    keras.metrics.SensitivityAtSpecificity(0.5, name='sensitivity'),
    keras.metrics.SpecificityAtSensitivity(0.5, name='specificity')
    #keras.metrics.F1Score(name='f1_score'),#only available with nightly build
]

In [30]:
# define the metrics and model function and tuning parameters

def make_model(hp, metrics=METRICS):
    """Builds and compiles the LSTM binary classifier for one tuner trial.

    The network is two stacked LSTM layers (both returning sequences), a
    Flatten layer and a single sigmoid output unit. The tuned
    hyperparameters are the number of LSTM units and the learning rate.

    Parameters
    ----------
    hp : keras_tuner.HyperParameters
        The hyperparameter container supplied by the tuner.
    metrics : list
        The Keras metrics to compile the model with.

    Returns
    -------
    model : keras.Model
        The compiled model.
    """
    # Both LSTM layers share the single 'units' hyperparameter, so it is
    # requested once and reused.
    hp_units = hp.Choice('units', [8, 16, 32, 64, 128, 256])

    inputs = keras.Input(shape=(sequence_len, trainloaders_x[0].shape[2]))
    x = layers.LSTM(hp_units, activation='sigmoid', return_sequences=True)(inputs)
    x = layers.LSTM(hp_units, activation='sigmoid', return_sequences=True)(x)
    x = layers.Flatten()(x)
    # The output must be a probability in [0, 1]: BinaryCrossentropy() is used
    # without from_logits and the confusion-matrix based metrics expect
    # predictions in that range. relu and tanh outputs can fall outside it,
    # so the output activation is fixed to sigmoid instead of being tuned.
    outputs = layers.Dense(1, activation="sigmoid")(x)
    model = keras.Model(inputs, outputs)

    # Tune the learning rate for the optimizer
    # Choose an optimal value from 0.01, 0.001, or 0.0001
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

    model.compile(
        optimizer=keras.optimizers.legacy.Adam(learning_rate=hp_learning_rate),
        loss=keras.losses.BinaryCrossentropy(),
        metrics=metrics)

    return model


In [31]:
# create a tuner object
# Random search over the hyperparameters declared in make_model, ranked by
# the validation value of the metric named 'accuracy'.
tuner = kt.RandomSearch(
    hypermodel=make_model,
    objective="val_accuracy",
    max_trials=12,
    executions_per_trial=2,
    overwrite=True,
    directory="my_dir",
    project_name="tuning_test",
)

In [None]:
# do a hyperparameter search using the tuner object
# The loaders are lists with one entry per loaded file. Validation data uses
# the first entry, exactly like the training data, so Keras receives tensors
# rather than the per-file lists.
tuner.search(trainloaders_x[0],
             trainloaders_y[0],
             epochs=5,
             validation_data=(valloaders_x[0], valloaders_y[0]))
best_model = tuner.get_best_models()[0]

In [None]:
# Get the top 2 models; the first one is kept as the best model.
models = tuner.get_best_models(num_models=2)
best_model = models[0]
# The build() call below is commented out and therefore not run. Its original
# note says it is needed for `Sequential` without specified `input_shape`.
#best_model.build(input_shape=(None, 28, 28))
# Print the layer summary of the best model.
best_model.summary()

In [None]:
# show the tuner's summary of the search results
tuner.results_summary()