<a href="https://colab.research.google.com/github/jorgecabrejas7/TFM/blob/main/Hyperparam.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install tensorflow-addons pydot graphviz keras-tuner --upgrade

Collecting tensorflow-addons
  Downloading tensorflow_addons-0.21.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (612 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/612.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m163.8/612.1 kB[0m [31m5.1 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━[0m [32m532.5/612.1 kB[0m [31m7.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m612.1/612.1 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
Collecting keras-tuner
  Downloading keras_tuner-1.3.5-py3-none-any.whl (176 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m176.1/176.1 kB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0m
Collecting typeguard<3.0.0,>=2.7 (from tensorflow-addons)
  Downloading typeguard-2.13.3-py3-none-any.whl (17 kB)
Collecting kt-legacy (from keras-t

In [3]:
import os
from typing import *

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import tensorflow_addons as tfa
from keras import backend as K
from keras.callbacks import Callback
from keras.layers import (
    BatchNormalization,
    Concatenate,
    Conv1D,
    ConvLSTM1D,
    Dense,
    Dropout,
    Flatten,
    Input,
    Layer,
    LayerNormalization,
    MaxPooling1D,
    MultiHeadAttention,
)
from keras.optimizers import Adam, RMSprop
from keras.utils import Sequence
from keras_tuner.engine.hyperparameters import HyperParameters
from keras_tuner.tuners import Hyperband, BayesianOptimization
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping

# from metrics import *

%matplotlib inline

plt.style.use("seaborn-v0_8-darkgrid")

DRIVE_PATH = "/content/drive/MyDrive/TFM"
PATH = os.path.join(DRIVE_PATH, "data")
SYMBOLS = ["ADA", "BNB", "BTC", "EOS", "ETH", "LTC", "TRX", "VET", "XRP"]


def to_csvf(x):
    return x + "USDT.csv"


TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



In [4]:
def read_file(symbol: str, tf: str, timestamp_unit: str = "ms") -> pd.DataFrame:
    """
    Reads a CSV file, assigns column names, converts the 'date' column to datetime,
    and sets it as the DataFrame's index.

    Parameters
    ----------
    symbol : str
        The symbol.
    tf : str
        The tf.
    timestamp_unit : str, default 'ms'
        The unit of the timestamp in the 'date' column. By default, it's 'ms' (milliseconds).

    Returns
    -------
    pd.DataFrame
        The DataFrame with the 'date' column converted to datetime and set as the index.
    """

    df = pd.read_csv(to_csvf(os.path.join(PATH, tf, symbol)), header=None).iloc[:, 0:6]
    df.columns = ["date", "open", "high", "low", "close", "volume"]
    df["date"] = pd.to_datetime(df["date"], unit=timestamp_unit)
    df.set_index("date", inplace=True)

    return df


def create_input_tensor(data, lookback=250 * 3):
    inputs = []
    for i in range(0, len(data) - lookback, 3):
        inputs.append(data.iloc[i : i + lookback].values)
        # if i < 24: print(data.iloc[i+lookback])

    return np.array(inputs)


def create_target_tensor(data_dict):
    # Step 1: Create a dataframe with closing prices for each currency
    close_prices_df = pd.DataFrame(
        {symbol: df["close"] for symbol, df in data_dict.items()}
    ).dropna()
    # Step 2: Repeat each value three times
    # print(close_prices_df)
    target_array = np.array(
        close_prices_df.apply(
            lambda x: np.array([item for item in x for _ in range(3)]), axis=1
        ).values
    )

    return np.vstack(target_array)


def prepare_data(PATH):
    # 1. Read BTC hourly data
    btc_data = read_file("BTC", "8h")
    # Create a new index to fill missing values
    full_index = pd.date_range(btc_data.index.min(), btc_data.index.max(), freq="8H")
    df_full = pd.DataFrame(index=full_index)
    # Create the new dataframe forwarding missing values
    btc_data = df_full.merge(
        btc_data, left_index=True, right_index=True, how="left"
    ).fillna(method="ffill")
    # 2. Read other currencies' daily data
    daily_data = {}
    for symbol in SYMBOLS:
        if symbol != "BTC":
            daily_data[symbol] = read_file(f"{symbol}", "1d")

    # 3. Find overlapping date range
    min_date = btc_data.index.min()
    max_date = btc_data.index.max()
    for df in daily_data.values():
        min_date = max(min_date, df.index.min())
        max_date = min(max_date, df.index.max())

    # 4. Prune each dataset to the overlapping range
    btc_data = btc_data.loc[
        min_date - pd.Timedelta(days=250) : max_date
    ]
    for symbol in daily_data:
        daily_data[symbol] = daily_data[symbol].loc[min_date:max_date]
    # 5. Scale the BTC data and each feature separately
    scalers_btc = {}
    for col in btc_data.columns:
        scaler = MinMaxScaler()
        btc_data[col] = scaler.fit_transform(btc_data[col].values.reshape(-1, 1))
        scalers_btc[col] = scaler

    # Create input tensor from scaled BTC hourly data

    input_tensor = create_input_tensor(btc_data)
    # 6. Scale target data (Close Price) for each currency
    scalers_targets = {}
    scaled_targets = {}
    for symbol, df in daily_data.items():
        scaler = MinMaxScaler()
        scaled_data = scaler.fit_transform(df["close"].values.reshape(-1, 1))
        scaled_targets[symbol] = pd.DataFrame(
            scaled_data, columns=["close"], index=df.index
        )  # Save the scaled data as dataframe
        scalers_targets[symbol] = scaler
    # Create target tensor using scaled data
    target_tensors = create_target_tensor(scaled_targets)

    return input_tensor, target_tensors, scalers_btc, scalers_targets


# Use the function
input_data, target_data, btc_scalers, target_scalers = prepare_data(PATH)
#
# target_data = target_data.reshape(-1, 24, 1)
input_data.shape, target_data.shape

((1839, 750, 5), (1839, 24))

In [5]:
# Finding the index for an 80-20 split
index_80_percent = int(0.8 * len(input_data))

hyperparam_input = input_data[index_80_percent:]
hyperparam_target = target_data[index_80_percent:]

# 2. Splitting the 20% further into training and validation

# Finding the index for an 80-20 split within the hyperparameter data
index_hyperparam_80_percent = int(0.8 * len(hyperparam_input))

# Splitting the data
train_input = hyperparam_input[:index_hyperparam_80_percent]
train_target = hyperparam_target[:index_hyperparam_80_percent]

valid_input = hyperparam_input[index_hyperparam_80_percent:]
valid_target = hyperparam_target[index_hyperparam_80_percent:]

train_input.shape, valid_input.shape, train_target.shape, valid_target.shape

((294, 750, 5), (74, 750, 5), (294, 24), (74, 24))

In [6]:

class Time2Vec(Layer):
    def __init__(self, output_dim=None, **kwargs):
        self.output_dim = output_dim
        super(Time2Vec, self).__init__(**kwargs)

    def build(self, input_shape):
        self.W = self.add_weight(
            name="W",
            shape=(input_shape[-1], self.output_dim),
            initializer="uniform",
            trainable=True,
        )
        self.P = self.add_weight(
            name="P",
            shape=(input_shape[1], self.output_dim),
            initializer="uniform",
            trainable=True,
        )
        self.w = self.add_weight(
            name="w", shape=(input_shape[1], 1), initializer="uniform", trainable=True
        )
        self.p = self.add_weight(
            name="p", shape=(input_shape[1], 1), initializer="uniform", trainable=True
        )
        super(Time2Vec, self).build(input_shape)

    def call(self, x):
        original = self.w * x + self.p
        sin_trans = K.sin(K.dot(x, self.W) + self.P)

        return K.concatenate([sin_trans, original], -1)


In [None]:
# No necesariamente time to vec, otros metodos, coseno dia de la semana, del mes ...
from keras.utils import plot_model

lookback = 250 * 3

tf.keras.backend.clear_session()
def build_model(hp):
    input_shape = (lookback, 5)  # for example

    input_layer = Input(shape=input_shape)
    x = Time2Vec(lookback)(input_layer)

    decoder_layer_type = hp.Choice(
        "decoder_layer_type", values=["conv", "dense"], default="conv"
    )
    num_transformer_layers = hp.Int(
        "num_transformer_layers", min_value=1, max_value=4, step=1
    )
    num_attention_heads = hp.Int(
        "num_attention_heads", min_value=1, max_value=6, step=1
    )
    dropout_rate = hp.Float("dropout_rate", min_value=0.0, max_value=0.5, step=0.1)

    for _ in range(num_transformer_layers, 0, -1):
        x = MultiHeadAttention(num_heads=num_attention_heads, key_dim=lookback)(x, x)
        x = Dropout(dropout_rate)(x)
        x = LayerNormalization()(x)

    encoder_output = x
    # Define output branches
    outputs = []
    losses = {}
    for i in range(8):
        if decoder_layer_type == "conv":
            kernel_size = hp.Int("kernel_size", min_value=16, max_value=64, step=16)
            conv_layers = hp.Int("conv_layers", min_value=1, max_value=3, step=1)
            for j in range(conv_layers, 0, -1):
                x = Conv1D(32 * (2**j), kernel_size)(encoder_output)
            x = MaxPooling1D(2)(x)
        # elif decoder_layer_type == "conv_lstm":
        #     kernel_size = hp.Int("kernel_size", min_value=2, max_value=128, step=16)
        #     conv_layers = hp.Int("conv_lstm_layers", min_value=1, max_value=3, step=1)
        #     for j in range(conv_layers, 0, -1):
        #         x = ConvLSTM1D(32 * (2**j), kernel_size)(encoder_output)
        #     x = MaxPooling1D(2)(x)
        else:
            dense_units = hp.Int("dense_units", min_value=32, max_value=128, step=32)
            dense_layers = hp.Int("dense_layers", min_value=1, max_value=3, step=1)

            for j in range(dense_layers, 0, -1):
                x = Dense(dense_units * (2**j), activation="relu")(encoder_output)
            x = BatchNormalization()(x)
        x = Flatten()(x)
        output_1 = Dense(1, name=f"output_{i}_1_prediction")(x)
        output_2 = Dense(1, name=f"output_{i}_2_quantile_05")(x)
        output_3 = Dense(1, name=f"output_{i}_3_quantile_95")(x)
        losses[f"output_{i}_1_prediction"] = "mse"
        losses[f"output_{i}_2_quantile_05"] = tfa.losses.PinballLoss(tau=0.05)
        losses[f"output_{i}_3_quantile_95"] = tfa.losses.PinballLoss(tau=0.95)
        outputs.extend([output_1, output_2, output_3])

    model = Model(inputs=input_layer, outputs=outputs)

    # Define losses
    optimizer = hp.Choice("optimizer", values=["Adam", "RMSprop"])
    opt = Adam if optimizer == "Adam" else RMSprop
    model.compile(
        loss=losses,
        optimizer=opt(
            learning_rate=hp.Float(
                "learning_rate", min_value=1e-5, max_value=1e-2, sampling="LOG"
            )
        ),
        run_eagerly=True
    )
    # model.summary()
    filename = "_".join([f"{value}_{key}" if key in ["num_transformer_layers",  "num_attention_heads", "decoder_layer_type"] else "" for key, value in hp.values.items()])
    filename = os.path.join(DRIVE_PATH, "model_images/", filename.replace("/", "-").replace(".", "-") + ".png")
    plot_model(model, to_file=filename, show_shapes=True, show_layer_names=True)
    return model


# class PrintHyperparameters(Callback):
#     def on_trial_begin(self, trial):
#         print(trial.hyperparameters.values)


# def create_dataset(input_data, target_data, batch_size, window_size):
#     # Create datasets
#     input_dataset = tf.data.Dataset.from_tensor_slices(input_data)
#     target_dataset = tf.data.Dataset.from_tensor_slices(target_data)

#     # Windowing the data. This will create windows of `window_size` for inputs and targets.
#     input_dataset = input_dataset.window(window_size, shift=1, drop_remainder=True)
#     input_dataset = input_dataset.flat_map(lambda x: x.batch(window_size))

#     target_dataset = target_dataset.window(window_size, shift=1, drop_remainder=True)
#     target_dataset = target_dataset.flat_map(lambda y: y.batch(window_size))

#     # Zip the datasets together
#     dataset = tf.data.Dataset.zip((input_dataset, target_dataset))

#     # Batching the data
#     dataset = dataset.batch(batch_size).prefetch(1)

#     return dataset


# batch_size = 4
# window_size = 10

# train_dataset = create_dataset(train_input, train_target, batch_size, window_size)
# val_dataset = create_dataset(valid_input, valid_target, batch_size, window_size)
# train_dataset = train_dataset.map(lambda x, y: (tf.reshape(x, (-1, lookback, 5)), y))
# val_dataset = val_dataset.map(lambda x, y: (tf.reshape(x, (-1, lookback, 5)), y))

# Tuner
tuner = Hyperband(
    build_model,
    objective="val_loss",
    directory=os.path.join(DRIVE_PATH, "Hyperband"),
    project_name="TimeSeries",
    executions_per_trial=3,
    max_consecutive_failed_trials=None,
    hyperband_iterations=4
)


# Pass the callback to the search method
tuner.search(
    train_input,
    train_target,
    validation_data=(valid_input, valid_target),
)



Search: Running Trial #525

Value             |Best Value So Far |Hyperparameter
conv              |conv              |decoder_layer_type
1                 |4                 |num_transformer_layers
5                 |1                 |num_attention_heads
0.4               |0.3               |dropout_rate
64                |64                |kernel_size
3                 |1                 |conv_layers
Adam              |Adam              |optimizer
0.0010793         |1.6557e-05        |learning_rate
32                |32                |dense_units
2                 |1                 |dense_layers
12                |34                |tuner/epochs
0                 |12                |tuner/initial_epoch
2                 |2                 |tuner/bracket
0                 |1                 |tuner/round

Epoch 1/12
 2/10 [=====>........................] - ETA: 4s - loss: 780.2477 - output_0_1_prediction_loss: 198.8830 - output_0_2_quantile_05_loss: 0.1905 - output_0_3_quantile_95

KeyboardInterrupt: ignored

In [None]:
# Get the top 5 models' hyperparameters
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=10)
# Assume you've already run the tuning and obtained best_hyperparameters
configs = [hp.get_config()["values"] for hp in best_hyperparameters]

# Convert to pandas DataFrame
df = pd.DataFrame(configs)

# Summary of the search
tuner.results_summary()


Results summary
Results in /content/drive/MyDrive/TFM/Hyperband/TimeSeries
Showing 10 best trials
Objective(name="val_loss", direction="min")

Trial 0343 summary
Hyperparameters:
decoder_layer_type: conv
num_transformer_layers: 4
num_attention_heads: 1
dropout_rate: 0.30000000000000004
kernel_size: 64
conv_layers: 1
optimizer: Adam
learning_rate: 1.6557411192660512e-05
dense_units: 32
dense_layers: 1
tuner/epochs: 34
tuner/initial_epoch: 12
tuner/bracket: 2
tuner/round: 1
tuner/trial_id: 0333
Score: 0.38658903042475384

Trial 0168 summary
Hyperparameters:
decoder_layer_type: conv
num_transformer_layers: 3
num_attention_heads: 1
dropout_rate: 0.0
kernel_size: 16
conv_layers: 2
optimizer: Adam
learning_rate: 1.1340727287390913e-05
dense_units: 96
dense_layers: 1
tuner/epochs: 34
tuner/initial_epoch: 12
tuner/bracket: 2
tuner/round: 1
tuner/trial_id: 0158
Score: 0.3868428369363149

Trial 0179 summary
Hyperparameters:
decoder_layer_type: conv
num_transformer_layers: 1
num_attention_heads: 

In [None]:
df

Unnamed: 0,decoder_layer_type,num_transformer_layers,num_attention_heads,dropout_rate,kernel_size,conv_layers,optimizer,learning_rate,dense_units,dense_layers,tuner/epochs,tuner/initial_epoch,tuner/bracket,tuner/round,tuner/trial_id
0,conv,4,1,0.3,64,1,Adam,1.7e-05,32,1,34,12,2,1,333.0
1,conv,3,1,0.0,16,2,Adam,1.1e-05,96,1,34,12,2,1,158.0
2,conv,1,3,0.4,48,2,RMSprop,3.8e-05,128,3,100,0,0,0,
3,conv,2,2,0.4,48,2,RMSprop,1.4e-05,64,1,100,0,0,0,
4,conv,1,3,0.0,16,1,Adam,4.5e-05,32,2,12,0,2,0,
5,conv,1,3,0.0,32,1,Adam,2e-05,128,3,12,4,3,1,127.0
6,conv,1,3,0.0,16,1,Adam,4.5e-05,32,2,34,12,2,1,329.0
7,conv,3,1,0.1,64,1,Adam,6.4e-05,32,3,100,0,0,0,
8,conv,4,1,0.3,64,1,Adam,1.7e-05,32,1,12,0,2,0,
9,conv,3,1,0.0,16,2,Adam,1.1e-05,96,1,12,0,2,0,


In [9]:
# No necesariamente time to vec, otros metodos, coseno dia de la semana, del mes ...
from keras.utils import plot_model

lookback = 250 * 3

tf.keras.backend.clear_session()
def build_model(hp):
    input_shape = (lookback, 5)  # for example

    input_layer = Input(shape=input_shape)
    x = Time2Vec(lookback)(input_layer)


    num_transformer_layers = hp.Int(
        "num_transformer_layers", min_value=1, max_value=4, step=1
    )
    num_attention_heads = hp.Int(
        "num_attention_heads", min_value=1, max_value=4, step=1
    )
    dropout_rate = hp.Float("dropout_rate", min_value=0.0, max_value=0.4, step=0.1)

    for _ in range(num_transformer_layers, 0, -1):
        x = MultiHeadAttention(num_heads=num_attention_heads, key_dim=lookback)(x, x)
        x = Dropout(dropout_rate)(x)
        x = LayerNormalization()(x)

    encoder_output = x
    # Define output branches
    outputs = []
    losses = {}
    for i in range(8):

        kernel_size = hp.Int("kernel_size", min_value=32, max_value=64, step=2)
        conv_layers = hp.Int("conv_layers", min_value=1, max_value=3, step=1)
        for j in range(conv_layers, 0, -1):
            x = Conv1D(32 * (2**j), kernel_size)(encoder_output)
        x = MaxPooling1D(2)(x)
        x = Flatten()(x)
        output_1 = Dense(1, name=f"output_{i}_1_prediction")(x)
        output_2 = Dense(1, name=f"output_{i}_2_quantile_05")(x)
        output_3 = Dense(1, name=f"output_{i}_3_quantile_95")(x)
        losses[f"output_{i}_1_prediction"] = "mse"
        losses[f"output_{i}_2_quantile_05"] = tfa.losses.PinballLoss(tau=0.05)
        losses[f"output_{i}_3_quantile_95"] = tfa.losses.PinballLoss(tau=0.95)
        outputs.extend([output_1, output_2, output_3])

    model = Model(inputs=input_layer, outputs=outputs)

    # Define losses

    model.compile(
        loss=losses,
        optimizer=Adam(
            learning_rate=hp.Float(
                "learning_rate", min_value=1e-5, max_value=1e-2, sampling="LOG"
            )
        ),
        run_eagerly=True
    )
    # # model.summary()
    # filename = "_".join([f"{value}_{key}" if key in ["num_transformer_layers",  "num_attention_heads", "decoder_layer_type"] else "" for key, value in hp.values.items()])
    # filename = os.path.join(DRIVE_PATH, "model_images/", filename.replace("/", "-").replace(".", "-") + ".png")
    # plot_model(model, to_file=filename, show_shapes=True, show_layer_names=True)
    return model


# class PrintHyperparameters(Callback):
#     def on_trial_begin(self, trial):
#         print(trial.hyperparameters.values)


# def create_dataset(input_data, target_data, batch_size, window_size):
#     # Create datasets
#     input_dataset = tf.data.Dataset.from_tensor_slices(input_data)
#     target_dataset = tf.data.Dataset.from_tensor_slices(target_data)

#     # Windowing the data. This will create windows of `window_size` for inputs and targets.
#     input_dataset = input_dataset.window(window_size, shift=1, drop_remainder=True)
#     input_dataset = input_dataset.flat_map(lambda x: x.batch(window_size))

#     target_dataset = target_dataset.window(window_size, shift=1, drop_remainder=True)
#     target_dataset = target_dataset.flat_map(lambda y: y.batch(window_size))

#     # Zip the datasets together
#     dataset = tf.data.Dataset.zip((input_dataset, target_dataset))

#     # Batching the data
#     dataset = dataset.batch(batch_size).prefetch(1)

#     return dataset


# batch_size = 4
# window_size = 10

# train_dataset = create_dataset(train_input, train_target, batch_size, window_size)
# val_dataset = create_dataset(valid_input, valid_target, batch_size, window_size)
# train_dataset = train_dataset.map(lambda x, y: (tf.reshape(x, (-1, lookback, 5)), y))
# val_dataset = val_dataset.map(lambda x, y: (tf.reshape(x, (-1, lookback, 5)), y))

# Tuner
tuner = BayesianOptimization(
    build_model,
    objective="val_loss",
    directory=os.path.join(DRIVE_PATH, "BayesianOptimization"),
    project_name="TimeSeries",
    executions_per_trial=2,
    max_consecutive_failed_trials=None,
    max_trials=70
)


# Pass the callback to the search method
tuner.search(
    train_input,
    train_target,
    validation_data=(valid_input, valid_target),
    epochs=500,
    callbacks=[EarlyStopping(
    monitor='val_loss',
    patience=20),
               ReduceLROnPlateau(
    monitor='val_loss',
    patience=10)]
)


Trial 70 Complete [00h 04m 48s]
val_loss: 0.3956073522567749

Best val_loss So Far: 0.3824385553598404
Total elapsed time: 01h 03m 54s


In [10]:
tuner.results_summary()

Results summary
Results in /content/drive/MyDrive/TFM/BayesianOptimization/TimeSeries
Showing 10 best trials
Objective(name="val_loss", direction="min")

Trial 61 summary
Hyperparameters:
num_transformer_layers: 4
num_attention_heads: 3
dropout_rate: 0.0
kernel_size: 32
conv_layers: 3
learning_rate: 0.00018457904350482738
Score: 0.3824385553598404

Trial 011 summary
Hyperparameters:
num_transformer_layers: 4
num_attention_heads: 1
dropout_rate: 0.0
kernel_size: 32
conv_layers: 2
learning_rate: 0.0006505963366417891
Score: 0.382909893989563

Trial 001 summary
Hyperparameters:
num_transformer_layers: 4
num_attention_heads: 1
dropout_rate: 0.0
kernel_size: 64
conv_layers: 3
learning_rate: 0.00013609878902968618
Score: 0.3840083380540212

Trial 057 summary
Hyperparameters:
num_transformer_layers: 2
num_attention_heads: 1
dropout_rate: 0.0
kernel_size: 32
conv_layers: 2
learning_rate: 3.2582487731593896e-05
Score: 0.38509513437747955

Trial 044 summary
Hyperparameters:
num_transformer_layer

In [22]:
# Get the top 20 models' hyperparameters
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=50)

# Extract hyperparameter configurations and additional information
data = []
all_trials = tuner.oracle.trials  # Get all the trials

for i, hp in enumerate(best_hyperparameters):
    config = hp.get_config()["values"]

    # Find the trial that corresponds to the current set of hyperparameters
    for trial_id, trial in all_trials.items():
        if trial.hyperparameters.get_config() == hp.get_config():
            config["loss"] = trial.score
            config["trial_number"] = trial.trial_id
            break

    data.append(config)

# Convert to pandas DataFrame
df = pd.DataFrame(data)


In [28]:
df

Unnamed: 0,num_transformer_layers,num_attention_heads,dropout_rate,kernel_size,conv_layers,learning_rate,loss,trial_number
0,4,3,0.0,32,3,0.000185,0.382439,61
1,4,1,0.0,32,2,0.000651,0.38291,11
2,4,1,0.0,64,3,0.000136,0.384008,1
3,2,1,0.0,32,2,3.3e-05,0.385095,57
4,3,4,0.0,34,2,4.5e-05,0.385503,44
5,3,2,0.0,44,3,3e-05,0.385805,43
6,1,3,0.0,32,1,0.000224,0.386282,32
7,2,1,0.0,58,3,3.1e-05,0.386564,45
8,3,1,0.0,48,1,2.1e-05,0.38664,35
9,1,3,0.0,50,2,1.6e-05,0.387105,41
