# Horse ID Embedding

In [2]:
# Setup Environment
import time
from optuna.importance import MeanDecreaseImpurityImportanceEvaluator
import os
import logging
import datetime
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, Dense, Flatten
import joblib # Used for encoding horse_id
from sklearn.model_selection import KFold
from sklearn.utils import shuffle
import matplotlib.pyplot as plt
import optuna
import optuna.visualization as viz
from catboost import CatBoostRanker, CatBoostRegressor, CatBoostClassifier, Pool
import numpy as np
import itertools
import pyspark.sql.functions as F
from pyspark.sql.functions import (col, count, row_number, abs, unix_timestamp, mean, 
                                   when, lit, min as F_min, max as F_max , upper, trim,
                                   row_number, mean as F_mean, countDistinct, last, first, when)
from src.data_preprocessing.data_prep1.data_utils import initialize_environment 
# Set global references to None
spark = None
master_results_df = None
race_df = None
df = None
training_data = None
train_df = None

In [4]:
spark, jdbc_url, jdbc_properties, parquet_dir, log_file = initialize_environment()

Spark session created successfully.


In [5]:
# This dataset has already been cleaned up in the LGB notebook and saved as a starting point
# It now just needs to be converted to Panadas and run in the GBDT variant model (LGB, XGB, CatBoost)
speed_figure = spark.read.parquet("/home/exx/myCode/horse-racing/FoxRiverAIRacing/data/parquet/speed_figure.parquet")

In [6]:
speed_figure.count()

327986

In [7]:
speed_figure.dtypes

[('course_cd', 'string'),
 ('race_date', 'date'),
 ('race_number', 'double'),
 ('saddle_cloth_number', 'string'),
 ('horse_id', 'double'),
 ('horse_name', 'string'),
 ('official_fin', 'double'),
 ('purse', 'double'),
 ('weight', 'double'),
 ('date_of_birth', 'date'),
 ('sex', 'string'),
 ('equip', 'string'),
 ('claimprice', 'double'),
 ('surface', 'string'),
 ('distance_meters', 'double'),
 ('time_behind', 'double'),
 ('pace_delta_time', 'double'),
 ('speed_rating', 'double'),
 ('class_rating', 'double'),
 ('previous_class', 'double'),
 ('power', 'double'),
 ('starts', 'double'),
 ('horse_itm_percentage', 'double'),
 ('trk_cond', 'string'),
 ('med', 'string'),
 ('morn_odds', 'double'),
 ('avgspd', 'double'),
 ('race_type', 'string'),
 ('net_sentiment', 'double'),
 ('stk_clm_md', 'string'),
 ('turf_mud_mark', 'string'),
 ('avg_spd_sd', 'double'),
 ('ave_cl_sd', 'double'),
 ('hi_spd_sd', 'double'),
 ('pstyerl', 'double'),
 ('all_starts', 'double'),
 ('all_win', 'double'),
 ('all_place', 

# Switching to Pandas

In [8]:
speed_figure = speed_figure.toPandas()

                                                                                

### Set target_metric as Rank


In [9]:
# 2) Convert horse_id into integer indices
unique_horses = speed_figure["horse_id"].unique()
horse_id_to_idx = {h: i for i, h in enumerate(unique_horses)}
horse_idx = speed_figure["horse_id"].map(horse_id_to_idx)

# Use pd.concat to avoid fragmentation
speed_figure = pd.concat([speed_figure, horse_idx.rename("horse_idx")], axis=1)

In [10]:
# 3) Select numeric columns for embedding input  - 
			 
embedding_features = [
    "custom_speed_figure","off_finish_last_race","time_behind", "pace_delta_time",
    "all_starts","all_win","all_place","all_show","all_fourth",
    "sire_itm_percentage", "sire_roi", "dam_itm_percentage", "dam_roi" 
    # Possibly other horse-level stats
]

In [11]:
# Our target to predict (e.g., finishing position or next speed rating)
target_col = "perf_target"

In [12]:
# 4) Create X and y arrays
X_numerical = speed_figure[embedding_features].astype(float).values  # shape: [num_samples, num_numeric_feats]
X_horse_idx = speed_figure["horse_idx"].values  # shape: [num_samples]
y = speed_figure[target_col].values  # shape: [num_samples]


In [13]:
# Check correlations
for col in embedding_features:
    corr = speed_figure[col].corr(speed_figure["custom_speed_figure"])
    print(f"Correlation between {col} and custom_speed_figure: {corr}")


Correlation between custom_speed_figure and custom_speed_figure: 1.0
Correlation between off_finish_last_race and custom_speed_figure: -0.09845365957873714
Correlation between time_behind and custom_speed_figure: -0.2250303206500619
Correlation between pace_delta_time and custom_speed_figure: -0.08550239134847644
Correlation between all_starts and custom_speed_figure: 0.03884673055694286
Correlation between all_win and custom_speed_figure: 0.07721886854020615
Correlation between all_place and custom_speed_figure: 0.1160305483513279
Correlation between all_show and custom_speed_figure: 0.08167275243596396
Correlation between all_fourth and custom_speed_figure: 0.028804038367510394
Correlation between sire_starts and custom_speed_figure: 0.02402287773212461
Correlation between sire_win and custom_speed_figure: 0.02721248943329107
Correlation between sire_place and custom_speed_figure: 0.026872668646238353
Correlation between sire_show and custom_speed_figure: 0.025458031347754814
Correla

In [14]:
# 5) Simple train/val split (use time-based if possible!)
X_num_train, X_num_val, X_horse_train, X_horse_val, y_train, y_val = train_test_split(
    X_numerical, X_horse_idx, y, test_size=0.2, random_state=42
)

# -----------------------------------------------------------------------------
# Define the dict inputs that Keras expects
#    train_inputs and val_inputs must exist BEFORE objective() is called
# -----------------------------------------------------------------------------
train_inputs = {
    "numeric_input": X_num_train,
    "horse_id_input": X_horse_train
}
val_inputs = {
    "numeric_input": X_num_val,
    "horse_id_input": X_horse_val
}

# Building a Keras Model with an Embedding Layer

> We’ll have two inputs to our model:

    1.	horse_id input (integer indices) fed into an Embedding layer.
	2.	numeric features (like custom_speed_figure and sire/dam stats) fed into a small Dense network.

Then we’ll concatenate these two outputs and produce a regression output (1 node with a linear activation for MSE).

In [15]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

num_horses = len(unique_horses)
embedding_dim = 8  # hyperparameter you can tune
num_numeric_feats = len(embedding_features)
print(num_horses)

51588


In [17]:
# 1) Define horse_id input
horse_id_input = keras.Input(shape=(), name="horse_id_input", dtype=tf.int32)
# Embedding layer for horse IDs
horse_embedding_layer = layers.Embedding(
    input_dim=num_horses, 
    output_dim=embedding_dim, 
    name="horse_embedding"
)
horse_embedded = horse_embedding_layer(horse_id_input)  # shape: (batch, embedding_dim)

# The embedding output will be 2D [batch_size, embedding_dim].
# Optionally, you can Flatten() if you want a 1D vector
horse_embedded = layers.Flatten()(horse_embedded)


In [18]:
# 2) Define numeric input
numeric_input = keras.Input(shape=(num_numeric_feats,), name="numeric_input")
x_numeric = layers.Dense(16, activation="relu")(numeric_input)
x_numeric = layers.Dense(16, activation="relu")(x_numeric)

In [19]:
# 3) Concatenate the numeric output and the embedding
combined = layers.Concatenate()([x_numeric, horse_embedded])


In [20]:
# 4) Final output layer for regression
output = layers.Dense(1, activation="linear", name="output")(combined)



In [21]:
# 5) Build the model
model = keras.Model(
    inputs=[numeric_input, horse_id_input],
    outputs=output
)

In [22]:
# 6) Compile the model with MSE or MAE
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss="mse",  # for regression
    metrics=["mae"] 
)

model.summary()

### Summary of the Model
	•	horse_id_input -> Embedding -> Flatten -> (None, embedding_dim)
	•	numeric_input -> 2 Dense layers -> (None, 16)
	•	Concatenate -> Final dense(1) for regression.

# Train the Network
	•	We’ll feed two inputs into .fit(): one for the numeric features and another for the horse ID indices.

In [23]:
def objective(trial):
    # -----------------------------
    #  Hyperparameter Search Space
    # -----------------------------
    embedding_dim = trial.suggest_categorical("embedding_dim", [2, 4, 8, 16, 32, 64])
    n_hidden_layers = trial.suggest_int("n_hidden_layers", 1, 5)
    units = trial.suggest_int("units_per_layer", 16, 512, step=16)
    activation = trial.suggest_categorical("activation", ["relu", "selu", "tanh", "gelu", "softplus"])
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-1, log=True)
    batch_size = trial.suggest_categorical("batch_size", [128, 256, 512, 1024])
    epochs = trial.suggest_int("epochs", 5, 50, step=5)

    # OPTIONAL: dropout rate
    use_dropout = trial.suggest_categorical("use_dropout", [False, True])
    dropout_rate = 0.0
    if use_dropout:
        dropout_rate = trial.suggest_float("dropout_rate", 0.1, 0.5, step=0.1)

    # -----------------------------
    #  Build the Model
    # -----------------------------
    # Horse ID input
    horse_id_input = keras.Input(shape=(), name="horse_id_input", dtype=tf.int32)

    # Numeric input
    numeric_input = keras.Input(shape=(X_num_train.shape[1],), name="numeric_input")

    # Embedding layer for horse_id
    horse_embedding_layer = layers.Embedding(
        input_dim=num_horses,  # you must define this globally or pass it in
        output_dim=embedding_dim,
        name="horse_embedding"
    )
    horse_embedded = horse_embedding_layer(horse_id_input)  # shape: [batch, 1, embedding_dim]
    horse_embedded = layers.Flatten()(horse_embedded)       # shape: [batch, embedding_dim]

    # Dense layers for numeric features
    x = numeric_input
    for _ in range(n_hidden_layers):
        x = layers.Dense(units, activation=activation)(x)
        # Optional dropout for each hidden layer
        if use_dropout:
            x = layers.Dropout(dropout_rate)(x)

    # Concatenate embedding + numeric branch
    combined = layers.Concatenate()([x, horse_embedded])
    
    # Final output (regression)
    output = layers.Dense(1, activation="linear")(combined)

    model = keras.Model([numeric_input, horse_id_input], outputs=output)

    # Compile
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss="mse",
        metrics=["mae"]
    )

    # Early stopping callback (tweak if you want to let it train longer)
    early_stopping = keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=3,
        restore_best_weights=True
    )

    # -----------------------------
    #  Train
    # -----------------------------
    history = model.fit(
        train_inputs,  # {"numeric_input": X_num_train, "horse_id_input": X_horse_train}
        y_train,
        validation_data=(val_inputs, y_val),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[early_stopping],
        verbose= 50
    )

    # Evaluate on validation set
    val_loss, val_mae = model.evaluate(val_inputs, y_val, verbose=0)
    return val_loss  # Minimizing MSE

In [24]:
# Suppose we have:
# - num_horses = length of the unique horse IDs
# - X_num_train, X_horse_train, y_train
# - X_num_val, X_horse_val, y_val
# and we have train_inputs = {"numeric_input": X_num_train, "horse_id_input": X_horse_train}
#                    val_inputs = {"numeric_input": X_num_val,   "horse_id_input": X_horse_val}

study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=50)  # or more, if you can afford it

print("Best Trial:")
best_trial = study.best_trial
print(f"  Value (Val MSE): {best_trial.value}")
for key, value in best_trial.params.items():
    print(f"    {key}: {value}")

[I 2025-01-20 16:29:38,429] A new study created in memory with name: no-name-57f0306a-d8af-4f38-bf91-5cf50453208d


Epoch 1/45


I0000 00:00:1737412179.062022 3847292 service.cc:146] XLA service 0x7f3c1c004e30 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1737412179.062047 3847292 service.cc:154]   StreamExecutor device (0): NVIDIA RTX A6000, Compute Capability 8.6
I0000 00:00:1737412179.062050 3847292 service.cc:154]   StreamExecutor device (1): NVIDIA RTX A6000, Compute Capability 8.6
2025-01-20 16:29:39.083065: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2025-01-20 16:29:39.168792: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 90300
I0000 00:00:1737412179.957947 3847292 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/45
Epoch 3/45
Epoch 4/45
Epoch 5/45
Epoch 6/45


[I 2025-01-20 16:29:46,851] Trial 0 finished with value: 146.48037719726562 and parameters: {'embedding_dim': 4, 'n_hidden_layers': 3, 'units_per_layer': 128, 'activation': 'relu', 'learning_rate': 1.4108559232679512e-05, 'batch_size': 512, 'epochs': 45, 'use_dropout': True, 'dropout_rate': 0.1}. Best is trial 0 with value: 146.48037719726562.


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[I 2025-01-20 16:29:54,237] Trial 1 finished with value: 4.528083801269531 and parameters: {'embedding_dim': 8, 'n_hidden_layers': 4, 'units_per_layer': 16, 'activation': 'selu', 'learning_rate': 0.003180607323215678, 'batch_size': 256, 'epochs': 5, 'use_dropout': False}. Best is trial 1 with value: 4.528083801269531.


Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35
Epoch 12/35
Epoch 13/35
Epoch 14/35
Epoch 15/35
Epoch 16/35
Epoch 17/35
Epoch 18/35
Epoch 19/35
Epoch 20/35


[I 2025-01-20 16:30:27,707] Trial 2 finished with value: 2.492488145828247 and parameters: {'embedding_dim': 2, 'n_hidden_layers': 1, 'units_per_layer': 128, 'activation': 'softplus', 'learning_rate': 1.4774072288108943e-05, 'batch_size': 128, 'epochs': 35, 'use_dropout': False}. Best is trial 2 with value: 2.492488145828247.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10


[I 2025-01-20 16:30:32,038] Trial 3 finished with value: 2.861370801925659 and parameters: {'embedding_dim': 16, 'n_hidden_layers': 1, 'units_per_layer': 448, 'activation': 'softplus', 'learning_rate': 0.009240504003468137, 'batch_size': 512, 'epochs': 10, 'use_dropout': False}. Best is trial 2 with value: 2.492488145828247.


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[I 2025-01-20 16:30:46,444] Trial 4 finished with value: 45.403507232666016 and parameters: {'embedding_dim': 4, 'n_hidden_layers': 5, 'units_per_layer': 288, 'activation': 'softplus', 'learning_rate': 7.591212694024137e-05, 'batch_size': 128, 'epochs': 5, 'use_dropout': True, 'dropout_rate': 0.2}. Best is trial 2 with value: 2.492488145828247.


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30


[I 2025-01-20 16:31:03,549] Trial 5 finished with value: 2.7310216426849365 and parameters: {'embedding_dim': 4, 'n_hidden_layers': 3, 'units_per_layer': 336, 'activation': 'gelu', 'learning_rate': 0.0001899905509105734, 'batch_size': 128, 'epochs': 30, 'use_dropout': False}. Best is trial 2 with value: 2.492488145828247.


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15


[I 2025-01-20 16:31:13,611] Trial 6 finished with value: 2.7299487590789795 and parameters: {'embedding_dim': 2, 'n_hidden_layers': 1, 'units_per_layer': 160, 'activation': 'selu', 'learning_rate': 0.00017543836085798852, 'batch_size': 128, 'epochs': 15, 'use_dropout': False}. Best is trial 2 with value: 2.492488145828247.


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


[I 2025-01-20 16:31:22,425] Trial 7 finished with value: 3.8805441856384277 and parameters: {'embedding_dim': 64, 'n_hidden_layers': 3, 'units_per_layer': 16, 'activation': 'relu', 'learning_rate': 0.0005658725889471679, 'batch_size': 256, 'epochs': 5, 'use_dropout': False}. Best is trial 2 with value: 2.492488145828247.


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


[I 2025-01-20 16:31:36,103] Trial 8 finished with value: 11.437106132507324 and parameters: {'embedding_dim': 32, 'n_hidden_layers': 3, 'units_per_layer': 80, 'activation': 'softplus', 'learning_rate': 0.0013055278045958421, 'batch_size': 256, 'epochs': 10, 'use_dropout': True, 'dropout_rate': 0.5}. Best is trial 2 with value: 2.492488145828247.


Epoch 1/20


















Epoch 2/20
Epoch 3/20
Epoch 4/20




[I 2025-01-20 16:31:45,991] Trial 9 finished with value: 6.837271690368652 and parameters: {'embedding_dim': 16, 'n_hidden_layers': 3, 'units_per_layer': 416, 'activation': 'tanh', 'learning_rate': 0.06035810527013214, 'batch_size': 256, 'epochs': 20, 'use_dropout': False}. Best is trial 2 with value: 2.492488145828247.


Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


[I 2025-01-20 16:32:00,146] Trial 10 finished with value: 38.58899688720703 and parameters: {'embedding_dim': 2, 'n_hidden_layers': 1, 'units_per_layer': 208, 'activation': 'gelu', 'learning_rate': 1.3184406513208591e-05, 'batch_size': 1024, 'epochs': 40, 'use_dropout': True, 'dropout_rate': 0.5}. Best is trial 2 with value: 2.492488145828247.


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30


[I 2025-01-20 16:32:30,420] Trial 11 finished with value: 2.421919345855713 and parameters: {'embedding_dim': 2, 'n_hidden_layers': 1, 'units_per_layer': 176, 'activation': 'selu', 'learning_rate': 7.278959095852662e-05, 'batch_size': 128, 'epochs': 30, 'use_dropout': False}. Best is trial 11 with value: 2.421919345855713.


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30


[I 2025-01-20 16:32:58,035] Trial 12 finished with value: 2.6893932819366455 and parameters: {'embedding_dim': 2, 'n_hidden_layers': 2, 'units_per_layer': 224, 'activation': 'selu', 'learning_rate': 4.763653881948557e-05, 'batch_size': 128, 'epochs': 30, 'use_dropout': False}. Best is trial 11 with value: 2.421919345855713.


Epoch 1/35







Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35
Epoch 12/35
Epoch 13/35
Epoch 14/35
Epoch 15/35
Epoch 16/35
Epoch 17/35
Epoch 18/35
Epoch 19/35
Epoch 20/35
Epoch 21/35
Epoch 22/35
Epoch 23/35
Epoch 24/35
Epoch 25/35
Epoch 26/35
Epoch 27/35
Epoch 28/35
Epoch 29/35
Epoch 30/35
Epoch 31/35
Epoch 32/35
Epoch 33/35
Epoch 34/35
Epoch 35/35



[I 2025-01-20 16:33:59,905] Trial 13 finished with value: 3.7451601028442383 and parameters: {'embedding_dim': 2, 'n_hidden_layers': 2, 'units_per_layer': 128, 'activation': 'tanh', 'learning_rate': 3.506323607590197e-05, 'batch_size': 128, 'epochs': 35, 'use_dropout': False}. Best is trial 11 with value: 2.421919345855713.


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50


[I 2025-01-20 16:34:03,952] Trial 14 finished with value: 3.40604567527771 and parameters: {'embedding_dim': 2, 'n_hidden_layers': 2, 'units_per_layer': 304, 'activation': 'softplus', 'learning_rate': 0.00025915270310749303, 'batch_size': 1024, 'epochs': 50, 'use_dropout': False}. Best is trial 11 with value: 2.421919345855713.


Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25


[I 2025-01-20 16:34:28,267] Trial 15 finished with value: 2.4645445346832275 and parameters: {'embedding_dim': 32, 'n_hidden_layers': 1, 'units_per_layer': 208, 'activation': 'selu', 'learning_rate': 3.296152466343257e-05, 'batch_size': 128, 'epochs': 25, 'use_dropout': False}. Best is trial 11 with value: 2.421919345855713.


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20


[I 2025-01-20 16:34:59,285] Trial 16 finished with value: 2.9551384449005127 and parameters: {'embedding_dim': 32, 'n_hidden_layers': 2, 'units_per_layer': 240, 'activation': 'selu', 'learning_rate': 7.610374506833691e-05, 'batch_size': 128, 'epochs': 20, 'use_dropout': False}. Best is trial 11 with value: 2.421919345855713.


Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25


[I 2025-01-20 16:35:10,127] Trial 17 finished with value: 3.2415659427642822 and parameters: {'embedding_dim': 32, 'n_hidden_layers': 1, 'units_per_layer': 384, 'activation': 'selu', 'learning_rate': 0.0007522838618342851, 'batch_size': 128, 'epochs': 25, 'use_dropout': False}. Best is trial 11 with value: 2.421919345855713.


Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25


[I 2025-01-20 16:35:16,994] Trial 18 finished with value: 103.20338439941406 and parameters: {'embedding_dim': 64, 'n_hidden_layers': 4, 'units_per_layer': 192, 'activation': 'selu', 'learning_rate': 2.8938817422977393e-05, 'batch_size': 1024, 'epochs': 25, 'use_dropout': True, 'dropout_rate': 0.30000000000000004}. Best is trial 11 with value: 2.421919345855713.


Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35


[I 2025-01-20 16:35:23,350] Trial 19 finished with value: 4.062744617462158 and parameters: {'embedding_dim': 8, 'n_hidden_layers': 2, 'units_per_layer': 352, 'activation': 'selu', 'learning_rate': 0.00010595485147090932, 'batch_size': 512, 'epochs': 35, 'use_dropout': False}. Best is trial 11 with value: 2.421919345855713.


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20


[I 2025-01-20 16:35:32,374] Trial 20 finished with value: 2.727248430252075 and parameters: {'embedding_dim': 32, 'n_hidden_layers': 1, 'units_per_layer': 64, 'activation': 'selu', 'learning_rate': 0.0032619740420946615, 'batch_size': 128, 'epochs': 20, 'use_dropout': False}. Best is trial 11 with value: 2.421919345855713.


Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35
Epoch 12/35
Epoch 13/35
Epoch 14/35
Epoch 15/35
Epoch 16/35
Epoch 17/35
Epoch 18/35
Epoch 19/35
Epoch 20/35
Epoch 21/35
Epoch 22/35
Epoch 23/35
Epoch 24/35


[I 2025-01-20 16:36:12,261] Trial 21 finished with value: 2.4600136280059814 and parameters: {'embedding_dim': 2, 'n_hidden_layers': 1, 'units_per_layer': 160, 'activation': 'softplus', 'learning_rate': 1.2781060224529147e-05, 'batch_size': 128, 'epochs': 35, 'use_dropout': False}. Best is trial 11 with value: 2.421919345855713.


Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


[I 2025-01-20 16:37:16,654] Trial 22 finished with value: 3.426013946533203 and parameters: {'embedding_dim': 2, 'n_hidden_layers': 1, 'units_per_layer': 272, 'activation': 'tanh', 'learning_rate': 3.155091260342362e-05, 'batch_size': 128, 'epochs': 40, 'use_dropout': False}. Best is trial 11 with value: 2.421919345855713.


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30


[I 2025-01-20 16:37:40,325] Trial 23 finished with value: 2.660593271255493 and parameters: {'embedding_dim': 32, 'n_hidden_layers': 2, 'units_per_layer': 176, 'activation': 'relu', 'learning_rate': 1.1024947418936435e-05, 'batch_size': 128, 'epochs': 30, 'use_dropout': False}. Best is trial 11 with value: 2.421919345855713.


Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40


[I 2025-01-20 16:37:53,340] Trial 24 finished with value: 2.7347187995910645 and parameters: {'embedding_dim': 2, 'n_hidden_layers': 1, 'units_per_layer': 496, 'activation': 'gelu', 'learning_rate': 0.00036070857730578396, 'batch_size': 128, 'epochs': 40, 'use_dropout': False}. Best is trial 11 with value: 2.421919345855713.


Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25


[I 2025-01-20 16:38:25,038] Trial 25 finished with value: 2.546018600463867 and parameters: {'embedding_dim': 8, 'n_hidden_layers': 2, 'units_per_layer': 80, 'activation': 'softplus', 'learning_rate': 2.544106012139584e-05, 'batch_size': 128, 'epochs': 25, 'use_dropout': False}. Best is trial 11 with value: 2.421919345855713.


Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35
Epoch 12/35
Epoch 13/35
Epoch 14/35
Epoch 15/35
Epoch 16/35
Epoch 17/35
Epoch 18/35
Epoch 19/35
Epoch 20/35
Epoch 21/35
Epoch 22/35
Epoch 23/35
Epoch 24/35
Epoch 25/35
Epoch 26/35
Epoch 27/35
Epoch 28/35
Epoch 29/35


[I 2025-01-20 16:39:25,161] Trial 26 finished with value: 3.2185940742492676 and parameters: {'embedding_dim': 64, 'n_hidden_layers': 1, 'units_per_layer': 240, 'activation': 'selu', 'learning_rate': 0.00010176194637858921, 'batch_size': 128, 'epochs': 35, 'use_dropout': True, 'dropout_rate': 0.30000000000000004}. Best is trial 11 with value: 2.421919345855713.


Epoch 1/45
Epoch 2/45
Epoch 3/45
Epoch 4/45
Epoch 5/45
Epoch 6/45
Epoch 7/45
Epoch 8/45
Epoch 9/45
Epoch 10/45
Epoch 11/45
Epoch 12/45
Epoch 13/45
Epoch 14/45


[I 2025-01-20 16:39:32,874] Trial 27 finished with value: 3.721729278564453 and parameters: {'embedding_dim': 32, 'n_hidden_layers': 4, 'units_per_layer': 160, 'activation': 'selu', 'learning_rate': 5.13190324680967e-05, 'batch_size': 1024, 'epochs': 45, 'use_dropout': False}. Best is trial 11 with value: 2.421919345855713.


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30


[I 2025-01-20 16:39:49,339] Trial 28 finished with value: 2.609168767929077 and parameters: {'embedding_dim': 16, 'n_hidden_layers': 5, 'units_per_layer': 112, 'activation': 'softplus', 'learning_rate': 2.1167878135310495e-05, 'batch_size': 512, 'epochs': 30, 'use_dropout': False}. Best is trial 11 with value: 2.421919345855713.


Epoch 1/45
Epoch 2/45
Epoch 3/45
Epoch 4/45
Epoch 5/45
Epoch 6/45
Epoch 7/45


[I 2025-01-20 16:39:57,383] Trial 29 finished with value: 375.3724060058594 and parameters: {'embedding_dim': 4, 'n_hidden_layers': 2, 'units_per_layer': 160, 'activation': 'relu', 'learning_rate': 1.0176614062144088e-05, 'batch_size': 512, 'epochs': 45, 'use_dropout': True, 'dropout_rate': 0.4}. Best is trial 11 with value: 2.421919345855713.


Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25


[I 2025-01-20 16:40:09,101] Trial 30 finished with value: 2.8266379833221436 and parameters: {'embedding_dim': 2, 'n_hidden_layers': 1, 'units_per_layer': 208, 'activation': 'gelu', 'learning_rate': 0.032656563213633985, 'batch_size': 128, 'epochs': 25, 'use_dropout': False}. Best is trial 11 with value: 2.421919345855713.


Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35
Epoch 12/35


[I 2025-01-20 16:40:30,185] Trial 31 finished with value: 2.5773022174835205 and parameters: {'embedding_dim': 2, 'n_hidden_layers': 1, 'units_per_layer': 112, 'activation': 'softplus', 'learning_rate': 1.743989551108015e-05, 'batch_size': 128, 'epochs': 35, 'use_dropout': False}. Best is trial 11 with value: 2.421919345855713.


Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40


[I 2025-01-20 16:41:04,071] Trial 32 finished with value: 2.475131034851074 and parameters: {'embedding_dim': 2, 'n_hidden_layers': 1, 'units_per_layer': 128, 'activation': 'softplus', 'learning_rate': 1.9054784201567596e-05, 'batch_size': 128, 'epochs': 40, 'use_dropout': False}. Best is trial 11 with value: 2.421919345855713.


Epoch 1/45
Epoch 2/45
Epoch 3/45
Epoch 4/45
Epoch 5/45
Epoch 6/45
Epoch 7/45
Epoch 8/45
Epoch 9/45
Epoch 10/45
Epoch 11/45
Epoch 12/45
Epoch 13/45
Epoch 14/45
Epoch 15/45


[I 2025-01-20 16:41:29,669] Trial 33 finished with value: 2.4652035236358643 and parameters: {'embedding_dim': 2, 'n_hidden_layers': 1, 'units_per_layer': 48, 'activation': 'softplus', 'learning_rate': 5.946533719560298e-05, 'batch_size': 128, 'epochs': 45, 'use_dropout': False}. Best is trial 11 with value: 2.421919345855713.


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50


[I 2025-01-20 16:42:09,105] Trial 34 finished with value: 2.482337236404419 and parameters: {'embedding_dim': 2, 'n_hidden_layers': 1, 'units_per_layer': 48, 'activation': 'softplus', 'learning_rate': 5.65808299711954e-05, 'batch_size': 128, 'epochs': 50, 'use_dropout': False}. Best is trial 11 with value: 2.421919345855713.


Epoch 1/45
Epoch 2/45
Epoch 3/45
Epoch 4/45
Epoch 5/45
Epoch 6/45
Epoch 7/45
Epoch 8/45
Epoch 9/45
Epoch 10/45
Epoch 11/45
Epoch 12/45
Epoch 13/45
Epoch 14/45
Epoch 15/45


[I 2025-01-20 16:42:34,763] Trial 35 finished with value: 2.765869617462158 and parameters: {'embedding_dim': 8, 'n_hidden_layers': 1, 'units_per_layer': 48, 'activation': 'softplus', 'learning_rate': 0.00013630863478924905, 'batch_size': 128, 'epochs': 45, 'use_dropout': False}. Best is trial 11 with value: 2.421919345855713.


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50


[I 2025-01-20 16:42:56,114] Trial 36 finished with value: 2.8495049476623535 and parameters: {'embedding_dim': 2, 'n_hidden_layers': 2, 'units_per_layer': 256, 'activation': 'selu', 'learning_rate': 0.001595154125266051, 'batch_size': 256, 'epochs': 50, 'use_dropout': False}. Best is trial 11 with value: 2.421919345855713.


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15


[I 2025-01-20 16:43:11,072] Trial 37 finished with value: 2.576509952545166 and parameters: {'embedding_dim': 4, 'n_hidden_layers': 1, 'units_per_layer': 96, 'activation': 'softplus', 'learning_rate': 4.330416995926999e-05, 'batch_size': 128, 'epochs': 15, 'use_dropout': False}. Best is trial 11 with value: 2.421919345855713.


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


[I 2025-01-20 16:43:27,757] Trial 38 finished with value: 4.240137100219727 and parameters: {'embedding_dim': 16, 'n_hidden_layers': 1, 'units_per_layer': 16, 'activation': 'relu', 'learning_rate': 0.00025265503490195786, 'batch_size': 512, 'epochs': 30, 'use_dropout': True, 'dropout_rate': 0.1}. Best is trial 11 with value: 2.421919345855713.


Epoch 1/45







Epoch 2/45
Epoch 3/45
Epoch 4/45
Epoch 5/45
Epoch 6/45
Epoch 7/45
Epoch 8/45
Epoch 9/45
Epoch 10/45
Epoch 11/45
Epoch 12/45
Epoch 13/45
Epoch 14/45
Epoch 15/45
Epoch 16/45
Epoch 17/45
Epoch 18/45
Epoch 19/45
Epoch 20/45
Epoch 21/45
Epoch 22/45


[I 2025-01-20 16:44:07,677] Trial 39 finished with value: 3.3497562408447266 and parameters: {'embedding_dim': 2, 'n_hidden_layers': 2, 'units_per_layer': 144, 'activation': 'tanh', 'learning_rate': 7.750737956159526e-05, 'batch_size': 128, 'epochs': 45, 'use_dropout': False}. Best is trial 11 with value: 2.421919345855713.


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20


[I 2025-01-20 16:44:13,703] Trial 40 finished with value: 3.498291254043579 and parameters: {'embedding_dim': 32, 'n_hidden_layers': 1, 'units_per_layer': 320, 'activation': 'selu', 'learning_rate': 0.0054779709292813285, 'batch_size': 256, 'epochs': 20, 'use_dropout': False}. Best is trial 11 with value: 2.421919345855713.


Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40


[I 2025-01-20 16:44:33,654] Trial 41 finished with value: 2.5936174392700195 and parameters: {'embedding_dim': 2, 'n_hidden_layers': 1, 'units_per_layer': 192, 'activation': 'softplus', 'learning_rate': 1.9276715775723e-05, 'batch_size': 128, 'epochs': 40, 'use_dropout': False}. Best is trial 11 with value: 2.421919345855713.


Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35
Epoch 12/35
Epoch 13/35


[I 2025-01-20 16:44:56,380] Trial 42 finished with value: 2.6178486347198486 and parameters: {'embedding_dim': 2, 'n_hidden_layers': 1, 'units_per_layer': 144, 'activation': 'softplus', 'learning_rate': 1.6831990108378794e-05, 'batch_size': 128, 'epochs': 35, 'use_dropout': False}. Best is trial 11 with value: 2.421919345855713.


Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40


[I 2025-01-20 16:45:26,731] Trial 43 finished with value: 2.501538038253784 and parameters: {'embedding_dim': 2, 'n_hidden_layers': 1, 'units_per_layer': 112, 'activation': 'softplus', 'learning_rate': 2.768854784418818e-05, 'batch_size': 128, 'epochs': 40, 'use_dropout': False}. Best is trial 11 with value: 2.421919345855713.


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30


[I 2025-01-20 16:45:46,417] Trial 44 finished with value: 2.4671530723571777 and parameters: {'embedding_dim': 2, 'n_hidden_layers': 1, 'units_per_layer': 176, 'activation': 'softplus', 'learning_rate': 5.8882348105164556e-05, 'batch_size': 128, 'epochs': 30, 'use_dropout': False}. Best is trial 11 with value: 2.421919345855713.


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30


[I 2025-01-20 16:45:57,115] Trial 45 finished with value: 2.9753596782684326 and parameters: {'embedding_dim': 64, 'n_hidden_layers': 2, 'units_per_layer': 224, 'activation': 'softplus', 'learning_rate': 0.00016346571859342444, 'batch_size': 128, 'epochs': 30, 'use_dropout': False}. Best is trial 11 with value: 2.421919345855713.


Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25


[I 2025-01-20 16:46:15,117] Trial 46 finished with value: 2.4521121978759766 and parameters: {'embedding_dim': 2, 'n_hidden_layers': 1, 'units_per_layer': 272, 'activation': 'softplus', 'learning_rate': 6.736662186955255e-05, 'batch_size': 128, 'epochs': 25, 'use_dropout': False}. Best is trial 11 with value: 2.421919345855713.


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


[I 2025-01-20 16:46:23,691] Trial 47 finished with value: 40.282161712646484 and parameters: {'embedding_dim': 4, 'n_hidden_layers': 3, 'units_per_layer': 288, 'activation': 'gelu', 'learning_rate': 0.0003838586517482004, 'batch_size': 1024, 'epochs': 15, 'use_dropout': True, 'dropout_rate': 0.4}. Best is trial 11 with value: 2.421919345855713.


Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25


[I 2025-01-20 16:46:32,874] Trial 48 finished with value: 4.325494289398193 and parameters: {'embedding_dim': 2, 'n_hidden_layers': 5, 'units_per_layer': 256, 'activation': 'selu', 'learning_rate': 0.00010023955396939889, 'batch_size': 256, 'epochs': 25, 'use_dropout': False}. Best is trial 11 with value: 2.421919345855713.


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


[I 2025-01-20 16:47:06,173] Trial 49 finished with value: 3.3717682361602783 and parameters: {'embedding_dim': 16, 'n_hidden_layers': 1, 'units_per_layer': 288, 'activation': 'tanh', 'learning_rate': 3.9044498245324706e-05, 'batch_size': 128, 'epochs': 20, 'use_dropout': False}. Best is trial 11 with value: 2.421919345855713.


Best Trial:
  Value (Val MSE): 2.421919345855713
    embedding_dim: 2
    n_hidden_layers: 1
    units_per_layer: 176
    activation: selu
    learning_rate: 7.278959095852662e-05
    batch_size: 128
    epochs: 30
    use_dropout: False


# Train a Final Model with Best Hyperparams (Optional)

After you find the best hyperparameters, you can build a final model using those hyperparams and optionally train it on the combined (train+val) set or just the train set:

In [None]:
best_params = study.best_params

# Rebuild the model with best params:
embedding_dim = best_params["embedding_dim"]
n_hidden_layers = best_params["n_hidden_layers"]
units = best_params["units_per_layer"]
activation = best_params["activation"]
learning_rate = best_params["learning_rate"]
batch_size = best_params["batch_size"]
epochs = best_params["epochs"]

horse_id_input = keras.Input(shape=(), name="horse_id_input", dtype=tf.int32)
numeric_input  = keras.Input(shape=(X_num_train.shape[1],), name="numeric_input")

horse_embedding_layer = layers.Embedding(input_dim=num_horses, output_dim=embedding_dim, name="horse_embedding")
horse_embedded = horse_embedding_layer(horse_id_input)
horse_embedded = layers.Flatten()(horse_embedded)

x = numeric_input
for _ in range(n_hidden_layers):
    x = layers.Dense(units, activation=activation)(x)

combined = layers.Concatenate()([x, horse_embedded])
output = layers.Dense(1, activation="linear", name="output")(combined)
final_model = keras.Model(inputs=[numeric_input, horse_id_input], outputs=output)

final_model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
    loss="mse",
    metrics=["mae"]
)

# Train (optionally use all data or keep same splits)
early_stopping = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=3,
    restore_best_weights=True
)
final_model.fit(
    train_inputs, y_train,
    validation_data=(val_inputs, y_val),
    epochs=epochs,
    batch_size=batch_size,
    callbacks=[early_stopping],
    verbose=1
)

# Evaluate final
val_loss, val_mae = final_model.evaluate(val_inputs, y_val, verbose=0)
print(f"Final Model - Val MSE: {val_loss:.4f}, Val MAE: {val_mae:.4f}")

# Extract Embeddings
	•	After training, extract the weights from the Embedding layer.
	•	Save these embeddings for use in the CatBoost model.

In [None]:
# The embedding weights (shape: [num_horses, embedding_dim])
embedding_weights = horse_embedding_layer.get_weights()[0]
# This is a numpy array of shape (num_horses, embedding_dim)


# We already have a mapping from horse_id to the row index in that 
# embedding matrix (horse_id_to_idx). Let’s invert that dictionary 
# to reconstruct each horse’s embedding:

idx_to_horse_id = {v: k for k, v in horse_id_to_idx.items()}

embed_list = []
for i in range(num_horses):
    horse_id = idx_to_horse_id[i]
    emb_vec = embedding_weights[i].tolist()  # convert to Python list
    embed_list.append([horse_id] + emb_vec)

# Create a DataFrame with columns: ["horse_id", "embed_0", ..., "embed_7"]
embed_cols = ["horse_id"] + [f"embed_{k}" for k in range(embedding_dim)]
embed_df = pd.DataFrame(embed_list, columns=embed_cols)

print(embed_df.head())


*Note: Now embed_df has one row per horse_id with its learned embedding vector.

# Merging Embeddings Back into Your Main Data

Finally, you can join embed_df with your main “training” DataFrame for CatBoost:

In [None]:
# If your main data is still in df, merge on horse_id:
df_final = pd.merge(
    speed_figure,       # original DataFrame with race-level rows
    embed_df, # the embedding vectors
    on="horse_id",
    how="left"
)

# Save to Parquet or CSV
df_final.to_parquet("/home/exx/myCode/horse-racing/FoxRiverAIRacing/data/parquet/CatBoost_Embedding_data.parquet")