In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import keras_tuner as kt

tf.config.threading.set_intra_op_parallelism_threads(64)  # Maximize intra-op parallelism across 64 CPUs
tf.config.threading.set_inter_op_parallelism_threads(64)
# Load the data
X = pd.read_csv('X_imputed.csv')
y = pd.read_csv('y_data.csv')

# Replace missing values in y with NaN
y = y.replace("?", np.nan)  # If missing values are marked with "?" in the dataset
y_numeric = y.drop(columns=['statecounty']) # Remove 'statecounty'
# Normalize X (remove the 'statecounty' column)
X_numeric = X.drop(columns=['statecounty'])  # Remove 'statecounty'
scaler = StandardScaler()
X_scaled = pd.DataFrame(scaler.fit_transform(X_numeric), columns=X_numeric.columns)

# Split data into train and test
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_numeric, test_size=0.2, random_state=42)


In [None]:
# Custom masked loss function to handle missing values in y
def masked_loss(y_true, y_pred):
    mask = tf.math.is_finite(y_true)  # Create a mask to ignore NaNs
    y_true_masked = tf.where(mask, y_true, 0.0)  # Replace NaNs with 0.0 for loss calculation
    y_pred_masked = tf.where(mask, y_pred, 0.0)
    return tf.reduce_mean(tf.square(y_true_masked - y_pred_masked))

def build_model(hp):
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.InputLayer(input_shape=(X_train.shape[1],)))

    # Tune the number of layers and units in each layer
    for i in range(hp.Int('num_layers', 2, 3)):  # Narrow to between 2 and 3 layers
        model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
                                                     min_value=128,
                                                     max_value=256,  # Narrow units range
                                                     step=64),
                                        activation='relu'))
        model.add(tf.keras.layers.Dropout(hp.Float('dropout_' + str(i),
                                                   min_value=0.2,
                                                   max_value=0.4,
                                                   step=0.1)))

    model.add(tf.keras.layers.Dense(y_train.shape[1], activation='linear'))

    model.compile(optimizer=tf.keras.optimizers.Adam(
                      hp.Float('learning_rate', 1e-4, 1e-3, sampling='log')),  # Narrow learning rate
                  loss=masked_loss,
                  metrics=['mae'])
    return model


# Hyperparameter tuning using  RandomSearch
tuner = kt.RandomSearch(
    build_model,
    objective='val_mae',
    max_trials=20,
    executions_per_trial=2,
    directory='my_dir',
    project_name='multi_target_hyperparam_tuning',
    max_consecutive_failed_trials=10
)


Reloading Tuner from my_dir/multi_target_hyperparam_tuning/tuner0.json


In [11]:

# Search for the best hyperparameters
tuner.search(X_train, y_train, epochs=50, validation_split=0.2, batch_size=64)

# Get the best model
best_model = tuner.get_best_models(num_models=1)[0]

# Evaluate the best model
loss, mae = best_model.evaluate(X_test, y_test)
print(f"Best Model Test Loss: {loss}, Test MAE: {mae}")

# Save the best model
best_model.save('best_multi_target_model.h5')

Trial 9 Complete [00h 00m 27s]
val_mae: nan

Best val_mae So Far: nan
Total elapsed time: 00h 07m 37s

Search: Running Trial #10

Value             |Best Value So Far |Hyperparameter
3                 |2                 |num_layers
512               |320               |units_0
0.2               |0.4               |dropout_0
256               |512               |units_1
0.4               |0.2               |dropout_1
0.0028187         |0.00013819        |learning_rate
512               |256               |units_2
0.4               |0.3               |dropout_2
192               |192               |units_3
0.2               |0.2               |dropout_3

Epoch 1/50




[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 17118.6348 - mae: nan - val_loss: 3148.8933 - val_mae: nan
Epoch 2/50
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 4334.0322 - mae: nan - val_loss: 2742.6292 - val_mae: nan
Epoch 3/50
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 5999.7681 - mae: nan - val_loss: 2592.3306 - val_mae: nan
Epoch 4/50
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 2955.0754 - mae: nan - val_loss: 2394.2944 - val_mae: nan
Epoch 5/50
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 3503.5994 - mae: nan - val_loss: 2250.2263 - val_mae: nan
Epoch 6/50
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 4064.9221 - mae: nan - val_loss: 2227.6384 - val_mae: nan
Epoch 7/50
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 2593.7339 - mae: 

: 