In [23]:
# ── 1. Imports ─────────────────────────────────────────────────────────────
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import keras_tuner as kt

# ── 2. Load and Prepare Data ───────────────────────────────────────────────
df = pd.read_csv('Tony_data/merged_data.csv')

# Drop rows with missing values
df = df.dropna()

# Select feature columns and target
features = ['initial_claims', 'median_income', 'population', 'lfp_rate', 'Unemployment Rate']
df = df[features]

# Scale data
scaler = MinMaxScaler()
df_scaled = pd.DataFrame(scaler.fit_transform(df), columns=features)

# Create time series samples for LSTM
def create_sequences(data, window_size=12):
    X, y = [], []
    for i in range(len(data) - window_size):
        X.append(data.iloc[i:i+window_size].drop(columns=['Unemployment Rate']).values)
        y.append(data.iloc[i+window_size]['Unemployment Rate'])
    return np.array(X), np.array(y)

X, y = create_sequences(df_scaled)

# Train-test split
X_train, X_val, y_train, y_val = train_test_split(X, y, shuffle=False, test_size=0.2)

# ── 3. Define the Model Builder Function ───────────────────────────────────
def build_model(hp):
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Input(shape=(X_train.shape[1], X_train.shape[2])))
    
    # Tune number of units
    hp_units = hp.Int('units', min_value=16, max_value=128, step=16)
    model.add(tf.keras.layers.LSTM(units=hp_units, return_sequences=False))
    
    # Optional dropout
    model.add(tf.keras.layers.Dropout(hp.Float('dropout', 0.0, 0.5, step=0.1)))
    
    model.add(tf.keras.layers.Dense(1))  # Output: unemployment_rate
    
    # Tune learning rate
    hp_lr = hp.Float('learning_rate', 1e-4, 1e-2, sampling='log')
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=hp_lr),
                  loss='mse',
                  metrics=['mae'])
    return model

# ── 4. Run Keras Tuner ─────────────────────────────────────────────────────
tuner = kt.Hyperband(
    build_model,
    objective='val_loss',
    max_epochs=20,
    factor=3,
    directory='kt_lstm_dir',
    project_name='unemployment_lstm'
)

# Optional early stopping
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

# Launch tuning search
tuner.search(X_train, y_train, validation_data=(X_val, y_val), epochs=20, callbacks=[stop_early])

# ── 5. Get the Best Model ──────────────────────────────────────────────────
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
model = tuner.hypermodel.build(best_hps)
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=20, callbacks=[stop_early])

# ── 6. Evaluate the Model ──────────────────────────────────────────────────
val_loss, val_mae = model.evaluate(X_val, y_val)
print(f"Best validation MAE: {val_mae:.4f}")


Trial 30 Complete [00h 00m 15s]
val_loss: 0.0040846592746675014

Best val_loss So Far: 0.0037168492563068867
Total elapsed time: 00h 05m 12s
Epoch 1/20
[1m582/582[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - loss: 0.0132 - mae: 0.0867 - val_loss: 0.0041 - val_mae: 0.0508
Epoch 2/20
[1m582/582[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0050 - mae: 0.0536 - val_loss: 0.0037 - val_mae: 0.0472
Epoch 3/20
[1m582/582[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0046 - mae: 0.0507 - val_loss: 0.0037 - val_mae: 0.0467
Epoch 4/20
[1m582/582[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0043 - mae: 0.0492 - val_loss: 0.0038 - val_mae: 0.0470
Epoch 5/20
[1m582/582[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0039 - mae: 0.0466 - val_loss: 0.0041 - val_mae: 0.0487
[1m146/146[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 787us/step - loss: 0.0037 - mae: 