In [0]:
import optuna

In [0]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models, losses, callbacks, regularizers
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import backend as K
import optuna
import os

# Seed TensorFlow and NumPy so repeated runs start from the same random state
tf.random.set_seed(42)
np.random.seed(42)

# Device string used for training: the first GPU if TensorFlow reports one,
# otherwise the CPU
if tf.config.list_physical_devices('GPU'):
    device = "/GPU:0"
else:
    device = "/CPU:0"

# # Synthetic input (X) and target (y), kept for demonstration purposes
# n_samples = 16000
# n_features = 1641
# X = np.random.rand(n_samples, n_features).astype(np.float32)
# y = np.random.rand(n_samples).astype(np.float32)


# Run-wide settings
batch_size = 256  # mini-batch size handed to autoencoder.fit
latent_dim = 50   # module-level value; the Optuna objective samples its own latent_dim
n_splits = 5      # number of folds for the K-fold cross-validator

def build_shallow_autoencoder(input_dim, latent_dim, hidden_units):
    """Assemble a dense autoencoder with one hidden layer on each side of the bottleneck.

    Args:
        input_dim: Number of input features; also the width of the linear
            reconstruction layer.
        latent_dim: Width of the ReLU bottleneck layer.
        hidden_units: Width of the ReLU hidden layer used in both the encoder
            and the decoder.

    Returns:
        A tuple ``(autoencoder, encoder)`` of Keras models that share the same
        input and layers: ``autoencoder`` maps the input to its reconstruction,
        ``encoder`` maps the input to the bottleneck activations.
    """
    x_in = layers.Input(shape=(input_dim,))

    # Encoder half: input -> hidden -> bottleneck
    enc_hidden = layers.Dense(units=hidden_units, activation='relu')(x_in)
    code = layers.Dense(units=latent_dim, activation='relu')(enc_hidden)

    # Decoder half: bottleneck -> hidden -> linear reconstruction
    dec_hidden = layers.Dense(units=hidden_units, activation='relu')(code)
    reconstruction = layers.Dense(units=input_dim)(dec_hidden)

    # The full model is created first, then the encoder that reuses its layers
    return (
        models.Model(inputs=x_in, outputs=reconstruction),
        models.Model(inputs=x_in, outputs=code),
    )

def train_and_optimize_model(X, y, model_name, kfold, n_trials=20, epochs=50,
                             patience=10, seed=42):
    """Search autoencoder hyperparameters with Optuna using K-fold reconstruction loss.

    Each trial samples ``latent_dim``, ``hidden_units`` and ``learning_rate``,
    trains a fresh shallow autoencoder on every training fold (input == target)
    and scores the trial by the mean validation MSE across folds.

    Reads the module-level ``device`` and ``batch_size`` settings and calls
    ``build_shallow_autoencoder``.

    Args:
        X: 2-D array of features, indexed by row with the fold indices.
        y: Target array; only forwarded to ``kfold.split`` (the autoencoder
            itself is trained to reconstruct ``X``).
        model_name: Label used in the summary line printed at the end.
        kfold: Cross-validator exposing ``split(X, y)``.
        n_trials: Number of Optuna trials to run.
        epochs: Maximum number of training epochs per fold.
        patience: Early-stopping patience, in epochs.
        seed: Seed for the Optuna sampler so the sequence of sampled
            hyperparameters is repeatable; pass ``None`` for an unseeded sampler.

    Returns:
        dict: The hyperparameters of the best trial.
    """
    n_features = X.shape[1]

    def objective(trial):
        latent_dim = trial.suggest_int("latent_dim", 10, 100)
        hidden_units = trial.suggest_int("hidden_units", 128, 1024)
        # suggest_loguniform is deprecated; suggest_float(log=True) is its replacement
        learning_rate = trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True)

        fold_losses = []

        for train_idx, val_idx in kfold.split(X, y):
            X_train, X_val = X[train_idx], X[val_idx]

            # Drop Keras global state left by the previous fold/trial; without
            # this, state from every model built during the study accumulates.
            K.clear_session()

            with tf.device(device):
                autoencoder, _ = build_shallow_autoencoder(n_features, latent_dim, hidden_units)
                autoencoder.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                                    loss=losses.MeanSquaredError())
                # restore_best_weights=True means the evaluate() call below
                # scores the best epoch rather than the last one.
                es = callbacks.EarlyStopping(patience=patience, restore_best_weights=True)

                autoencoder.fit(
                    X_train, X_train,
                    validation_data=(X_val, X_val),
                    epochs=epochs,
                    batch_size=batch_size,
                    callbacks=[es],
                    verbose=0
                )

                fold_losses.append(autoencoder.evaluate(X_val, X_val, verbose=0))

        return float(np.mean(fold_losses))

    # TPE is Optuna's default single-objective sampler; seeding it makes the
    # hyperparameter suggestions repeatable from run to run.
    sampler = optuna.samplers.TPESampler(seed=seed)
    study = optuna.create_study(direction="minimize", sampler=sampler)
    study.optimize(objective, n_trials=n_trials)

    print(f"Best trial for {model_name}: {study.best_trial.params}")
    return study.best_trial.params




In [0]:
import pandas as pd

# Read the tab-separated dataset: first row is the header, first column the index
data = pd.read_csv(
    "../data/age_pred_dataset.csv",
    sep='\t',
    header=0,
    index_col=0,
)
data


In [0]:
# Separate the "age" column (target) from the remaining columns (features)
y = data["age"].to_numpy()
X = data.drop(columns=["age"]).to_numpy()
(y.shape, X.shape)

In [0]:

# Standardize the features and the target, each with its own fitted scaler.
# NOTE(review): both scalers are fit on the full dataset before the K-fold
# split, so validation folds contribute to the scaling statistics — confirm
# this is acceptable for the hyperparameter search.
scaler_X = StandardScaler().fit(X)
scaler_y = StandardScaler().fit(y.reshape(-1, 1))
X = scaler_X.transform(X)
# The scaler needs a 2-D column; flatten back to 1-D afterwards
y = scaler_y.transform(y.reshape(-1, 1)).flatten()

In [0]:
# Shuffled K-fold splitter with a fixed random_state so the folds are repeatable
kfold = KFold(
    n_splits=n_splits,
    shuffle=True,
    random_state=42,
)

# Run the Optuna search for the shallow autoencoder and keep the best parameters
shallow_params = train_and_optimize_model(
    X,
    y,
    model_name="Shallow_Autoencoder",
    kfold=kfold,
)

print("Optimization complete.")