In [2]:
## Only use this for Google Colab
from google.colab import drive
drive.mount('/content/drive')
import os
os.chdir('drive/MyDrive/oldAIboy/houseprice')

!pip install category_encoders
!pip install optuna

Mounted at /content/drive
Collecting category_encoders
  Downloading category_encoders-2.8.1-py3-none-any.whl.metadata (7.9 kB)
Downloading category_encoders-2.8.1-py3-none-any.whl (85 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.7/85.7 kB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: category_encoders
Successfully installed category_encoders-2.8.1
Collecting optuna
  Downloading optuna-4.2.1-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.15.1-py3-none-any.whl.metadata (7.2 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.9-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.2.1-py3-none-any.whl (383 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m383.6/383.6 kB[0m [31m40.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.15.1

### XGBoost regressor

#### Setup modules

In [3]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
from scipy.stats import uniform, randint, loguniform

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler, MinMaxScaler, PowerTransformer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.feature_selection import RFE
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

from xgboost import XGBClassifier, DMatrix, XGBRegressor
from category_encoders import TargetEncoder
import pickle

import sys
print(sys.executable)

def data_preprocessing(traindataloc = "data/train.csv", testdataloc = "data/test.csv"):
  """Load the train/test CSVs and clean the training features.

  Args:
    traindataloc: path of the training CSV; must contain 'Id' and 'SalePrice'.
    testdataloc: path of the test CSV; must contain 'Id'.

  Returns:
    (X_mlb, ylog, X_test):
      X_mlb  - training features indexed by Id, with PoolQC/MiscFeature dropped
               and the missing values handled below filled in.
      ylog   - natural log of SalePrice, sharing X_mlb's index.
      X_test - test features indexed by Id; NOT cleaned here (only re-indexed).
  """
  train_raw = pd.read_csv(traindataloc)
  print("Full train dataset shape is {}".format(train_raw.shape))
  test_raw = pd.read_csv(testdataloc)
  print("Full test dataset shape is {}".format(test_raw.shape))

  # Features are every column but the target, keyed by Id.
  X_mlb = train_raw.drop(columns='SalePrice').set_index('Id')
  # The target is modelled on the log scale and aligned to the feature index.
  ylog = np.log(train_raw['SalePrice'])
  ylog.index = X_mlb.index
  # set_index returns a new frame, so the raw test data stays untouched.
  X_test = test_raw.set_index('Id')

  # Two categorical columns are almost entirely missing: drop them.
  X_mlb = X_mlb.drop(columns=['PoolQC', 'MiscFeature'])

  # Categorical columns where a missing value is filled with the literal 'None'.
  catna_none = ['Alley','Fence','MasVnrType', 'FireplaceQu', 'GarageType', 'GarageQual', 'GarageCond', 'GarageFinish', 'BsmtFinType2', 'BsmtExposure', 'BsmtFinType1', 'BsmtQual', 'BsmtCond']
  X_mlb[catna_none] = X_mlb[catna_none].fillna('None')

  # 'Electrical': fill with the most frequent category.
  electrical_mode = X_mlb['Electrical'].mode()[0]
  X_mlb['Electrical'] = X_mlb['Electrical'].fillna(electrical_mode)

  # Numeric columns: sentinel year 1700 for GarageYrBlt, 0 for MasVnrArea,
  # and the column mean for LotFrontage.
  X_mlb['GarageYrBlt'] = X_mlb['GarageYrBlt'].fillna(1700)
  X_mlb['MasVnrArea'] = X_mlb['MasVnrArea'].fillna(0)
  lot_frontage_mean = X_mlb['LotFrontage'].mean()
  X_mlb['LotFrontage'] = X_mlb['LotFrontage'].fillna(lot_frontage_mean)

  print("Full train dataset shape after cleaning is {}".format(X_mlb.shape))
  print("Full test dataset shape after cleaning is {}".format(X_test.shape))
  return X_mlb, ylog, X_test

def mean_encoding(X_mlb, ylog):
  """Split into train/validation and target-encode the categorical columns.

  Args:
    X_mlb: cleaned feature DataFrame.
    ylog: target Series aligned with X_mlb.

  Returns:
    (mean_encoder, X_train_encoded, X_valid_encoded, y_train, y_valid), where the
    encoder is fitted on the training split only and then applied to the
    validation split.
  """
  # Columns with object dtype are the ones to be encoded.
  categorical_cols = X_mlb.select_dtypes(include=['object']).columns.tolist()

  # Hold out a fixed 200 rows for validation; seed fixed for repeatability.
  X_train, X_valid, y_train, y_valid = train_test_split(
      X_mlb, ylog, test_size=200, random_state=42
  )

  # Fit on the training split, then reuse the fitted mapping on validation.
  mean_encoder = TargetEncoder(cols=categorical_cols)
  X_train_encoded = mean_encoder.fit_transform(X_train, y_train)
  X_valid_encoded = mean_encoder.transform(X_valid)

  print("Encoded Training Set shape: ", X_train_encoded.shape)
  print("Encoded Valid Set shape: ", X_valid_encoded.shape)
  return mean_encoder,X_train_encoded, X_valid_encoded, y_train, y_valid

def feature_selection(X_train_encoded, X_valid_encoded, y_train):
  """Select 60 features with recursive feature elimination around an XGBoost regressor.

  Args:
    X_train_encoded: encoded training features (DataFrame; its columns are used
      to name the selected features).
    X_valid_encoded: encoded validation features.
    y_train: training target.

  Returns:
    (selector, selected_features, X_train_RFE, X_valid_RFE): the fitted RFE
    object, the names of the kept columns, and both splits reduced to them.
  """
  # Estimator whose fits drive the elimination.
  base_estimator = XGBRegressor(objective='reg:squarederror',
                                n_estimators=100,
                                learning_rate=0.1,
                                eval_metric='rmse',
                                n_jobs=-1)

  # Eliminate one feature per step until 60 remain; fit on the training split only.
  selector = RFE(estimator=base_estimator, n_features_to_select=60, step=1).fit(X_train_encoded, y_train)

  # Apply the fitted mask to both splits and recover the kept column names.
  X_train_RFE, X_valid_RFE = (
      selector.transform(split) for split in (X_train_encoded, X_valid_encoded)
  )
  selected_features = X_train_encoded.columns[selector.support_]

  print("RFE Training Set shape: ", X_train_RFE.shape)
  print("RFE Valid Set shape: ", X_valid_RFE.shape)
  return selector, selected_features,X_train_RFE, X_valid_RFE

def model_training(model, param_dist, n_iter, cv, scoring, X_train_RFE, y_train, X_valid_RFE, y_valid):
  """Tune `model` with a randomized search and report train/validation RMSE.

  Args:
    model: estimator handed to RandomizedSearchCV.
    param_dist: parameter distributions to sample from.
    n_iter: number of sampled candidates.
    cv: cross-validation setting passed to the search.
    scoring: scoring name; the "Best score" print takes sqrt(-best_score_), so a
      negated squared-error score is expected.
    X_train_RFE, y_train: data the search is fitted on.
    X_valid_RFE, y_valid: hold-out data, passed as `eval_set` to fit and used
      for the reported validation RMSE.

  Returns:
    The best parameter dict found by the search.
  """
  search = RandomizedSearchCV(
      estimator=model,
      param_distributions=param_dist,
      n_iter=n_iter,
      cv=cv,
      scoring=scoring,
      random_state=42,
      verbose=2,
      n_jobs=3,
  )
  # eval_set / verbose are extra fit keyword arguments handed through the search.
  search.fit(X_train_RFE, y_train, eval_set=[(X_valid_RFE, y_valid)],verbose=False)

  print("Best score: ", np.sqrt((-search.best_score_)))
  print("Best parameters found: ", search.best_params_)

  # Score the best estimator on the hold-out set first, then on the training set.
  best_estimator = search.best_estimator_
  valid_rmse = np.sqrt(mean_squared_error(y_valid, best_estimator.predict(X_valid_RFE)))
  print(f'Root Mean Squared Error for validset: {valid_rmse:.4f}')
  train_rmse = np.sqrt(mean_squared_error(y_train, best_estimator.predict(X_train_RFE)))
  print(f'Root Mean Squared Error for trainset: {train_rmse:.4f}')
  return search.best_params_

def test_data_cleaning(X_test, mean_encoder, selector):
  ## drop 2 categorical features with super high NA%
  ## drop 2 categorical features with super high NA%
  X_test2 = X_test.copy(deep=True)
  X_test2.drop(['PoolQC','MiscFeature'], axis=1, inplace=True)
  ### fill NA for a bunch of categorical features - fill with 'None'
  catna_none = ['Alley','Fence','MasVnrType', 'FireplaceQu', 'GarageType', 'GarageQual', 'GarageCond', 'GarageFinish', 'BsmtFinType2', 'BsmtExposure', 'BsmtFinType1', 'BsmtQual', 'BsmtCond']
  X_test2[catna_none] = X_test2[catna_none].fillna('None')
  ### fill na for cat features 'Electrical' - fill with most frequent value
  most_frequent_value = X_test2['Electrical'].mode()[0]
  X_test2['Electrical'] = X_test2['Electrical'].fillna(most_frequent_value)
  ### fill NA for 2 numerical features
  X_test2['GarageYrBlt'] = X_test2['GarageYrBlt'].fillna(1700) ##fill na of garageyrblt with 1700
  X_test2['MasVnrArea'] = X_test2['MasVnrArea'].fillna(0) ##fill na of MasVnrArea with 0
  ##fill na of LotFrontage with mean
  X_test2['LotFrontage'] = X_test2['LotFrontage'].fillna(X_test2['LotFrontage'].mean())

  ## implement mean-encoding
  X_test_encoded = mean_encoder.transform(X_test2)
  print("Training Set shape: ", X_train_encoded.shape)
  print("Valid Set shape: ", X_valid_encoded.shape)
  print("Test Set shape: ", X_test_encoded.shape)
  ##implement RFE selection
  X_test_RFE = selector.transform(X_test_encoded)
  print(X_test_RFE.shape)
  return X_test_RFE

def model_inference(X_test, X_test_RFE, selected_features, X_train_RFE, X_valid_RFE, y_train, y_valid, best_params):
  """Refit XGBoost on train+validation with the tuned parameters and predict the test set.

  Args:
    X_test: test features indexed by Id; only the index is used (for the output Ids).
    X_test_RFE: feature-selected test matrix passed to predict.
    selected_features: names of the selected columns.
    X_train_RFE, X_valid_RFE: feature-selected train/validation matrices.
    y_train, y_valid: log-scale targets for the two splits.
    best_params: tuned hyperparameters; this dict is NOT modified.

  Returns:
    DataFrame with columns 'Id' and 'SalePrice' (predictions mapped back with exp).
  """
  ## combine the train data and valid data; both X and y get a fresh 0..n-1 index
  ## so the combined frames carry matching, unique row labels
  X_trainrfe_combo = pd.concat(
      [pd.DataFrame(X_train_RFE, columns=selected_features),
       pd.DataFrame(X_valid_RFE, columns=selected_features)],
      axis=0, ignore_index=True)
  y_trainrfe_combo = pd.concat(
      [pd.DataFrame(y_train), pd.DataFrame(y_valid)],
      axis=0, ignore_index=True)

  # Build the final parameter set on a copy: calling update() on best_params
  # would silently change the caller's dict.
  final_params = {**best_params,
                  'objective': 'reg:squarederror',
                  'eval_metric': 'rmse'}
  # Named final_model so the local does not shadow this function's own name.
  final_model = XGBRegressor(**final_params)
  # Train the final model on the combined dataset
  final_model.fit(X_trainrfe_combo, y_trainrfe_combo)
  ytrain_pred_rfe = final_model.predict(X_trainrfe_combo)
  rmse_train = np.sqrt(mean_squared_error(y_trainrfe_combo, ytrain_pred_rfe))
  print(f'Root Mean Squared Error for trainset: {rmse_train:.4f}')

  # Predictions are on the log scale; exp() maps them back to prices.
  y_pred_test = final_model.predict(X_test_RFE)
  testpred = np.exp(y_pred_test)
  print(f'average sales prediction: {np.mean(testpred)}')

  sub = pd.DataFrame({'Id': X_test.index.tolist(), 'SalePrice': testpred})
  print(f'snapshot of inference results: {sub.head()}')
  return sub

/usr/bin/python3


#### Implement XGBregressor

In [4]:
%%time
## data preprocessing, feature engineering, feature selection
## (these three calls are repeated at the top of the next cell, which then continues with tuning and inference)
X_mlb, ylog, X_test = data_preprocessing()
mean_encoder, X_train_encoded, X_valid_encoded, y_train, y_valid = mean_encoding(X_mlb, ylog)
selector, selected_features, X_train_RFE, X_valid_RFE = feature_selection(X_train_encoded, X_valid_encoded, y_train)

Full train dataset shape is (1460, 81)
Full test dataset shape is (1459, 80)
Full train dataset shape after cleaning is (1460, 77)
Full test dataset shape after cleaning is (1459, 79)
Encoded Training Set shape:  (1260, 77)
Encoded Valid Set shape:  (200, 77)
RFE Training Set shape:  (1260, 60)
RFE Valid Set shape:  (200, 60)
CPU times: user 55.3 s, sys: 5.43 ms, total: 55.3 s
Wall time: 6.7 s


In [17]:
%%time
## data preprocessing, feature engineering, feature selection
X_mlb, ylog, X_test = data_preprocessing()
mean_encoder, X_train_encoded, X_valid_encoded, y_train, y_valid = mean_encoding(X_mlb, ylog)
selector, selected_features, X_train_RFE, X_valid_RFE = feature_selection(X_train_encoded, X_valid_encoded, y_train)

## Model training with parameter tuning
model = XGBRegressor(objective='reg:squarederror',eval_metric='rmse')
n_iter=50
cv=3
scoring='neg_mean_squared_error'
# Define the parameter distribution
# (scipy conventions: randint(low, high) excludes high; uniform(loc, scale) spans [loc, loc + scale])
param_dist = { 'n_estimators': randint(150, 500)  # Integer between 150 and 499
                ,'max_depth': randint(3, 10)  # Integer between 3 and 9
                ,'learning_rate': loguniform(0.005, 0.3)  # Log-uniform distribution between 0.005 and 0.3
                ,'subsample': uniform(0.5, 0.5)  # Randomly pick between 0.5 and 1.0
                ,'colsample_bytree': uniform(0.4, 0.6)  # Randomly pick between 0.4 and 1.0
                ,'gamma': uniform(0, 0.5)  # Randomly pick between 0 and 0.5
                ,'min_child_weight': randint(1, 6)  # Integer between 1 and 5
                #,'reg_alpha': uniform(0, 10)  # Randomly pick between 0 and 10
                #,'reg_lambda': uniform(0, 10)  # Randomly pick between 0 and 10
             }

best_params = model_training(model, param_dist, n_iter, cv, scoring, X_train_RFE, y_train, X_valid_RFE, y_valid)

## Model Inference
X_test_RFE = test_data_cleaning(X_test, mean_encoder, selector)
pred_sub=model_inference(X_test, X_test_RFE, selected_features, X_train_RFE, X_valid_RFE, y_train, y_valid, best_params)

Full train dataset shape is (1460, 81)
Full test dataset shape is (1459, 80)
Full train dataset shape after cleaning is (1460, 77)
Full test dataset shape after cleaning is (1459, 79)
Encoded Training Set shape:  (1260, 77)
Encoded Valid Set shape:  (200, 77)
RFE Training Set shape:  (1260, 60)
RFE Valid Set shape:  (200, 60)
Fitting 3 folds for each of 50 candidates, totalling 150 fits
Best score:  0.12653205175949037
Best parameters found:  {'colsample_bytree': np.float64(0.916438349953806), 'gamma': np.float64(0.0034760652655953517), 'learning_rate': np.float64(0.04047212046195181), 'max_depth': 3, 'min_child_weight': 5, 'n_estimators': 374, 'subsample': np.float64(0.5599326836668415)}
Root Mean Squared Error for validset: 0.1270
Root Mean Squared Error for trainset: 0.0753
Training Set shape:  (1260, 77)
Valid Set shape:  (200, 77)
Test Set shape:  (1459, 77)
(1459, 60)
Root Mean Squared Error for trainset: 0.0794
average sales prediction: 177583.6875
snapshot of inference results:

### Deep Neural Network
#### DNN is famous for creating new features and handling feature selection, let's give it a try!

#### Some preparation - import library, build pytorch data loader

In [8]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
import optuna
import pickle
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

batch_size = 32         # Batch size used for both the train and validation DataLoaders

# ====== Data Pipeline - normalize and pack train/valid data to Loaders ======
def dnn_loader (batch_size, X_train_encoded, X_valid_encoded, y_train, y_valid):
  """Standardize features and target, then wrap both splits in DataLoaders.

  The pre-RFE (full) encoded feature set is used on purpose, leaving feature
  engineering to the network. Both scalers are fitted on the training split only.
  Tensors are float32 and moved to the module-level `device`.

  Returns:
    (train_loader, val_loader, valid_features, valid_labels, scaler, target_scaler)
    - train_loader shuffles, val_loader does not; valid_features / valid_labels
    are the full validation tensors; the two scalers are the fitted feature and
    target scalers.
  """
  # Feature scaling: fit on train, apply to validation.
  scaler = StandardScaler()
  train_x = scaler.fit_transform(X_train_encoded.values)
  valid_x = scaler.transform(X_valid_encoded.values)

  # Target scaling: same fit-on-train rule, on a single-column 2-D array.
  target_scaler = StandardScaler()
  train_y = target_scaler.fit_transform(y_train.values.reshape(-1, 1))
  valid_y = target_scaler.transform(y_valid.values.reshape(-1, 1))

  def _as_tensor(array, column=False):
    # float32 tensor on the target device; labels are forced to shape (n, 1)
    tensor = torch.tensor(array, dtype=torch.float32)
    if column:
      tensor = tensor.view(-1, 1)
    return tensor.to(device)

  train_features = _as_tensor(train_x)
  valid_features = _as_tensor(valid_x)
  train_labels = _as_tensor(train_y, column=True)
  valid_labels = _as_tensor(valid_y, column=True)

  # Only the training loader shuffles.
  train_loader = DataLoader(TensorDataset(train_features, train_labels), batch_size=batch_size, shuffle=True)
  val_loader = DataLoader(TensorDataset(valid_features, valid_labels), batch_size=batch_size, shuffle=False)
  return train_loader, val_loader, valid_features, valid_labels, scaler, target_scaler

# Build the loaders once at notebook level: train_model reads train_loader,
# valid_features and valid_labels as globals, and target_scaler is needed later
# to map predictions back to the log-price scale.
train_loader, val_loader, valid_features, valid_labels, scaler, target_scaler = dnn_loader(batch_size, X_train_encoded, X_valid_encoded, y_train, y_valid)

#### DNN tuning and training - obtained RMSE ON VALIDSET 0.1243

In [6]:
# Fixed hyperparameters (for aspects not tuned by Optuna)
dropout_rate = 0.2      # Dropout rate in the MLP
max_epochs = 100        # Maximum number of epochs per trial
patience = 10           # ReduceLROnPlateau patience: epochs without val-loss improvement before the lr is halved
criterion = nn.MSELoss()  # Mean Squared Error for regression

# ====== Model Definition Functions ======
def define_model(trial, input_dim=None, dropout=None):
    """Define the MLP architecture for a given Optuna trial.

    Tunes both the number of hidden layers (2-4) and the units per layer (64-256).
    Each hidden layer is Linear -> ReLU (-> Dropout when the rate is > 0),
    followed by a single-unit Linear output layer for regression.

    Args:
        trial: Optuna trial (anything exposing `suggest_int(name, low, high)`).
        input_dim: number of input features. Defaults to the notebook global
            `X_train_encoded.shape[1]`, so `define_model(trial)` behaves as before.
        dropout: dropout probability. Defaults to the notebook global `dropout_rate`.

    Returns:
        An `nn.Sequential` model (not yet moved to any device).
    """
    # Resolve the defaults lazily so the function can also be used (and tested)
    # without the notebook-level globals when explicit values are given.
    if input_dim is None:
        input_dim = X_train_encoded.shape[1]  # number of features
    if dropout is None:
        dropout = dropout_rate

    n_layers = trial.suggest_int("n_layers", 2, 4)
    layers = []
    for i in range(n_layers):
        units = trial.suggest_int(f"n_units_l{i}", 64, 256)
        layers.append(nn.Linear(input_dim, units))
        layers.append(nn.ReLU())
        if dropout > 0:
            layers.append(nn.Dropout(dropout))
        input_dim = units  # next layer consumes this layer's output width
    layers.append(nn.Linear(input_dim, 1))  # Output layer for regression
    return nn.Sequential(*layers)

def build_model_from_config(config, input_dim=None, dropout=None):
    """Rebuild a model from a configuration dictionary (e.g., best found params).

    The layer layout mirrors `define_model`: per hidden layer Linear -> ReLU
    (-> Dropout when the rate is > 0), then a single-unit Linear output. The
    layout must match the one used in training for a saved state dict to load.

    Args:
        config: mapping with "n_layers" and one "n_units_l{i}" entry per layer.
        input_dim: number of input features. Defaults to the notebook global
            `X_train_encoded.shape[1]`, so existing one-argument calls are unchanged.
        dropout: dropout probability. Defaults to the notebook global `dropout_rate`.

    Returns:
        An `nn.Sequential` model (not yet moved to any device).

    Raises:
        KeyError: if `config` lacks "n_layers" or a required "n_units_l{i}" key.
    """
    # Fall back to the notebook globals only when no explicit value is supplied.
    if input_dim is None:
        input_dim = X_train_encoded.shape[1]
    if dropout is None:
        dropout = dropout_rate

    layers = []
    for i in range(config["n_layers"]):
        units = config[f"n_units_l{i}"]
        layers.append(nn.Linear(input_dim, units))
        layers.append(nn.ReLU())
        if dropout > 0:
            layers.append(nn.Dropout(dropout))
        input_dim = units  # next layer consumes this layer's output width
    layers.append(nn.Linear(input_dim, 1))
    return nn.Sequential(*layers)

## generate the data loaders for DNN model training
# train_loader, val_loader, valid_features, valid_labels, target_scaler = dnn_loader(batch_size, X_train_encoded, X_valid_encoded, y_train, y_valid)

# ====== Training Function with ReduceLROnPlateau ======
def train_model(model, lr):
    """
    Train the model for up to max_epochs, halving the learning rate with
    ReduceLROnPlateau and stopping early once it has decayed to its minimum.

    Reads the notebook globals train_loader, valid_features, valid_labels,
    criterion, patience, max_epochs and device.

    Args:
        model: network to train (already on `device`); modified in place and
            left holding the best-epoch weights on return.
        lr: initial learning rate for Adam.

    Returns:
        (best_val_loss, best_state_dict): the lowest validation MSE seen and an
        independent CPU copy of the weights from that epoch (None if no epoch
        improved on +inf).
    """
    # Create optimizer with the tuned learning rate
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    # Setup ReduceLROnPlateau scheduler. The `verbose` flag is deprecated for LR
    # schedulers (and rejected by newer PyTorch releases), so it is not passed;
    # the current lr is read from the optimizer below instead.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', patience=patience, factor=0.5, min_lr=1e-6
    )

    best_val_loss = float('inf')
    best_state_dict = None

    for epoch in range(1, max_epochs + 1):
        model.train()
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            preds = model(X_batch)
            loss = criterion(preds, y_batch)
            loss.backward()
            optimizer.step()

        # Evaluate on the full validation set in one pass
        model.eval()
        with torch.no_grad():
            preds = model(valid_features)
            val_loss = F.mse_loss(preds, valid_labels).item()

        # Update best validation loss and snapshot the weights if improved.
        # clone() is essential: on CPU, .cpu() returns the very same tensor, so
        # without it the "snapshot" would keep tracking later optimizer steps and
        # end up holding the last epoch's weights instead of the best ones.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_state_dict = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}

        # Step the scheduler using the validation loss
        scheduler.step(val_loss)
        current_lr = optimizer.param_groups[0]['lr']
        if epoch % 100 == 0:
          print(f"Epoch {epoch}: val_loss={val_loss:.6f}, lr={current_lr:.1e}")

        # Early stopping: if lr has reached the minimum threshold, stop training
        if current_lr <= scheduler.min_lrs[0]:
            print(f"Learning rate reached minimum threshold ({scheduler.min_lrs[0]}). Early stopping.")
            break

    # Restore the best-epoch weights so the returned loss matches the model.
    if best_state_dict is not None:
        model.load_state_dict(best_state_dict)
    return best_val_loss, best_state_dict

# ====== Global Tracking for Best Model ======
# Updated by objective() whenever a trial beats the best validation loss so far.
best_global_model_state = None          # state dict of the best model across trials
best_global_val_loss = float('inf')     # lowest validation MSE across trials
best_global_params = None               # trial.params of the best trial

# ====== Optuna Objective Function ======
def objective(trial):
    """Optuna objective: train one sampled network and return its validation MSE.

    Samples the initial learning rate (log scale, 1e-5..1e-2) plus the
    architecture, trains via train_model, and - when the trial beats the best
    loss recorded in the module-level trackers - updates them and saves the
    state dict to "best_dnnmodel_statedict_0317.pth".
    """
    global best_global_model_state, best_global_val_loss, best_global_params

    # Sample the learning rate first, then the architecture, then train.
    lr = trial.suggest_float("lr", 1e-5, 1e-2, log=True)
    candidate = define_model(trial).to(device)
    val_loss, best_state = train_model(candidate, lr)

    # Nothing to record unless this trial strictly improves on the global best.
    if not (val_loss < best_global_val_loss):
        return val_loss  # Optuna minimizes this

    best_global_val_loss = val_loss
    best_global_params = trial.params  # includes architecture & learning rate
    best_global_model_state = best_state
    torch.save(best_global_model_state, "best_dnnmodel_statedict_0317.pth")
    print(f"*** New best model found! Val MSE={val_loss:.6f}, Params={best_global_params}")
    return val_loss  # Optuna minimizes this

In [9]:
%%time
# ====== Run Optuna Study ======
# Each trial calls objective(), which trains one network and returns its validation MSE.
study = optuna.create_study(direction="minimize")
print("Starting hyperparameter search...")
study.optimize(objective, n_trials=100, gc_after_trial=True)  # 100 trials

# ====== Hyperparameter Tuning Results ======
print("\nOptuna search completed.")
print(f"Number of finished trials: {len(study.trials)}")
print(f"Best trial index: {study.best_trial.number}")
print(f"Best validation MSE: {study.best_value:.6f}")
# NOTE: this rebinds `best_params`, the name the XGBoost section used for its tuned parameters.
best_params = study.best_params
print("Best hyperparameter configuration:", best_params)
# The study value is the MSE on the standardized target (valid_labels), so this
# RMSE is in scaled units, unlike the RMSE printed at the end of this cell.
best_val_rmse = study.best_value ** 0.5
print(f"Best validation RMSE: {best_val_rmse:.6f}")

# Save the best parameters to a file
with open('dnn_best_params_0317.pkl', 'wb') as file:
    pickle.dump(best_params, file)


# rebuild the DNN from the saved parameters and the state dict written by objective()
with open('dnn_best_params_0317.pkl', 'rb') as file:
    best_params = pickle.load(file)
best_model = build_model_from_config(best_params).to(device)
best_model.load_state_dict(torch.load("best_dnnmodel_statedict_0317.pth"))

# Evaluation on validation set
best_model.eval()
with torch.no_grad():
    valid_predictions = best_model(valid_features)
    valid_predictions = valid_predictions.cpu().numpy()  # move to host memory before converting to NumPy
    # Undo the target standardization so the RMSE is comparable with y_valid (log price).
    valid_predictions_inv = target_scaler.inverse_transform(valid_predictions)
    rmse = np.sqrt(mean_squared_error(y_valid, valid_predictions_inv))
    print(f"RMSE on validation set: {rmse:.4f}")

[I 2025-03-18 01:24:24,968] A new study created in memory with name: no-name-21771f26-26d1-4330-9f51-19189b829227


Starting hyperparameter search...




Epoch 100: val_loss=0.129977, lr=1.3e-05


[I 2025-03-18 01:24:37,544] Trial 0 finished with value: 0.11548255383968353 and parameters: {'lr': 0.0033178263844343987, 'n_layers': 2, 'n_units_l0': 209, 'n_units_l1': 79}. Best is trial 0 with value: 0.11548255383968353.


*** New best model found! Val MSE=0.115483, Params={'lr': 0.0033178263844343987, 'n_layers': 2, 'n_units_l0': 209, 'n_units_l1': 79}


[I 2025-03-18 01:24:46,976] Trial 1 finished with value: 0.12571844458580017 and parameters: {'lr': 0.00013456393289252073, 'n_layers': 4, 'n_units_l0': 231, 'n_units_l1': 189, 'n_units_l2': 79, 'n_units_l3': 114}. Best is trial 0 with value: 0.11548255383968353.


Epoch 100: val_loss=0.128265, lr=2.1e-06


[I 2025-03-18 01:24:56,467] Trial 2 finished with value: 0.1151684895157814 and parameters: {'lr': 0.00016410622783667102, 'n_layers': 4, 'n_units_l0': 240, 'n_units_l1': 213, 'n_units_l2': 212, 'n_units_l3': 85}. Best is trial 2 with value: 0.1151684895157814.


Epoch 100: val_loss=0.124059, lr=1.3e-06
*** New best model found! Val MSE=0.115168, Params={'lr': 0.00016410622783667102, 'n_layers': 4, 'n_units_l0': 240, 'n_units_l1': 213, 'n_units_l2': 212, 'n_units_l3': 85}


[I 2025-03-18 01:25:05,795] Trial 3 finished with value: 0.11780187487602234 and parameters: {'lr': 0.0003104309618859746, 'n_layers': 4, 'n_units_l0': 100, 'n_units_l1': 254, 'n_units_l2': 66, 'n_units_l3': 213}. Best is trial 2 with value: 0.1151684895157814.


Epoch 100: val_loss=0.145416, lr=4.9e-06


[I 2025-03-18 01:25:13,122] Trial 4 finished with value: 0.12049293518066406 and parameters: {'lr': 0.0004807206267647334, 'n_layers': 2, 'n_units_l0': 223, 'n_units_l1': 113}. Best is trial 2 with value: 0.1151684895157814.


Epoch 100: val_loss=0.131575, lr=3.8e-06


[I 2025-03-18 01:25:20,449] Trial 5 finished with value: 0.11827542632818222 and parameters: {'lr': 0.000442313739373958, 'n_layers': 2, 'n_units_l0': 153, 'n_units_l1': 218}. Best is trial 2 with value: 0.1151684895157814.


Epoch 100: val_loss=0.131838, lr=3.5e-06


[I 2025-03-18 01:25:28,552] Trial 6 finished with value: 0.11760304868221283 and parameters: {'lr': 0.00015394441247376604, 'n_layers': 3, 'n_units_l0': 93, 'n_units_l1': 97, 'n_units_l2': 116}. Best is trial 2 with value: 0.1151684895157814.


Epoch 100: val_loss=0.121878, lr=4.8e-06


[I 2025-03-18 01:25:36,013] Trial 7 finished with value: 0.102622389793396 and parameters: {'lr': 0.00025924210512089416, 'n_layers': 2, 'n_units_l0': 189, 'n_units_l1': 237}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.110690, lr=2.0e-06
*** New best model found! Val MSE=0.102622, Params={'lr': 0.00025924210512089416, 'n_layers': 2, 'n_units_l0': 189, 'n_units_l1': 237}


[I 2025-03-18 01:25:44,243] Trial 8 finished with value: 0.10972830653190613 and parameters: {'lr': 0.000762537875832645, 'n_layers': 3, 'n_units_l0': 177, 'n_units_l1': 186, 'n_units_l2': 125}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.135865, lr=3.0e-06


[I 2025-03-18 01:25:51,498] Trial 9 finished with value: 0.12255311757326126 and parameters: {'lr': 0.0023674590854357437, 'n_layers': 2, 'n_units_l0': 194, 'n_units_l1': 181}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.136640, lr=9.2e-06


[I 2025-03-18 01:25:59,881] Trial 10 finished with value: 0.13132011890411377 and parameters: {'lr': 1.8621548173300345e-05, 'n_layers': 3, 'n_units_l0': 140, 'n_units_l1': 138, 'n_units_l2': 251}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.131320, lr=1.9e-05


[I 2025-03-18 01:26:08,247] Trial 11 finished with value: 0.12567101418972015 and parameters: {'lr': 0.0010768596651674137, 'n_layers': 3, 'n_units_l0': 180, 'n_units_l1': 253, 'n_units_l2': 153}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.148966, lr=4.2e-06


[I 2025-03-18 01:26:16,495] Trial 12 finished with value: 0.12465661764144897 and parameters: {'lr': 2.56166974620471e-05, 'n_layers': 3, 'n_units_l0': 128, 'n_units_l1': 154, 'n_units_l2': 166}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.124813, lr=2.6e-05


[I 2025-03-18 01:26:23,798] Trial 13 finished with value: 0.12280292063951492 and parameters: {'lr': 0.006718888879805836, 'n_layers': 2, 'n_units_l0': 173, 'n_units_l1': 220}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.140360, lr=2.6e-05


[I 2025-03-18 01:26:32,137] Trial 14 finished with value: 0.11338162422180176 and parameters: {'lr': 4.956272141949645e-05, 'n_layers': 3, 'n_units_l0': 197, 'n_units_l1': 192, 'n_units_l2': 135}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.116464, lr=6.2e-06


[I 2025-03-18 01:26:40,443] Trial 15 finished with value: 0.11720781028270721 and parameters: {'lr': 0.0012908825729802567, 'n_layers': 3, 'n_units_l0': 252, 'n_units_l1': 230, 'n_units_l2': 188}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.135570, lr=5.0e-06


[I 2025-03-18 01:26:47,957] Trial 16 finished with value: 0.11122855544090271 and parameters: {'lr': 6.887472725165651e-05, 'n_layers': 2, 'n_units_l0': 125, 'n_units_l1': 173}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.111508, lr=1.7e-05


[I 2025-03-18 01:26:57,486] Trial 17 finished with value: 0.11978577077388763 and parameters: {'lr': 0.0008571003514880114, 'n_layers': 4, 'n_units_l0': 164, 'n_units_l1': 138, 'n_units_l2': 106, 'n_units_l3': 246}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.142100, lr=3.3e-06


[I 2025-03-18 01:27:04,757] Trial 18 finished with value: 0.12341005355119705 and parameters: {'lr': 1.0242231064129479e-05, 'n_layers': 2, 'n_units_l0': 207, 'n_units_l1': 202}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.123410, lr=1.0e-05


[I 2025-03-18 01:27:13,003] Trial 19 finished with value: 0.12224648147821426 and parameters: {'lr': 0.007261586143180624, 'n_layers': 3, 'n_units_l0': 68, 'n_units_l1': 156, 'n_units_l2': 100}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.143890, lr=2.8e-05


[I 2025-03-18 01:27:21,355] Trial 20 finished with value: 0.11419341713190079 and parameters: {'lr': 0.0003046959962203922, 'n_layers': 3, 'n_units_l0': 184, 'n_units_l1': 238, 'n_units_l2': 223}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.123355, lr=9.5e-06


[I 2025-03-18 01:27:28,606] Trial 21 finished with value: 0.11701520532369614 and parameters: {'lr': 6.170003012462087e-05, 'n_layers': 2, 'n_units_l0': 121, 'n_units_l1': 172}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.117710, lr=1.5e-05


[I 2025-03-18 01:27:35,900] Trial 22 finished with value: 0.11137660592794418 and parameters: {'lr': 6.515596263917137e-05, 'n_layers': 2, 'n_units_l0': 146, 'n_units_l1': 168}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.112387, lr=1.6e-05


[I 2025-03-18 01:27:43,134] Trial 23 finished with value: 0.11240439862012863 and parameters: {'lr': 0.00010593506058498924, 'n_layers': 2, 'n_units_l0': 114, 'n_units_l1': 203}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.113963, lr=3.3e-06


[I 2025-03-18 01:27:50,422] Trial 24 finished with value: 0.1195143386721611 and parameters: {'lr': 0.0006561214801811752, 'n_layers': 2, 'n_units_l0': 161, 'n_units_l1': 139}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.134563, lr=2.6e-06


[I 2025-03-18 01:27:57,794] Trial 25 finished with value: 0.11964607983827591 and parameters: {'lr': 0.0017603142107050762, 'n_layers': 2, 'n_units_l0': 172, 'n_units_l1': 238}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.140753, lr=6.9e-06


[I 2025-03-18 01:28:04,972] Trial 26 finished with value: 0.11435427516698837 and parameters: {'lr': 0.0002439627385995729, 'n_layers': 2, 'n_units_l0': 136, 'n_units_l1': 118}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.122357, lr=3.8e-06


[I 2025-03-18 01:28:13,361] Trial 27 finished with value: 0.12848824262619019 and parameters: {'lr': 3.2050341435024574e-05, 'n_layers': 3, 'n_units_l0': 85, 'n_units_l1': 181, 'n_units_l2': 175}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.128884, lr=3.2e-05


[I 2025-03-18 01:28:21,806] Trial 28 finished with value: 0.1184634119272232 and parameters: {'lr': 8.78081987460459e-05, 'n_layers': 3, 'n_units_l0': 110, 'n_units_l1': 202, 'n_units_l2': 134}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.120873, lr=5.5e-06


[I 2025-03-18 01:28:29,072] Trial 29 finished with value: 0.11600174009799957 and parameters: {'lr': 0.004213970126399037, 'n_layers': 2, 'n_units_l0': 213, 'n_units_l1': 148}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.147663, lr=1.6e-05


[I 2025-03-18 01:28:38,772] Trial 30 finished with value: 0.11701590567827225 and parameters: {'lr': 0.0001998288152474297, 'n_layers': 4, 'n_units_l0': 196, 'n_units_l1': 74, 'n_units_l2': 88, 'n_units_l3': 173}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.128046, lr=3.1e-06


[I 2025-03-18 01:28:46,248] Trial 31 finished with value: 0.113395094871521 and parameters: {'lr': 6.292039690086775e-05, 'n_layers': 2, 'n_units_l0': 147, 'n_units_l1': 167}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.115991, lr=1.6e-05


[I 2025-03-18 01:28:53,840] Trial 32 finished with value: 0.11830972135066986 and parameters: {'lr': 4.485474637363742e-05, 'n_layers': 2, 'n_units_l0': 152, 'n_units_l1': 173}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.118613, lr=1.1e-05


[I 2025-03-18 01:29:01,213] Trial 33 finished with value: 0.117255799472332 and parameters: {'lr': 9.675854684793195e-05, 'n_layers': 2, 'n_units_l0': 130, 'n_units_l1': 188}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.119068, lr=3.0e-06


[I 2025-03-18 01:29:08,478] Trial 34 finished with value: 0.11055675148963928 and parameters: {'lr': 0.0005157518376763536, 'n_layers': 2, 'n_units_l0': 183, 'n_units_l1': 165}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.123509, lr=4.0e-06


[I 2025-03-18 01:29:15,628] Trial 35 finished with value: 0.1053822934627533 and parameters: {'lr': 0.0005061110660709518, 'n_layers': 2, 'n_units_l0': 223, 'n_units_l1': 119}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.114920, lr=4.0e-06


[I 2025-03-18 01:29:24,968] Trial 36 finished with value: 0.11133226752281189 and parameters: {'lr': 0.0005169431834492707, 'n_layers': 4, 'n_units_l0': 230, 'n_units_l1': 122, 'n_units_l2': 138, 'n_units_l3': 156}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.128195, lr=2.0e-06


[I 2025-03-18 01:29:32,194] Trial 37 finished with value: 0.11698440462350845 and parameters: {'lr': 0.0004349233125208842, 'n_layers': 2, 'n_units_l0': 220, 'n_units_l1': 91}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.126758, lr=3.4e-06


[I 2025-03-18 01:29:39,388] Trial 38 finished with value: 0.1158604621887207 and parameters: {'lr': 0.0007654645348918938, 'n_layers': 2, 'n_units_l0': 241, 'n_units_l1': 100}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.140661, lr=3.0e-06


[I 2025-03-18 01:29:48,782] Trial 39 finished with value: 0.11216350644826889 and parameters: {'lr': 0.0003147220834938573, 'n_layers': 4, 'n_units_l0': 208, 'n_units_l1': 106, 'n_units_l2': 202, 'n_units_l3': 75}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.128176, lr=2.5e-06


[I 2025-03-18 01:29:56,004] Trial 40 finished with value: 0.11053451150655746 and parameters: {'lr': 0.0013836576539214597, 'n_layers': 2, 'n_units_l0': 186, 'n_units_l1': 85}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.129899, lr=5.4e-06


[I 2025-03-18 01:30:03,318] Trial 41 finished with value: 0.12573815882205963 and parameters: {'lr': 0.0017848372784579495, 'n_layers': 2, 'n_units_l0': 189, 'n_units_l1': 83}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.138788, lr=7.0e-06


[I 2025-03-18 01:30:10,559] Trial 42 finished with value: 0.11187387257814407 and parameters: {'lr': 0.0011775878409558231, 'n_layers': 2, 'n_units_l0': 175, 'n_units_l1': 83}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.121940, lr=9.2e-06


[I 2025-03-18 01:30:17,966] Trial 43 finished with value: 0.11467334628105164 and parameters: {'lr': 0.0031725623921902736, 'n_layers': 2, 'n_units_l0': 203, 'n_units_l1': 64}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.137272, lr=1.2e-05


[I 2025-03-18 01:30:25,281] Trial 44 finished with value: 0.11315614730119705 and parameters: {'lr': 0.0005318878297918291, 'n_layers': 2, 'n_units_l0': 186, 'n_units_l1': 132}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.121867, lr=8.3e-06


[I 2025-03-18 01:30:33,619] Trial 45 finished with value: 0.11119600385427475 and parameters: {'lr': 0.00040102841492680195, 'n_layers': 3, 'n_units_l0': 216, 'n_units_l1': 127, 'n_units_l2': 253}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.132832, lr=6.3e-06


[I 2025-03-18 01:30:40,870] Trial 46 finished with value: 0.12107114493846893 and parameters: {'lr': 0.0016068931808525778, 'n_layers': 2, 'n_units_l0': 170, 'n_units_l1': 110}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.141406, lr=1.3e-05


[I 2025-03-18 01:30:49,167] Trial 47 finished with value: 0.12289991229772568 and parameters: {'lr': 0.00018937772609252412, 'n_layers': 3, 'n_units_l0': 200, 'n_units_l1': 149, 'n_units_l2': 121}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.129443, lr=3.0e-06


[I 2025-03-18 01:30:56,492] Trial 48 finished with value: 0.1309807300567627 and parameters: {'lr': 0.0008724888435876349, 'n_layers': 2, 'n_units_l0': 244, 'n_units_l1': 247}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.145827, lr=3.4e-06


[I 2025-03-18 01:31:03,868] Trial 49 finished with value: 0.11767320334911346 and parameters: {'lr': 0.0024351709912077174, 'n_layers': 2, 'n_units_l0': 232, 'n_units_l1': 213}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.127425, lr=9.5e-06


[I 2025-03-18 01:31:12,347] Trial 50 finished with value: 0.11212745308876038 and parameters: {'lr': 0.0001412204441300582, 'n_layers': 3, 'n_units_l0': 180, 'n_units_l1': 161, 'n_units_l2': 158}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.118227, lr=4.4e-06


[I 2025-03-18 01:31:20,705] Trial 51 finished with value: 0.1213013306260109 and parameters: {'lr': 0.00040878390528159423, 'n_layers': 3, 'n_units_l0': 216, 'n_units_l1': 127, 'n_units_l2': 246}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.131851, lr=3.2e-06


[I 2025-03-18 01:31:28,948] Trial 52 finished with value: 0.12185604125261307 and parameters: {'lr': 0.0006221817353662766, 'n_layers': 3, 'n_units_l0': 228, 'n_units_l1': 95, 'n_units_l2': 231}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.141228, lr=2.4e-06


[I 2025-03-18 01:31:37,259] Trial 53 finished with value: 0.11765122413635254 and parameters: {'lr': 0.0002556124979969124, 'n_layers': 3, 'n_units_l0': 193, 'n_units_l1': 144, 'n_units_l2': 178}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.123023, lr=2.0e-06


[I 2025-03-18 01:31:45,659] Trial 54 finished with value: 0.11170262098312378 and parameters: {'lr': 0.0009561324315058202, 'n_layers': 3, 'n_units_l0': 224, 'n_units_l1': 131, 'n_units_l2': 194}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.141340, lr=7.5e-06


[I 2025-03-18 01:31:52,882] Trial 55 finished with value: 0.10472949594259262 and parameters: {'lr': 0.0003746951836683642, 'n_layers': 2, 'n_units_l0': 254, 'n_units_l1': 118}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.116965, lr=2.9e-06


[I 2025-03-18 01:32:00,238] Trial 56 finished with value: 0.12294215708971024 and parameters: {'lr': 0.0006864897014981536, 'n_layers': 2, 'n_units_l0': 252, 'n_units_l1': 223}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.139822, lr=2.7e-06


[I 2025-03-18 01:32:07,473] Trial 57 finished with value: 0.12150826305150986 and parameters: {'lr': 0.0014475580609846967, 'n_layers': 2, 'n_units_l0': 163, 'n_units_l1': 115}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.140414, lr=5.7e-06


[I 2025-03-18 01:32:15,013] Trial 58 finished with value: 0.10474969446659088 and parameters: {'lr': 0.00034210073609568307, 'n_layers': 2, 'n_units_l0': 180, 'n_units_l1': 182}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.114844, lr=2.7e-06


[I 2025-03-18 01:32:22,415] Trial 59 finished with value: 0.10341215133666992 and parameters: {'lr': 0.0003561388824638216, 'n_layers': 2, 'n_units_l0': 256, 'n_units_l1': 196}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.114078, lr=2.8e-06


[I 2025-03-18 01:32:29,696] Trial 60 finished with value: 0.11316556483507156 and parameters: {'lr': 0.00024310059281875028, 'n_layers': 2, 'n_units_l0': 256, 'n_units_l1': 195}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.121837, lr=3.8e-06


[I 2025-03-18 01:32:37,144] Trial 61 finished with value: 0.11629349738359451 and parameters: {'lr': 0.0003603168330202744, 'n_layers': 2, 'n_units_l0': 246, 'n_units_l1': 183}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.126442, lr=2.8e-06


[I 2025-03-18 01:32:44,430] Trial 62 finished with value: 0.10958818346261978 and parameters: {'lr': 0.0002870379240780434, 'n_layers': 2, 'n_units_l0': 234, 'n_units_l1': 193}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.119183, lr=2.2e-06


[I 2025-03-18 01:32:51,682] Trial 63 finished with value: 0.12001512199640274 and parameters: {'lr': 0.00019585827454278424, 'n_layers': 2, 'n_units_l0': 236, 'n_units_l1': 209}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.126534, lr=3.1e-06


[I 2025-03-18 01:32:58,988] Trial 64 finished with value: 0.1171027347445488 and parameters: {'lr': 0.00030411413514709227, 'n_layers': 2, 'n_units_l0': 247, 'n_units_l1': 196}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.121723, lr=4.8e-06


[I 2025-03-18 01:33:06,268] Trial 65 finished with value: 0.11461568623781204 and parameters: {'lr': 0.00011624449373241268, 'n_layers': 2, 'n_units_l0': 250, 'n_units_l1': 180}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.117289, lr=3.6e-06


[I 2025-03-18 01:33:13,582] Trial 66 finished with value: 0.104876309633255 and parameters: {'lr': 0.00015515517782742396, 'n_layers': 2, 'n_units_l0': 235, 'n_units_l1': 187}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.107560, lr=2.4e-06


[I 2025-03-18 01:33:21,069] Trial 67 finished with value: 0.11743593215942383 and parameters: {'lr': 0.00016681088380376727, 'n_layers': 2, 'n_units_l0': 241, 'n_units_l1': 227}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.120541, lr=5.2e-06


[I 2025-03-18 01:33:28,336] Trial 68 finished with value: 0.10985774546861649 and parameters: {'lr': 0.0002479784118257129, 'n_layers': 2, 'n_units_l0': 236, 'n_units_l1': 191}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.117310, lr=3.9e-06


[I 2025-03-18 01:33:35,729] Trial 69 finished with value: 0.11350652575492859 and parameters: {'lr': 0.00012781846746483494, 'n_layers': 2, 'n_units_l0': 236, 'n_units_l1': 207}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.117340, lr=4.0e-06


[I 2025-03-18 01:33:43,030] Trial 70 finished with value: 0.1200316995382309 and parameters: {'lr': 0.0003420763437160053, 'n_layers': 2, 'n_units_l0': 225, 'n_units_l1': 175}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.129596, lr=1.3e-06


[I 2025-03-18 01:33:50,379] Trial 71 finished with value: 0.1082385927438736 and parameters: {'lr': 0.0006322648678291365, 'n_layers': 2, 'n_units_l0': 255, 'n_units_l1': 198}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.128443, lr=4.9e-06


[I 2025-03-18 01:33:57,745] Trial 72 finished with value: 0.11434858292341232 and parameters: {'lr': 0.0005815432239028712, 'n_layers': 2, 'n_units_l0': 256, 'n_units_l1': 197}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.125535, lr=4.5e-06


[I 2025-03-18 01:34:05,032] Trial 73 finished with value: 0.1128561869263649 and parameters: {'lr': 0.0004505665215164392, 'n_layers': 2, 'n_units_l0': 247, 'n_units_l1': 187}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.130365, lr=3.5e-06


[I 2025-03-18 01:34:12,336] Trial 74 finished with value: 0.10441812127828598 and parameters: {'lr': 0.00016339366372688464, 'n_layers': 2, 'n_units_l0': 233, 'n_units_l1': 218}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.109008, lr=2.6e-06


[I 2025-03-18 01:34:19,598] Trial 75 finished with value: 0.1091984435915947 and parameters: {'lr': 8.805745309989135e-05, 'n_layers': 2, 'n_units_l0': 242, 'n_units_l1': 248}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.110027, lr=5.5e-06


[I 2025-03-18 01:34:26,866] Trial 76 finished with value: 0.1075592041015625 and parameters: {'lr': 0.0002052251837121819, 'n_layers': 2, 'n_units_l0': 251, 'n_units_l1': 234}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.115953, lr=1.6e-06


[I 2025-03-18 01:34:34,185] Trial 77 finished with value: 0.11877990514039993 and parameters: {'lr': 0.00015891171017022663, 'n_layers': 2, 'n_units_l0': 211, 'n_units_l1': 236}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.120619, lr=5.0e-06


[I 2025-03-18 01:34:41,552] Trial 78 finished with value: 0.1165994331240654 and parameters: {'lr': 0.0001961479830203893, 'n_layers': 2, 'n_units_l0': 249, 'n_units_l1': 232}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.122287, lr=3.1e-06


[I 2025-03-18 01:34:49,088] Trial 79 finished with value: 0.10777543485164642 and parameters: {'lr': 0.00022004066936176284, 'n_layers': 2, 'n_units_l0': 223, 'n_units_l1': 217}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.117105, lr=1.7e-06


[I 2025-03-18 01:34:56,374] Trial 80 finished with value: 0.11266855150461197 and parameters: {'lr': 0.00015618026884202767, 'n_layers': 2, 'n_units_l0': 238, 'n_units_l1': 256}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.115499, lr=2.4e-06


[I 2025-03-18 01:35:03,810] Trial 81 finished with value: 0.11782646924257278 and parameters: {'lr': 0.00023260721470628887, 'n_layers': 2, 'n_units_l0': 222, 'n_units_l1': 222}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.122087, lr=3.6e-06


[I 2025-03-18 01:35:11,224] Trial 82 finished with value: 0.11990121752023697 and parameters: {'lr': 0.00037954424518726166, 'n_layers': 2, 'n_units_l0': 228, 'n_units_l1': 215}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.135017, lr=1.5e-06


[I 2025-03-18 01:35:18,773] Trial 83 finished with value: 0.10848639160394669 and parameters: {'lr': 0.0002816465928332432, 'n_layers': 2, 'n_units_l0': 204, 'n_units_l1': 241}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.119776, lr=2.2e-06


[I 2025-03-18 01:35:26,171] Trial 84 finished with value: 0.10689640045166016 and parameters: {'lr': 7.494780912505745e-05, 'n_layers': 2, 'n_units_l0': 216, 'n_units_l1': 227}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.108424, lr=9.4e-06


[I 2025-03-18 01:35:33,609] Trial 85 finished with value: 0.11855678260326385 and parameters: {'lr': 7.75240319375301e-05, 'n_layers': 2, 'n_units_l0': 218, 'n_units_l1': 227}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.121695, lr=2.4e-06


[I 2025-03-18 01:35:40,818] Trial 86 finished with value: 0.12371326237916946 and parameters: {'lr': 5.227014283027884e-05, 'n_layers': 2, 'n_units_l0': 231, 'n_units_l1': 245}. Best is trial 7 with value: 0.102622389793396.


Learning rate reached minimum threshold (1e-06). Early stopping.


[I 2025-03-18 01:35:48,193] Trial 87 finished with value: 0.11100173741579056 and parameters: {'lr': 3.4327826588129796e-05, 'n_layers': 2, 'n_units_l0': 212, 'n_units_l1': 231}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.111596, lr=1.7e-05


[I 2025-03-18 01:35:55,549] Trial 88 finished with value: 0.11065763235092163 and parameters: {'lr': 0.00010619930459301615, 'n_layers': 2, 'n_units_l0': 250, 'n_units_l1': 209}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.115021, lr=6.6e-06


[I 2025-03-18 01:36:02,951] Trial 89 finished with value: 0.10619866847991943 and parameters: {'lr': 0.0001290213138573072, 'n_layers': 2, 'n_units_l0': 169, 'n_units_l1': 235}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.112816, lr=4.0e-06


[I 2025-03-18 01:36:10,343] Trial 90 finished with value: 0.12331629544496536 and parameters: {'lr': 7.16587965867223e-05, 'n_layers': 2, 'n_units_l0': 169, 'n_units_l1': 226}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.124312, lr=9.0e-06


[I 2025-03-18 01:36:17,734] Trial 91 finished with value: 0.11936487257480621 and parameters: {'lr': 0.00017657044597169048, 'n_layers': 2, 'n_units_l0': 156, 'n_units_l1': 234}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.125400, lr=5.5e-06


[I 2025-03-18 01:36:25,407] Trial 92 finished with value: 0.11719828099012375 and parameters: {'lr': 0.00012528723279645631, 'n_layers': 2, 'n_units_l0': 180, 'n_units_l1': 241}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.118703, lr=3.9e-06


[I 2025-03-18 01:36:33,080] Trial 93 finished with value: 0.11226079612970352 and parameters: {'lr': 0.00013761120546360072, 'n_layers': 2, 'n_units_l0': 240, 'n_units_l1': 203}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.115641, lr=4.3e-06


[I 2025-03-18 01:36:40,433] Trial 94 finished with value: 0.11550123244524002 and parameters: {'lr': 0.00047564728852383855, 'n_layers': 2, 'n_units_l0': 193, 'n_units_l1': 252}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.136210, lr=3.7e-06


[I 2025-03-18 01:36:47,873] Trial 95 finished with value: 0.1257806122303009 and parameters: {'lr': 0.0003636541711586475, 'n_layers': 2, 'n_units_l0': 165, 'n_units_l1': 240}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.131206, lr=1.4e-06


[I 2025-03-18 01:36:55,128] Trial 96 finished with value: 0.1153620108962059 and parameters: {'lr': 9.961253416334098e-05, 'n_layers': 2, 'n_units_l0': 176, 'n_units_l1': 223}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.117299, lr=1.2e-05


[I 2025-03-18 01:37:02,441] Trial 97 finished with value: 0.1215846985578537 and parameters: {'lr': 0.0002136477779622632, 'n_layers': 2, 'n_units_l0': 244, 'n_units_l1': 179}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.126571, lr=3.3e-06


[I 2025-03-18 01:37:09,677] Trial 98 finished with value: 0.11680358648300171 and parameters: {'lr': 0.00033007256660124454, 'n_layers': 2, 'n_units_l0': 232, 'n_units_l1': 108}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.126168, lr=1.3e-06


[I 2025-03-18 01:37:16,999] Trial 99 finished with value: 0.11158479750156403 and parameters: {'lr': 5.4820096395292546e-05, 'n_layers': 2, 'n_units_l0': 253, 'n_units_l1': 220}. Best is trial 7 with value: 0.102622389793396.


Epoch 100: val_loss=0.111854, lr=3.4e-06

Optuna search completed.
Number of finished trials: 100
Best trial index: 7
Best validation MSE: 0.102622
Best hyperparameter configuration: {'lr': 0.00025924210512089416, 'n_layers': 2, 'n_units_l0': 189, 'n_units_l1': 237}
Best validation RMSE: 0.320347
RMSE on validation set: 0.1253
CPU times: user 12min 33s, sys: 11.4 s, total: 12min 44s
Wall time: 12min 52s




### Conduct final inference - RMSE returned by Kaggle: 0.1286

#### Refit the model with the best hyperparameters on the combined train/validation set


In [13]:
### refit the DNN based on the combined train/valid dataset
def dnn_refit(num_epochs=100, params_path='dnn_best_params_0317.pkl'):
  """Refit the DNN on the combined train + validation data using saved hyperparameters.

  Relies on notebook-level globals: ``X_train_encoded``, ``X_valid_encoded``,
  ``y_train``, ``y_valid``, ``scaler``, ``target_scaler``, ``batch_size``,
  ``device`` and ``build_model_from_config``.

  Args:
    num_epochs: Number of passes over the combined data. Defaults to 100,
      the value that was previously hard-coded.
    params_path: Path of the pickle file holding the hyperparameter dict.
      The dict must contain ``"lr"`` and is also passed as-is to
      ``build_model_from_config``.

  Returns:
    The trained model. ``.eval()`` is NOT called here, so the caller must
    switch the model to evaluation mode before running inference.
  """
  # ===== Combine Train & Validation Data =====
  X_train_valid_np = np.vstack((X_train_encoded, X_valid_encoded))
  y_train_valid_np = np.hstack((y_train, y_valid))

  # Only transform (no re-fit): the scalers keep the statistics they were
  # fitted with earlier in the notebook.
  X_train_valid_scaled = scaler.transform(X_train_valid_np)
  y_train_valid_scaled = target_scaler.transform(y_train_valid_np.reshape(-1, 1))

  # Convert to torch tensors; the target is shaped (n, 1) to match the loss inputs.
  X_train_valid = torch.tensor(X_train_valid_scaled, dtype=torch.float32).to(device)
  y_train_valid = torch.tensor(y_train_valid_scaled, dtype=torch.float32).view(-1, 1).to(device)

  # Create dataset & dataloader
  train_valid_dataset = TensorDataset(X_train_valid, y_train_valid)
  train_valid_loader = DataLoader(train_valid_dataset, batch_size=batch_size, shuffle=True)

  # ===== Load Best Hyperparameters from Optuna =====
  # SECURITY: pickle.load can execute arbitrary code embedded in the file.
  # Only point params_path at a file produced by this notebook.
  with open(params_path, 'rb') as file:
      best_params = pickle.load(file)
  print("Refitting model with best hyperparameters:", best_params)

  # Instantiate the model
  modelrefit = build_model_from_config(best_params).to(device)

  # Define loss function and optimizer
  criterion = nn.MSELoss()
  optimizer = torch.optim.Adam(modelrefit.parameters(), lr=best_params["lr"])

  # ===== Train the Model with Best Hyperparameters =====
  for epoch in range(num_epochs):
      modelrefit.train()
      epoch_loss = 0.0
      for inputs, targets in train_valid_loader:
          optimizer.zero_grad()
          outputs = modelrefit(inputs)
          loss = criterion(outputs, targets)
          loss.backward()
          optimizer.step()
          epoch_loss += loss.item()

      if (epoch + 1) % 10 == 0:  # Print the mean per-batch loss every 10 epochs
          print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss / len(train_valid_loader):.4f}')

  # To persist the refit weights for later inference, save the *refit* model:
  #   torch.save(modelrefit.state_dict(), "dnnmodel2_inference.pth")
  return modelrefit

### process testdata
def testdata_processing():
  """Prepare the raw test frame for DNN inference.

  Steps: drop two sparse categorical columns, impute missing values,
  apply the fitted ``mean_encoder``, impute the numeric gaps that remain,
  scale with the fitted ``scaler`` and convert to a float32 tensor on ``device``.

  Reads the notebook globals ``X_test``, ``mean_encoder``, ``scaler``,
  ``device``, ``X_train_encoded`` and ``X_valid_encoded`` (the last two only
  for the shape printout). ``X_test`` itself is not modified.

  NOTE(review): every mean/median/mode used for imputation below is computed
  from the test frame itself, not from the training data.

  Returns:
    torch.Tensor of scaled test features (float32, moved to ``device``).
  """
  # Work on a deep copy and remove the two categorical features with very high NA%.
  X_test2 = X_test.copy(deep=True).drop(columns=['PoolQC', 'MiscFeature'])

  # Categorical features whose NA values are filled with the literal 'None'.
  catna_none = [
      'Alley', 'Fence', 'MasVnrType', 'FireplaceQu',
      'GarageType', 'GarageQual', 'GarageCond', 'GarageFinish',
      'BsmtFinType2', 'BsmtExposure', 'BsmtFinType1', 'BsmtQual', 'BsmtCond',
  ]
  X_test2[catna_none] = X_test2[catna_none].fillna('None')

  # 'Electrical': fall back to the most frequent category.
  electrical = X_test2['Electrical']
  X_test2['Electrical'] = electrical.fillna(electrical.mode()[0])

  # Numerical features with a fixed fill value (1700 for GarageYrBlt, 0 for MasVnrArea).
  for column, constant in (('GarageYrBlt', 1700), ('MasVnrArea', 0)):
      X_test2[column] = X_test2[column].fillna(constant)

  # LotFrontage: fill with the column mean.
  lot_frontage = X_test2['LotFrontage']
  X_test2['LotFrontage'] = lot_frontage.fillna(lot_frontage.mean())

  # Apply the already-fitted mean (target) encoder.
  X_test_encoded = mean_encoder.transform(X_test2)

  print("Training Set shape: ", X_train_encoded.shape)
  print("Valid Set shape: ", X_valid_encoded.shape)
  print("Test Set shape: ", X_test_encoded.shape)

  # Columns handled here have NAs only in the test data; grouped by fill strategy.
  for column in ('BsmtFinSF1', 'BsmtFinSF2', 'TotalBsmtSF', 'GarageArea'):
      values = X_test_encoded[column]
      X_test_encoded[column] = values.fillna(values.mean())
  for column in ('BsmtFullBath', 'BsmtHalfBath', 'GarageCars'):
      values = X_test_encoded[column]
      X_test_encoded[column] = values.fillna(values.median())
  X_test_encoded['BsmtUnfSF'] = X_test_encoded['BsmtUnfSF'].fillna(0)

  # Sanity check: total count of remaining missing values.
  remaining_na = X_test_encoded.isnull().sum().sum()
  print(f'double check NA vavlues: {remaining_na}')

  # Standardize with the fitted scaler, then hand back a tensor on the target device.
  scaled_np = scaler.transform(X_test_encoded.values)
  return torch.tensor(scaled_np, dtype=torch.float32).to(device)




In [14]:
# Refit on train + validation data, then build the scaled test tensor.
modelrefit = dnn_refit()
test_features = testdata_processing()

# Forward pass in evaluation mode with gradient tracking disabled.
modelrefit.eval()
with torch.no_grad():
    # .cpu() is needed when the model runs on GPU; harmless on CPU.
    test_predictions = modelrefit(test_features).cpu().numpy()

# Undo the target scaling, then exponentiate
# (presumably the target was log-transformed upstream -- TODO confirm).
test_predictions_inv = target_scaler.inverse_transform(test_predictions)
testpred_dnn = np.exp(test_predictions_inv)
print(np.mean(testpred_dnn))

# Row identifiers for the submission come from the test frame's index.
test_id = X_test.index.tolist()
def submit_method(test_id, y_prediction):
    """Assemble the two-column submission table (``Id``, ``SalePrice``).

    Args:
        test_id: Sequence of row identifiers, one per prediction.
        y_prediction: Predictions as a flat sequence or an array of any shape
            holding exactly one value per id (e.g. the ``(n, 1)`` output of a
            model). Values are flattened and matched to ids by position.

    Returns:
        pandas.DataFrame with columns ``Id`` and ``SalePrice``. Its shape is
        printed as a quick sanity check.

    Raises:
        ValueError: If the number of predictions differs from the number of ids.
    """
    ids = list(test_id)
    # Flatten explicitly instead of relying on pandas accepting an (n, 1)
    # array as a single column.
    prices = np.asarray(y_prediction).ravel()
    if len(prices) != len(ids):
        raise ValueError(
            f"Got {len(prices)} predictions for {len(ids)} ids; "
            "expected exactly one prediction per id."
        )
    sub = pd.DataFrame({'Id': ids, 'SalePrice': prices})
    print(sub.shape)
    return sub

# Build the (Id, SalePrice) submission frame from the DNN test predictions.
subcsv = submit_method(test_id=test_id, y_prediction=testpred_dnn)



Refitting model with best hyperparameters: {'lr': 0.00025924210512089416, 'n_layers': 2, 'n_units_l0': 189, 'n_units_l1': 237}
Epoch [10/100], Loss: 0.0993
Epoch [20/100], Loss: 0.0671
Epoch [30/100], Loss: 0.0551
Epoch [40/100], Loss: 0.0488
Epoch [50/100], Loss: 0.0403
Epoch [60/100], Loss: 0.0374
Epoch [70/100], Loss: 0.0336
Epoch [80/100], Loss: 0.0309
Epoch [90/100], Loss: 0.0299
Epoch [100/100], Loss: 0.0269
Training Set shape:  (1260, 77)
Valid Set shape:  (200, 77)
Test Set shape:  (1459, 77)
double check NA vavlues: 0
178846.3
(1459, 2)
