### Importing Required Libraries
- pandas for data manipulation
- numpy for numerical operations
- scikit-learn for machine learning


In [1]:
import optuna
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import pandas as pd


  from .autonotebook import tqdm as notebook_tqdm



Loads and preprocesses training data



### Loading Training Dataset
Loading the training data for model development


In [2]:
train_df = pd.read_csv("assets/final_preprocessed_train.csv")
train_df.drop(columns=["Unnamed: 0"], inplace=True)
y = train_df["SalePrice"]
X = train_df.drop(columns=["SalePrice"])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### Hyperparameter Optimization with Optuna
The code below performs hyperparameter optimization for a Random Forest model using Optuna:
1. Defines an objective function that:
   - Takes hyperparameter suggestions from Optuna
   - Trains a Random Forest model with those parameters
   - Returns the RMSE on test data
2. Creates and runs an Optuna study with 500 trials
3. Gets the best parameters and trains final model
4. Evaluates and prints the results


## Define objective function for Optuna


### Feature Engineering
Creating new features or transforming existing ones


In [4]:

# Define objective function for Optuna
def objective(trial):
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 200, 2000, step=200),  # Wider range, larger step
        "max_depth": trial.suggest_int("max_depth", 5, 30),  # More depth options
        "min_samples_split": trial.suggest_int("min_samples_split", 2, 15),  # Adjusted upper limit
        "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 8),  # Limited to reasonable values
        "max_features": trial.suggest_categorical("max_features", ["sqrt", "log2", None]),  # Common RF choices
        "bootstrap": trial.suggest_categorical("bootstrap", [True, False]),
    }

    model = RandomForestRegressor(**params, random_state=42, n_jobs=-1)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    return rmse  

# Run Optuna optimization
study = optuna.create_study(direction="minimize")  # Minimize RMSE
study.optimize(objective, n_trials=200)  # Reduced trials for faster tuning

# Get best hyperparameters
best_params = study.best_params

# Train final model with best parameters
model = RandomForestRegressor(**best_params, random_state=42, n_jobs=-1)
model.fit(X_train, y_train)

# Evaluate the model
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

# Print results
print("Best hyperparameters:", best_params)
print(f"Best RMSE: {rmse}")


[I 2025-04-04 14:40:41,068] A new study created in memory with name: no-name-8eeaa86a-db03-4c56-a50d-1c8ba23fe8a4
[I 2025-04-04 14:40:41,702] Trial 0 finished with value: 27694.32757678291 and parameters: {'n_estimators': 800, 'max_depth': 20, 'min_samples_split': 6, 'min_samples_leaf': 1, 'max_features': 'log2', 'bootstrap': False}. Best is trial 0 with value: 27694.32757678291.
[I 2025-04-04 14:40:43,491] Trial 1 finished with value: 42672.18818937182 and parameters: {'n_estimators': 1000, 'max_depth': 28, 'min_samples_split': 6, 'min_samples_leaf': 2, 'max_features': None, 'bootstrap': False}. Best is trial 0 with value: 27694.32757678291.
[I 2025-04-04 14:40:43,856] Trial 2 finished with value: 42554.58926482986 and parameters: {'n_estimators': 200, 'max_depth': 13, 'min_samples_split': 6, 'min_samples_leaf': 2, 'max_features': None, 'bootstrap': False}. Best is trial 0 with value: 27694.32757678291.
[I 2025-04-04 14:40:45,160] Trial 3 finished with value: 36762.318651178975 and pa

Best hyperparameters: {'n_estimators': 200, 'max_depth': 21, 'min_samples_split': 4, 'min_samples_leaf': 1, 'max_features': 'sqrt', 'bootstrap': False}
Best RMSE: 25416.643432979385


### Code Execution
Executing code for data analysis or model development
