In [20]:
!pip install optuna xgboost lightgbm "mlflow<3"



In [22]:
base_folder = "R:\\Downloads\\housing_app_fall25-main\\housing_app_fall25-main"
%cd "{base_folder}"

R:\Downloads\housing_app_fall25-main\housing_app_fall25-main


In [23]:
import sqlite3
import pandas as pd

# One row per passenger: join the passenger record with its survival record
# and resolve the sex_id foreign key to its readable name.
titanic_query = """
    SELECT
        p.passenger_id,
        p.Pclass,
        p.Age,
        p.Fare,
        ps.SibSp,
        ps.Parch,
        ps.Survived,
        s.name AS sex
    FROM passenger AS p
    JOIN passenger_survival AS ps
        ON ps.passenger_id = p.passenger_id
    JOIN sex AS s
        ON s.sex_id = p.sex_id
    ORDER BY p.passenger_id
    """

conn = sqlite3.connect(f"{base_folder}/data/titanic.db")
try:
    titanic = pd.read_sql_query(titanic_query, conn)
finally:
    # Close explicitly even if the query fails. Note that
    # `with sqlite3.connect(...)` only commits/rolls back a transaction;
    # it does not close the connection, so try/finally is used instead.
    conn.close()

titanic.head()


Unnamed: 0,passenger_id,Pclass,Age,Fare,SibSp,Parch,Survived,sex
0,0,3,22.0,7.25,1,0,0,male
1,1,1,38.0,71.2833,1,0,1,female
2,2,3,26.0,7.925,0,0,1,female
3,3,1,35.0,53.1,1,0,1,female
4,4,3,35.0,8.05,0,0,0,male


In [25]:
# ============================================================
# TRAIN / TEST WITH OPTUNA (NO PCA + PCA)
# ============================================================

import optuna
from sklearn.base import clone
from sklearn.pipeline import make_pipeline
from sklearn.decomposition import PCA
from sklearn.model_selection import cross_val_score
from sklearn.metrics import f1_score
import mlflow
import mlflow.sklearn

# Raise Optuna's log threshold to WARNING so per-trial messages below that
# level do not flood the cell output.
optuna.logging.set_verbosity(optuna.logging.WARNING)

def _build_pipeline(model, use_pca, pca_variance=0.95):
    """Assemble ``preprocessing -> [PCA] -> model`` as a single sklearn pipeline.

    A fresh clone of the notebook-level ``preprocessing`` transformer is used
    each time so that fitted state is never shared between pipelines.

    Parameters
    ----------
    model : estimator
        Final estimator of the pipeline (used as given, not cloned here).
    use_pca : bool
        When true, a PCA step is inserted between preprocessing and the model.
    pca_variance : float, default 0.95
        Value passed to ``PCA(n_components=...)``.
    """
    steps = [clone(preprocessing)]
    if use_pca:
        steps.append(PCA(n_components=pca_variance))
    steps.append(model)
    return make_pipeline(*steps)


def run_optuna(model_name, base_model, use_pca, n_trials=10, cv_folds=3, seed=42):
    """Tune one model family with Optuna, refit the best config, and log it to MLflow.

    The search maximises the mean cross-validated F1 on the training split.
    The best hyperparameters are then refit on the full training split,
    scored once on the test split, and the run (params, metrics, fitted
    pipeline) is recorded in MLflow.

    Relies on names that must already exist in the notebook namespace
    (defined in cells not shown alongside this function): ``preprocessing``,
    ``X_train``, ``y_train``, ``X_test`` and ``y_test``.

    Parameters
    ----------
    model_name : str
        Selects the search space: ``"ridge"``, ``"histgradientboosting"``,
        ``"xgboost"`` or ``"lightgbm"``. For any other value no
        hyperparameters are suggested, so every trial evaluates the
        unmodified ``base_model``.
    base_model : estimator
        Untuned estimator; it is cloned for every trial and never mutated.
    use_pca : bool
        Whether to insert a PCA step between preprocessing and the model.
    n_trials : int, default 10
        Number of Optuna trials.
    cv_folds : int, default 3
        Value passed as ``cv`` to ``cross_val_score`` inside each trial.
    seed : int or None, default 42
        Seed for the TPE sampler so repeated runs explore the same
        candidates. Pass ``None`` for an unseeded (non-reproducible) search.

    Returns
    -------
    None
        Results are reported through MLflow and a one-line printed summary.
    """

    def objective(trial):
        model = clone(base_model)

        if model_name == "ridge":
            model.set_params(alpha=trial.suggest_float("alpha", 0.1, 100.0, log=True))

        elif model_name == "histgradientboosting":
            model.set_params(
                learning_rate=trial.suggest_float("learning_rate", 0.05, 0.2),
                max_depth=trial.suggest_int("max_depth", 3, 8)
            )

        elif model_name == "xgboost":
            model.set_params(
                n_estimators=trial.suggest_int("n_estimators", 100, 300, step=50),
                learning_rate=trial.suggest_float("learning_rate", 0.05, 0.2),
                max_depth=trial.suggest_int("max_depth", 3, 8)
            )

        elif model_name == "lightgbm":
            model.set_params(
                n_estimators=trial.suggest_int("n_estimators", 100, 300, step=50),
                learning_rate=trial.suggest_float("learning_rate", 0.05, 0.2),
                num_leaves=trial.suggest_int("num_leaves", 20, 80)
            )

        return cross_val_score(
            _build_pipeline(model, use_pca),
            X_train,
            y_train,
            cv=cv_folds,
            scoring="f1"
        ).mean()

    # TPE is Optuna's default single-objective sampler; passing it explicitly
    # only adds the seed, which makes the search repeatable across re-runs.
    study = optuna.create_study(
        direction="maximize",
        sampler=optuna.samplers.TPESampler(seed=seed),
    )
    study.optimize(objective, n_trials=n_trials)

    # Trial parameter names match the estimator's own parameter names, so the
    # winning trial's params can be applied to a fresh clone directly.
    best_model = clone(base_model).set_params(**study.best_params)

    final_pipeline = _build_pipeline(best_model, use_pca)
    final_pipeline.fit(X_train, y_train)

    test_f1 = f1_score(y_test, final_pipeline.predict(X_test))

    run_name = f"{model_name}_optuna_{'with_pca' if use_pca else 'baseline'}"

    with mlflow.start_run(run_name=run_name):
        mlflow.log_param("model_family", model_name)
        mlflow.log_param("uses_pca", use_pca)
        mlflow.log_param("is_tuned", True)
        mlflow.log_param("cv_folds", cv_folds)
        mlflow.log_param("n_trials", n_trials)

        mlflow.log_params(study.best_params)
        # Log the CV score that drove the search next to the held-out score,
        # so runs can be compared without selecting on the test set.
        mlflow.log_metric("cv_f1", study.best_value)
        mlflow.log_metric("test_f1", test_f1)

        mlflow.sklearn.log_model(final_pipeline, "model")

    print(f"{run_name} | Test F1={test_f1:.4f}")


# Untuned base estimators, keyed by the model-family name that run_optuna
# uses to choose its search space.
models_optuna = dict(
    ridge=RidgeClassifier(),
    histgradientboosting=HistGradientBoostingClassifier(random_state=42),
    xgboost=XGBClassifier(eval_metric="logloss", random_state=42, n_jobs=-1),
    lightgbm=LGBMClassifier(random_state=42, verbose=-1),
)

# Tune each family twice: first without PCA, then with it.
for model_name, model in models_optuna.items():
    for use_pca in (False, True):
        run_optuna(model_name, model, use_pca)




🏃 View run ridge_optuna_baseline at: https://dagshub.com/rahulmugada/titanic.mlflow/#/experiments/1/runs/59871d5c2cd44e768f3b000c80f1d43e
🧪 View experiment at: https://dagshub.com/rahulmugada/titanic.mlflow/#/experiments/1
ridge_optuna_baseline | Test F1=0.7287




🏃 View run ridge_optuna_with_pca at: https://dagshub.com/rahulmugada/titanic.mlflow/#/experiments/1/runs/9e4497702dbd42b8981743cd6c692880
🧪 View experiment at: https://dagshub.com/rahulmugada/titanic.mlflow/#/experiments/1
ridge_optuna_with_pca | Test F1=0.7287




🏃 View run histgradientboosting_optuna_baseline at: https://dagshub.com/rahulmugada/titanic.mlflow/#/experiments/1/runs/e63b57e541df4575828b980241b63a01
🧪 View experiment at: https://dagshub.com/rahulmugada/titanic.mlflow/#/experiments/1
histgradientboosting_optuna_baseline | Test F1=0.7328




🏃 View run histgradientboosting_optuna_with_pca at: https://dagshub.com/rahulmugada/titanic.mlflow/#/experiments/1/runs/4e62e2cc0ee547bca606d95201316a2e
🧪 View experiment at: https://dagshub.com/rahulmugada/titanic.mlflow/#/experiments/1
histgradientboosting_optuna_with_pca | Test F1=0.7556




🏃 View run xgboost_optuna_baseline at: https://dagshub.com/rahulmugada/titanic.mlflow/#/experiments/1/runs/c954fe4429d94813b33935866f5ea851
🧪 View experiment at: https://dagshub.com/rahulmugada/titanic.mlflow/#/experiments/1
xgboost_optuna_baseline | Test F1=0.7188




🏃 View run xgboost_optuna_with_pca at: https://dagshub.com/rahulmugada/titanic.mlflow/#/experiments/1/runs/02e5e185dac5422a9457eb480c3aeed7
🧪 View experiment at: https://dagshub.com/rahulmugada/titanic.mlflow/#/experiments/1
xgboost_optuna_with_pca | Test F1=0.7463




🏃 View run lightgbm_optuna_baseline at: https://dagshub.com/rahulmugada/titanic.mlflow/#/experiments/1/runs/f65253101360477e90a7a2a7d1909c49
🧪 View experiment at: https://dagshub.com/rahulmugada/titanic.mlflow/#/experiments/1
lightgbm_optuna_baseline | Test F1=0.7353




🏃 View run lightgbm_optuna_with_pca at: https://dagshub.com/rahulmugada/titanic.mlflow/#/experiments/1/runs/134c521357274ecf85abccafcd3dfda3
🧪 View experiment at: https://dagshub.com/rahulmugada/titanic.mlflow/#/experiments/1
lightgbm_optuna_with_pca | Test F1=0.6963
