In [2]:
import mlflow

from mlflow.tracking import MlflowClient

MLFLOW_TRACKING_URI = "sqlite:///mlflow.db"

client = MlflowClient(tracking_uri=MLFLOW_TRACKING_URI)

client.search_experiments()

[<Experiment: artifact_location='/workspaces/mlops-zoomcamp/02-experiment-tracking/mlruns/1', creation_time=1716427926375, experiment_id='1', last_update_time=1716427926375, lifecycle_stage='active', name='nyc-experiment', tags={}>,
 <Experiment: artifact_location='/workspaces/mlops-zoomcamp/02-experiment-tracking/mlruns/0', creation_time=1716427926347, experiment_id='0', last_update_time=1716427926347, lifecycle_stage='active', name='Default', tags={}>]

In [3]:
mlflow.set_tracking_uri("http://127.0.0.0:5000")

In [4]:
print(f"tracking URI: {mlflow.get_tracking_uri()}")

tracking URI: http://127.0.0.0:5000


In [5]:
import os
import pickle
import click
import mlflow
import numpy as np
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe
from hyperopt.pyll import scope
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

mlflow.set_tracking_uri("http://127.0.0.1:5000")
mlflow.set_experiment("random-forest-hyperopt-new")


def load_pickle(filename: str):
    with open(filename, "rb") as f_in:
        return pickle.load(f_in)


In [10]:


def run_optimization(num_trials: int):

    X_train, y_train = load_pickle("output/train.pkl")
    X_val, y_val = load_pickle("output/val.pkl")

    def objective(params):
        with mlflow.start_run():
            mlflow.set_tag("model","RandomForestRegressor")
            mlflow.log_params(params)
            rf = RandomForestRegressor(**params)
            rf.fit(X_train, y_train)
            y_pred = rf.predict(X_val)
            rmse = mean_squared_error(y_val, y_pred, squared=False)

            return {'loss': rmse, 'status': STATUS_OK}

    search_space = {
        'max_depth': scope.int(hp.quniform('max_depth', 1, 20, 1)),
        'n_estimators': scope.int(hp.quniform('n_estimators', 10, 50, 1)),
        'min_samples_split': scope.int(hp.quniform('min_samples_split', 2, 10, 1)),
        'min_samples_leaf': scope.int(hp.quniform('min_samples_leaf', 1, 4, 1)),
        'random_state': 42
    }

    rstate = np.random.default_rng(42)  # for reproducible results
    fmin(
        fn=objective,
        space=search_space,
        algo=tpe.suggest,
        max_evals=num_trials,
        trials=Trials(),
        rstate=rstate
    )
    mlflow.log_metric("rmse",rmse)



In [11]:
run_optimization(15)

  0%|          | 0/15 [00:05<?, ?trial/s, best loss=?]


KeyboardInterrupt: 