In [1]:
! pip install mlflow

Collecting mlflow
  Downloading mlflow-2.17.2-py3-none-any.whl.metadata (29 kB)
Collecting mlflow-skinny==2.17.2 (from mlflow)
  Downloading mlflow_skinny-2.17.2-py3-none-any.whl.metadata (30 kB)
Collecting alembic!=1.10.0,<2 (from mlflow)
  Using cached alembic-1.14.1-py3-none-any.whl.metadata (7.4 kB)
Collecting docker<8,>=4.0.0 (from mlflow)
  Using cached docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting graphene<4 (from mlflow)
  Using cached graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting sqlalchemy<3,>=1.4.0 (from mlflow)
  Downloading SQLAlchemy-2.0.37-cp38-cp38-macosx_10_9_x86_64.whl.metadata (9.7 kB)
Collecting gunicorn<24 (from mlflow)
  Using cached gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting cloudpickle<4 (from mlflow-skinny==2.17.2->mlflow)
  Using cached cloudpickle-3.1.1-py3-none-any.whl.metadata (7.1 kB)
Collecting databricks-sdk<1,>=0.20.0 (from mlflow-skinny==2.17.2->mlflow)
  Using cached databricks_sdk-0.42.0-py3-none-any

In [6]:
import mlflow
import mlflow.sklearn
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes

# Load the diabetes regression dataset bundled with scikit-learn
data = load_diabetes()
X = data.data
y = data.target

# Experiment configuration. Every tunable value is named here once so the
# split and the model share a single seed and each setting can be logged
# with the run below.
TEST_SIZE = 0.2      # fraction of samples held out for evaluation
RANDOM_STATE = 42    # seed used for both the split and the forest
n_estimators = 100
max_depth = 6

# Split the dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# Start an MLflow run; everything logged inside the block is attached to it
with mlflow.start_run():
    # Log every setting that influences the result in one call, including the
    # split fraction and seed, so the reported metric can be reproduced from
    # the run's recorded parameters alone.
    mlflow.log_params(
        {
            "n_estimators": n_estimators,
            "max_depth": max_depth,
            "test_size": TEST_SIZE,
            "random_state": RANDOM_STATE,
        }
    )

    # Train the model
    model = RandomForestRegressor(
        n_estimators=n_estimators,
        max_depth=max_depth,
        random_state=RANDOM_STATE,
    )
    model.fit(X_train, y_train)

    # Evaluate the model on the held-out split
    predictions = model.predict(X_test)
    mse = mean_squared_error(y_test, predictions)

    # Log metrics
    mlflow.log_metric("mse", mse)

    # Log the fitted model itself as a run artifact
    mlflow.sklearn.log_model(model, "model")

    print(f"Run completed with MSE: {mse}")




Run completed with MSE: 2878.820810419161


In [5]:
# Start the MLflow tracking UI. In the recorded output the server listens on
# http://127.0.0.1:5000 and keeps running until it is interrupted (Ctrl-C), so
# this cell occupies the kernel for as long as the UI is up.
! mlflow ui

[2025-02-01 17:50:35 -0500] [5568] [INFO] Starting gunicorn 23.0.0
[2025-02-01 17:50:35 -0500] [5568] [INFO] Listening at: http://127.0.0.1:5000 (5568)
[2025-02-01 17:50:35 -0500] [5568] [INFO] Using worker: sync
[2025-02-01 17:50:35 -0500] [5570] [INFO] Booting worker with pid: 5570
[2025-02-01 17:50:35 -0500] [5571] [INFO] Booting worker with pid: 5571
[2025-02-01 17:50:35 -0500] [5572] [INFO] Booting worker with pid: 5572
[2025-02-01 17:50:35 -0500] [5573] [INFO] Booting worker with pid: 5573
^C
[2025-02-01 18:07:32 -0500] [5568] [INFO] Handling signal: int
[2025-02-01 18:07:32 -0500] [5573] [INFO] Worker exiting (pid: 5573)
[2025-02-01 18:07:32 -0500] [5570] [INFO] Worker exiting (pid: 5570)
[2025-02-01 18:07:32 -0500] [5572] [INFO] Worker exiting (pid: 5572)
[2025-02-01 18:07:32 -0500] [5571] [INFO] Worker exiting (pid: 5571)


In [4]:
# Try to run the current directory as an MLflow Project, overriding the two
# hyperparameters from the command line.
# NOTE(review): the recorded output shows this command failing because no
# `main` entry point could be found. Presumably the directory needs an
# MLproject file (or a runnable .py/.sh script) that defines `main` and accepts
# n_estimators and max_depth — confirm before relying on this cell.
! mlflow run . -P n_estimators=150 -P max_depth=8

2025/02/01 17:49:20 ERROR mlflow.cli: === Could not find main among entry points [] or interpret main as a runnable script. Supported script file extensions: ['.py', '.sh'] ===
