In [1]:
!pip list

Package                   Version
------------------------- --------------
aiofiles                  22.1.0
aiosqlite                 0.21.0
alembic                   1.16.5
annotated-types           0.7.0
anyio                     4.10.0
argon2-cffi               25.1.0
argon2-cffi-bindings      25.1.0
arrow                     1.3.0
asttokens                 3.0.0
async-lru                 2.0.5
attrs                     25.3.0
babel                     2.17.0
beautifulsoup4            4.13.5
bleach                    6.2.0
blinker                   1.9.0
boto3                     1.26.121
botocore                  1.29.165
category-encoders         2.6.4
certifi                   2025.8.3
cffi                      2.0.0
charset-normalizer        3.4.3
choreographer             1.0.10
click                     8.1.8
cloudpickle               2.2.1
comm                      0.2.3
contourpy                 1.3.0
cycler                    0.12.1
Cython                    3.1.3
dash     

In [2]:
import mlflow

from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor

# Fixed seed so the train/test split and the forest are identical on every
# re-run, which keeps tracked runs comparable.
SEED = 42

# Load the diabetes dataset and hold out a test partition.
db = load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(
    db.data, db.target, random_state=SEED
)

# Run description (just metadata attached to the run).
desc = "the simplest possible example"

# Connect to the MLflow tracking server.
# NOTE(review): this host/port differs from the tracking URI used by the other
# cells in this notebook (10.43.101.149:5000) — confirm it is intentional.
mlflow.set_tracking_uri("http://10.43.100.83:5001")

# Execute the run: the model is fitted inside it, but nothing is logged explicitly.
with mlflow.start_run(run_name="no_artifacts_logged2", description=desc) as run:
    rf = RandomForestRegressor(
        n_estimators=100, max_depth=6, max_features=3, random_state=SEED
    )
    rf.fit(X_train, y_train)

In [6]:
import mlflow

from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor

# Fixed seed so the train/test split and the forest are identical on every
# re-run, which keeps tracked runs comparable.
SEED = 42

# Load the diabetes dataset and hold out a test partition.
db = load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(
    db.data, db.target, random_state=SEED
)

# Run description (just metadata attached to the run).
desc = "the simplest possible example"

# Connect to the MLflow tracking server and select the experiment that
# subsequent runs are recorded under.
mlflow.set_tracking_uri("http://10.43.101.149:5000")
mlflow.set_experiment("mlflow_tracking_examples_class")

# Execute the run: the model is fitted inside it, but nothing is logged explicitly.
with mlflow.start_run(run_name="no_artifacts_logged", description=desc) as run:
    rf = RandomForestRegressor(
        n_estimators=100, max_depth=6, max_features=3, random_state=SEED
    )
    rf.fit(X_train, y_train)

2024/03/11 19:36:45 INFO mlflow.tracking.fluent: Experiment with name 'mlflow_tracking_examples_class2' does not exist. Creating a new experiment.


In [5]:
import mlflow

from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor

# Fixed seed so the train/test split and the forest are identical on every
# re-run, which keeps tracked runs comparable.
SEED = 42

# Load the diabetes dataset and hold out a test partition.
db = load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(
    db.data, db.target, random_state=SEED
)

# Connect to the MLflow tracking server and select the experiment.
mlflow.set_tracking_uri("http://10.43.101.149:5000")
mlflow.set_experiment("mlflow_tracking_examples_class")

with mlflow.start_run(run_name="params_no_artifacts_logged") as run:
    # Hyperparameters are kept in one dict so that exactly what is passed to
    # the estimator is also what gets logged (the seed included).
    params = {
        "n_estimators": 100,
        "max_depth": 6,
        "max_features": 3,
        "random_state": SEED,
    }

    rf = RandomForestRegressor(**params)
    rf.fit(X_train, y_train)

    # Log the whole dict at once, then one extra parameter, a metric and a tag
    # to demonstrate the individual logging calls.
    mlflow.log_params(params)
    mlflow.log_param("my_extra_param", "extra_param_value")
    mlflow.log_metric("my_metric", 0.8)  # placeholder value, not computed from the model
    mlflow.set_tag("my_tag", "my_tag_value")

In [2]:
import mlflow

from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor

import os
import getpass

# Endpoint used for S3-compatible artifact storage.
os.environ['MLFLOW_S3_ENDPOINT_URL'] = "http://10.43.101.149:9000"

# Credentials are taken from the environment; when a variable is missing the
# user is prompted, so no secret is stored in the notebook source.
for _cred in ('AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'):
    if not os.environ.get(_cred):
        os.environ[_cred] = getpass.getpass(f"{_cred}: ")

# Fixed seed so the train/test split and the forest are identical on every
# re-run, which keeps tracked runs comparable.
SEED = 42

# Load the diabetes dataset and hold out a test partition.
db = load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(
    db.data, db.target, random_state=SEED
)

# Connect to the MLflow tracking server and select the experiment.
mlflow.set_tracking_uri("http://10.43.101.149:5000")
mlflow.set_experiment("mlflow_tracking_examples_class")

with mlflow.start_run(run_name="logged_artifacts") as run:
    # One dict feeds both the estimator and the parameter log.
    params = {
        "n_estimators": 100,
        "max_depth": 6,
        "max_features": 3,
        "random_state": SEED,
    }

    rf = RandomForestRegressor(**params)
    rf.fit(X_train, y_train)

    mlflow.log_params(params)
    # Store the fitted model as a run artifact under the given path.
    mlflow.sklearn.log_model(
      sk_model=rf,
      artifact_path="random_forest_regressor"
    )

In [3]:
# Report the tracking server in use and where the artifacts of the last
# explicit run are stored.
# `mlflow.get_artifact_uri()` is deliberately not used: with no active run it
# implicitly starts a new run and leaves it open. `run` is the handle bound by
# the preceding `with mlflow.start_run(...) as run:` cell.
print('tracking uri:', mlflow.get_tracking_uri())
print('artifact uri:', run.info.artifact_uri)

tracking uri: http://10.43.101.149:5000
artifact uri: s3://mlflows33/artifacts/1/82ff6293aabb4a3f96ef2b53b22d6849/artifacts


In [7]:
import mlflow

from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor

import os
import getpass

# Endpoint used for S3-compatible artifact storage.
os.environ['MLFLOW_S3_ENDPOINT_URL'] = "http://10.43.101.149:9000"

# Credentials are taken from the environment; when a variable is missing the
# user is prompted, so no secret is stored in the notebook source.
for _cred in ('AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'):
    if not os.environ.get(_cred):
        os.environ[_cred] = getpass.getpass(f"{_cred}: ")

# Fixed seed so the train/test split and the forest are identical on every
# re-run, which keeps tracked runs comparable.
SEED = 42

db = load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(
    db.data, db.target, random_state=SEED
)

# Connect to MLflow and select the experiment.
mlflow.set_tracking_uri("http://10.43.101.149:5000")
mlflow.set_experiment("mlflow_tracking_examples_class2")

# Enable autologging, including input examples and model signatures.
# No run is opened explicitly in this cell; logging is left to autolog.
mlflow.autolog(log_input_examples=True, log_model_signatures=True)

# Train the model.
rf = RandomForestRegressor(
    n_estimators=100, max_depth=6, max_features=3, random_state=SEED
)
rf.fit(X_train, y_train)

2024/03/11 19:37:04 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2024/03/11 19:37:05 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID 'd12355f170804638bd5b9371c3db899b', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow


In [4]:
import mlflow

from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor

import os
import getpass

# Endpoint used for S3-compatible artifact storage.
os.environ['MLFLOW_S3_ENDPOINT_URL'] = "http://10.43.101.149:9000"

# Credentials are taken from the environment; when a variable is missing the
# user is prompted, so no secret is stored in the notebook source.
for _cred in ('AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'):
    if not os.environ.get(_cred):
        os.environ[_cred] = getpass.getpass(f"{_cred}: ")

# Fixed seed so the train/test split and the forest are identical on every
# re-run, which keeps tracked runs comparable.
SEED = 42

db = load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(
    db.data, db.target, random_state=SEED
)

# Connect to MLflow and select the experiment.
mlflow.set_tracking_uri("http://10.43.101.149:5000")
mlflow.set_experiment("mlflow_tracking_examples_class")

# Enable autologging, including model signatures and input examples.
mlflow.autolog(log_model_signatures=True, log_input_examples=True)

# Fit inside an explicitly named run so the run gets a chosen name.
with mlflow.start_run(run_name="autolog_with_named_run") as run:
    rf = RandomForestRegressor(
        n_estimators=100, max_depth=6, max_features=3, random_state=SEED
    )
    rf.fit(X_train, y_train)

2024/03/11 19:30:48 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


In [2]:
import mlflow

from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor

import os
import getpass

# Endpoint used for S3-compatible artifact storage.
os.environ['MLFLOW_S3_ENDPOINT_URL'] = "http://10.43.101.149:9000"

# Credentials are taken from the environment; when a variable is missing the
# user is prompted, so no secret is stored in the notebook source.
for _cred in ('AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'):
    if not os.environ.get(_cred):
        os.environ[_cred] = getpass.getpass(f"{_cred}: ")

# Fixed seed: every nested run sees the same split and the same forest
# randomness, so only `n_estimators` varies between them.
SEED = 42

db = load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(
    db.data, db.target, random_state=SEED
)

# Connect to MLflow and select the experiment.
mlflow.set_tracking_uri("http://10.43.101.149:5000")
mlflow.set_experiment("mlflow_tracking_examples_class")

# Enable autologging, including model signatures and input examples.
mlflow.autolog(log_model_signatures=True, log_input_examples=True)

# One parent run containing a nested child run per forest size (20, 40, 60, 80).
with mlflow.start_run(run_name="main_run_for_nested") as run:
    for estimators in range(20, 100, 20):
        with mlflow.start_run(run_name=f"nested_{estimators}_estimators", nested=True) as nested:
            rf = RandomForestRegressor(
                n_estimators=estimators,
                max_depth=6,
                max_features=3,
                random_state=SEED,
            )
            rf.fit(X_train, y_train)

2024/03/11 19:11:23 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


In [1]:
import mlflow

from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

import os
import getpass

# Endpoint used for S3-compatible artifact storage.
os.environ['MLFLOW_S3_ENDPOINT_URL'] = "http://10.43.101.149:9000"

# Credentials are taken from the environment; when a variable is missing the
# user is prompted, so no secret is stored in the notebook source.
for _cred in ('AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY'):
    if not os.environ.get(_cred):
        os.environ[_cred] = getpass.getpass(f"{_cred}: ")

# Fixed seed so the split and every candidate forest are reproducible; without
# it, differences between grid points are confounded with random variation.
SEED = 42

db = load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(
    db.data, db.target, random_state=SEED
)

# Connect to MLflow and select the experiment.
mlflow.set_tracking_uri("http://10.43.101.149:5000")
mlflow.set_experiment("mlflow_tracking_examples_class1")

# Enable autologging, including model signatures and input examples.
mlflow.autolog(log_model_signatures=True, log_input_examples=True)

# Hyperparameter grid: 3 x 3 x 3 = 27 candidate combinations.
params = {
  "n_estimators": [33, 66, 200],
  "max_depth": [2, 4, 6],
  "max_features": [3, 4, 5]
}

rf = RandomForestRegressor(random_state=SEED)
searcher = GridSearchCV(estimator=rf, param_grid=params)

# Run the search inside a named run.
with mlflow.start_run(run_name="autolog_with_grid_search") as run:
    searcher.fit(X_train, y_train)

2024/03/11 19:47:18 INFO mlflow.tracking.fluent: Experiment with name 'mlflow_tracking_examples_class1' does not exist. Creating a new experiment.
2024/03/11 19:47:18 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2024/03/11 19:47:52 INFO mlflow.sklearn.utils: Logging the 5 best runs, 22 runs will be omitted.
