In [1]:
import mlflow

from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor

mlflow.autolog()

db = load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(db.data, db.target)

# Create and train models.
rf = RandomForestRegressor(n_estimators=100, max_depth=6, max_features=3)
rf.fit(X_train, y_train)

# Use the model to make predictions on the test dataset.
predictions = rf.predict(X_test)

2023/09/27 20:18:33 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2023/09/27 20:18:33 INFO mlflow.tracking.fluent: Autologging successfully enabled for pyspark.
2023/09/27 20:18:33 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID '82f3dd0891684d3aba89dbc96c80fdd2', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow


In [4]:
!mlflow ui --port=5001

[2023-09-27 20:21:25 -0400] [341619] [INFO] Starting gunicorn 21.2.0
[2023-09-27 20:21:25 -0400] [341619] [INFO] Listening at: http://127.0.0.1:5001 (341619)
[2023-09-27 20:21:25 -0400] [341619] [INFO] Using worker: sync
[2023-09-27 20:21:25 -0400] [341623] [INFO] Booting worker with pid: 341623
[2023-09-27 20:21:25 -0400] [341627] [INFO] Booting worker with pid: 341627
[2023-09-27 20:21:25 -0400] [341628] [INFO] Booting worker with pid: 341628
[2023-09-27 20:21:26 -0400] [341629] [INFO] Booting worker with pid: 341629
^C
[2023-09-27 20:30:21 -0400] [341619] [INFO] Handling signal: int
[2023-09-27 20:30:21 -0400] [341628] [INFO] Worker exiting (pid: 341628)
[2023-09-27 20:30:21 -0400] [341623] [INFO] Worker exiting (pid: 341623)
[2023-09-27 20:30:21 -0400] [341629] [INFO] Worker exiting (pid: 341629)
[2023-09-27 20:30:21 -0400] [341627] [INFO] Worker exiting (pid: 341627)


In [5]:
import mlflow 
from sklearn.model_selection import train_test_split 
from sklearn.datasets import load_diabetes

# set the experiment id
mlflow.set_experiment(experiment_id="0")

mlflow.autolog() 
db = load_diabetes() 

X_train, X_test, y_train, y_test = train_test_split(db.data, db.target) 

# Create and train models. 
rf = RandomForestRegressor(n_estimators=100, max_depth=6, max_features=3) 
rf.fit(X_train, y_train) 

# Use the model to make predictions on the test dataset. 
predictions = rf.predict(X_test)

2023/09/27 08:13:42 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2023/09/27 08:13:42 INFO mlflow.tracking.fluent: Autologging successfully enabled for pyspark.
2023/09/27 08:13:42 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID 'c3b22ae19ae444ebb2cfef718bcdaa44', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow


In [6]:
!mlflow ui --port=5001

[2023-09-27 08:13:51 -0400] [254217] [INFO] Starting gunicorn 21.2.0
[2023-09-27 08:13:51 -0400] [254217] [INFO] Listening at: http://127.0.0.1:5001 (254217)
[2023-09-27 08:13:51 -0400] [254217] [INFO] Using worker: sync
[2023-09-27 08:13:51 -0400] [254218] [INFO] Booting worker with pid: 254218
[2023-09-27 08:13:51 -0400] [254219] [INFO] Booting worker with pid: 254219
[2023-09-27 08:13:51 -0400] [254220] [INFO] Booting worker with pid: 254220
[2023-09-27 08:13:51 -0400] [254221] [INFO] Booting worker with pid: 254221
^C
[2023-09-27 08:15:30 -0400] [254217] [INFO] Handling signal: int
[2023-09-27 08:15:30 -0400] [254219] [INFO] Worker exiting (pid: 254219)
[2023-09-27 08:15:30 -0400] [254218] [INFO] Worker exiting (pid: 254218)
[2023-09-27 08:15:30 -0400] [254220] [INFO] Worker exiting (pid: 254220)
[2023-09-27 08:15:30 -0400] [254221] [INFO] Worker exiting (pid: 254221)


### Store ML models

In [5]:
import mlflow
from mlflow.models import infer_signature

from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor

with mlflow.start_run() as run:
    # Load the diabetes dataset.
    db = load_diabetes()
    X_train, X_test, y_train, y_test = train_test_split(db.data, db.target)

    # Create and train models.
    rf = RandomForestRegressor(n_estimators=100, max_depth=6, max_features=3)
    rf.fit(X_train, y_train)

    # Use the model to make predictions on the test dataset.
    predictions = rf.predict(X_test)
    print(predictions)

    signature = infer_signature(X_test, predictions)
    mlflow.sklearn.log_model(rf, "model", signature=signature)

    print(f"Run ID: {run.info.run_id}")

[101.60445853 166.58880624  81.44530725 244.72847699 230.66278796
 211.47245312 206.77145734 101.03478303 146.23732091 226.23657994
  93.86100571 119.73989045 102.44673874 196.84296301 182.73052212
  98.33631254 206.50218619 185.09300542 113.33074754 121.06089015
 182.9809661  207.3695029  199.21143927 247.63675501 218.67288026
 154.75051856 114.55771794 135.9913129  107.04530356 161.04413034
 182.10358321 248.85277739 235.12541195 213.29298714 122.97824715
 102.21482345 173.32572024 124.61651218 137.06399614 179.15896731
 109.45267517 198.83737331 213.89892277  96.75704568 101.46535852
 120.45060306  92.22420199 143.19147861 167.24063782 131.37132572
 158.48457637 272.98167626  96.23642081 120.2996042  211.95597729
 104.08079352 176.62147535 122.56539834 224.36413898 121.49639844
 165.32212049 170.14773391 129.5932024   91.86114235 226.26600831
  82.43396368 151.8129733  120.7820993  180.4834001  192.39051206
 234.3566598  133.91935913 274.26832419 172.0523618  254.97546561
  99.61202

In [6]:
!mlflow ui --port=5001

[2023-09-27 20:38:23 -0400] [342146] [INFO] Starting gunicorn 21.2.0
[2023-09-27 20:38:23 -0400] [342146] [INFO] Listening at: http://127.0.0.1:5001 (342146)
[2023-09-27 20:38:23 -0400] [342146] [INFO] Using worker: sync
[2023-09-27 20:38:23 -0400] [342147] [INFO] Booting worker with pid: 342147
[2023-09-27 20:38:23 -0400] [342148] [INFO] Booting worker with pid: 342148
[2023-09-27 20:38:23 -0400] [342149] [INFO] Booting worker with pid: 342149
[2023-09-27 20:38:23 -0400] [342150] [INFO] Booting worker with pid: 342150
^C
[2023-09-27 20:49:11 -0400] [342146] [INFO] Handling signal: int
[2023-09-27 20:49:11 -0400] [342150] [INFO] Worker exiting (pid: 342150)
[2023-09-27 20:49:11 -0400] [342148] [INFO] Worker exiting (pid: 342148)
[2023-09-27 20:49:11 -0400] [342149] [INFO] Worker exiting (pid: 342149)
[2023-09-27 20:49:11 -0400] [342147] [INFO] Worker exiting (pid: 342147)


### Load a model

In [7]:
import mlflow

from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes

db = load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(db.data, db.target)

model = mlflow.sklearn.load_model("mlruns/0/a7edd2653458481ab60488297d06a781/artifacts/model/")
predictions = model.predict(X_test)
print(predictions)

[122.40261887 234.18467364 140.18766533 243.91707622 245.53061941
 187.89789308  96.28890875  88.89452932 135.70200448 107.04530356
 103.76999767 176.28014701 180.4834001   76.14683975 128.55257159
 185.09300542 143.69013658 150.82986054  97.45081585 112.9968235
 138.76118    116.49273203 101.60445853  87.30251689 122.56539834
  89.62155793 226.26600831 210.18484846 106.98860284 259.98304437
 260.79224118 109.14294108 211.76111755 135.42689954 120.7820993
  93.86100571 218.16837761 248.54687141  95.94761874 123.11095363
  90.80937315 106.67951664 260.53131605 274.26832419 173.32572024
 176.62147535 146.23732091 139.90918216  98.94524125 156.17888733
 215.7182531  195.07027538 141.03099423 155.00970735 103.6021406
 237.3877715   78.4928254  231.54420999 192.39051206 188.42045515
 103.50332736 190.70644941 107.2176039  158.99833758 186.97634425
  95.74975624 177.2550257  111.68677597  91.86114235 110.04041889
 212.80348334 167.24063782 226.23657994 269.06005143 196.84296301
 167.20063281