# Training ML Models On-Premise

This notebook assumes that you run it on your local machine and send logs to an MLflow tracking server running on AWS.

### Set up the environment

In [1]:
# Record the Python version for reproducibility. `!` runs the command in a
# shell, so this reports whichever `python` is first on PATH.
# NOTE(review): confirm that is the same interpreter the kernel is using.
!python --version

Python 3.10.13


In [2]:
# Show the installed versions of the packages relevant to this notebook.
!pip freeze | grep -E "mlflow|boto3|urllib3|scikit-learn"

boto3==1.28.68
mlflow==2.6.0
scikit-learn==1.3.1
urllib3==1.26.18


In [3]:
# Uncomment to install the dependencies into the kernel's environment.
# %pip install mlflow==2.6.0 "urllib3<2.0" boto3

### Set MLflow Tracking URI

In [4]:
import os

# Address of the remote MLflow tracking server. The hostname below is only the
# default: export MLFLOW_TRACKING_URI before starting Jupyter to target a
# different server without editing the notebook.
MLFLOW_TRACKING_URI = os.environ.get(
    "MLFLOW_TRACKING_URI",
    "http://ec2-192-168-0-1.compute-1.amazonaws.com:5000",
)

### Train and Store a Model in MLflow

In [5]:
import mlflow
from mlflow.models import infer_signature

from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor

# Send all subsequent MLflow runs to the tracking server configured above.
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)

In [6]:
with mlflow.start_run() as run:
    # Fixed seed so the split, the fitted forest, and therefore the logged
    # model and the printed predictions are identical on every
    # Restart & Run All.
    SEED = 42

    # Load the diabetes dataset and hold out a test split.
    db = load_diabetes()
    X_train, X_test, y_train, y_test = train_test_split(
        db.data, db.target, random_state=SEED
    )

    # Create and train the model.
    rf = RandomForestRegressor(
        n_estimators=100, max_depth=6, max_features=3, random_state=SEED
    )
    rf.fit(X_train, y_train)

    # Use the model to make predictions on the test dataset.
    predictions = rf.predict(X_test)
    print(predictions)

    # Infer the input/output schema from the test inputs and predictions and
    # store it with the model under the "model" artifact path of this run.
    signature = infer_signature(X_test, predictions)
    mlflow.sklearn.log_model(rf, "model", signature=signature)

    # The run ID identifies this run (and its logged model) on the server.
    print(f"Run ID: {run.info.run_id}")

[134.84771182 191.23596051 184.67631963  97.9421355  183.42818541
 228.08878437  96.29232487 233.46635235 146.7487253  152.97977485
 179.5038669   83.11852723 167.97799283 157.85705033 138.06926848
 125.57536045 210.67315995 131.69258842 133.06536317  85.86407134
 217.93116615  89.75365991 258.9458341  210.43646154 238.44002486
 200.58148653 134.95005244 115.2921596  121.35298745 124.1596838
 231.62212314 119.042277   140.91713431 114.68066503 211.51184488
 213.78219189 252.70368836 255.74874982 114.61359683 187.48947093
 227.52336764 158.64590435 110.64126289 244.59467908 133.26841168
 164.25311912 276.32071397 165.88973568 178.88201176  98.74355029
 176.07436755 143.08073036 105.15011859 223.03413936 109.41638828
 177.29935774 165.8588871   95.44567852 167.57436773 150.61557528
 155.01299663 231.32293936 101.79796256 106.48917651 240.43120135
 243.42829515 183.28954846  95.58345431 110.12613212 179.65300583
 129.26900326 133.35152218 239.0892909  175.16030793 251.34946107
 195.754721

### Make Predictions with the Stored Model

In [7]:
import mlflow

# Build the model URI from the run created in the training cell rather than a
# pasted run ID, which goes stale (or points at a different run's model) as
# soon as the notebook is re-executed.
logged_model = f"runs:/{run.info.run_id}/model"

# Load model as a PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(logged_model)

# Predict on the same held-out inputs used in the training cell so the two
# printed arrays can be compared directly.
predictions = loaded_model.predict(X_test)
print(predictions)

[134.84771182 191.23596051 184.67631963  97.9421355  183.42818541
 228.08878437  96.29232487 233.46635235 146.7487253  152.97977485
 179.5038669   83.11852723 167.97799283 157.85705033 138.06926848
 125.57536045 210.67315995 131.69258842 133.06536317  85.86407134
 217.93116615  89.75365991 258.9458341  210.43646154 238.44002486
 200.58148653 134.95005244 115.2921596  121.35298745 124.1596838
 231.62212314 119.042277   140.91713431 114.68066503 211.51184488
 213.78219189 252.70368836 255.74874982 114.61359683 187.48947093
 227.52336764 158.64590435 110.64126289 244.59467908 133.26841168
 164.25311912 276.32071397 165.88973568 178.88201176  98.74355029
 176.07436755 143.08073036 105.15011859 223.03413936 109.41638828
 177.29935774 165.8588871   95.44567852 167.57436773 150.61557528
 155.01299663 231.32293936 101.79796256 106.48917651 240.43120135
 243.42829515 183.28954846  95.58345431 110.12613212 179.65300583
 129.26900326 133.35152218 239.0892909  175.16030793 251.34946107
 195.754721