In [36]:
import mlflow

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import xgboost as xgb
from sklearn.metrics import mean_squared_error,mean_absolute_error,r2_score
#import mlflow.xgboost


In [4]:
# Point the MLflow client at the tracking server and enable autologging.
mlflow.set_tracking_uri("http://localhost:5002/")
mlflow.autolog()

exp_name = "XGBoost-Test1"
# NOTE(review): absolute, machine-specific path; only used when the experiment
# is created for the first time. Consider making it configurable.
artifact_loc = "/Users/vijaya/DS_workspace/MLflow/mlruns"

# Tags attached to the experiment on creation and reused for runs in later cells.
tags = {"team": "DS team",
        "dataset": "Wine",
        "release.version": "2.2.2"}

# get_experiment_by_name returns None when no experiment has this name, so
# test for that directly rather than catching the AttributeError raised by
# `None.experiment_id` (which would also hide unrelated AttributeErrors).
experiment = mlflow.get_experiment_by_name(exp_name)
if experiment is None:
    experiment_id = mlflow.create_experiment(
        name=exp_name,
        artifact_location=artifact_loc,
        tags=tags,
    )
else:
    experiment_id = experiment.experiment_id

# Make the experiment active; as the last expression, the returned Experiment
# is displayed as the cell output.
mlflow.set_experiment(exp_name)

2023/06/09 22:26:13 INFO mlflow.tracking.fluent: Autologging successfully enabled for xgboost.
2023/06/09 22:26:14 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


<Experiment: artifact_location='/Users/vijaya/DS_workspace/MLflow/mlruns', creation_time=1686329774052, experiment_id='775259865547496682', last_update_time=1686329774052, lifecycle_stage='active', name='XGBoost-Test1', tags={'dataset': 'Wine', 'release.version': '2.2.2', 'team': 'DS team'}>

In [5]:
# Fetch the experiment by id and print its key properties, one per line.
exp = mlflow.get_experiment(experiment_id)

print(
    f"Name: {exp.name}",
    f"Artifact Location: {exp.artifact_location}",
    f"Experiment-id: {exp.experiment_id}",
    f"Lifecycle-stage: {exp.lifecycle_stage}",
    sep="\n",
)

Name: XGBoost-Test1
Artifact Location: /Users/vijaya/DS_workspace/MLflow/mlruns
Experiment-id: 775259865547496682
Lifecycle-stage: active


# Building model

In [6]:
# Step 1: read the white-wine quality CSV directly from GitHub.
source = "https://raw.githubusercontent.com/stedy/Machine-Learning-with-R-datasets/master/winequality-white.csv"
data = pd.read_csv(source)

# Step 2: hold out 20% of the rows for testing (fixed seed for repeatability),
# then separate the "quality" target column from the remaining feature columns.
train, test = train_test_split(data, test_size=0.2, random_state=42)

x_train, y_train = train.drop(columns="quality"), train["quality"]
x_test, y_test = test.drop(columns="quality"), test["quality"]

# Define the model and track its metrics

In [16]:
# Train an XGBoost regressor inside an MLflow run and log its evaluation
# metrics and hyperparameters. The names `params`, `model`, `prediction`,
# `rmse`, `mse` and `r2` are reused by later cells.
with mlflow.start_run(experiment_id=exp.experiment_id, run_name='run_9') as run:
    run_id = run.info.run_id
    experiment_id = run.info.experiment_id

    print("MLFlow")
    print("RUN ID:", run_id)
    print("Experiment ID:", experiment_id)
    print("Experiment Name:", mlflow.get_experiment(experiment_id).name)

    # Model hyperparameters passed straight to XGBRegressor.
    params = {
        'max_depth': 4,
        'min_child_weight': 1,
        'random_state': 1600,
    }

    print("MLflow Parameters:")
    print(" - max_depth:", params['max_depth'])
    print(" - min_child_weight:", params['min_child_weight'])

    model = xgb.XGBRegressor(**params)
    model.fit(x_train, y_train)

    # Evaluate on the held-out test split. MSE is computed once and RMSE is
    # derived from it instead of calling mean_squared_error a second time.
    prediction = model.predict(x_test)
    mse = mean_squared_error(y_test, prediction)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, prediction)
    print("Metrics :")
    print("- RMSE:", rmse)
    print("- MSE:", mse)
    print("- R2:", r2)

    # Explicitly log the evaluation metrics to the active run.
    mlflow.log_metric("RMSE", rmse)
    mlflow.log_metric("MSE", mse)
    mlflow.log_metric("R2", r2)

    # Explicitly log the hyperparameters under their display names.
    mlflow.log_param("MaxDepth", params['max_depth'])
    mlflow.log_param("MinChildWeight", params['min_child_weight'])
    mlflow.log_param("RandomState", params['random_state'])
    



MLFlow
RUN ID: 77fba95975b34466a446425908fac231
Experiment ID: 775259865547496682
Experiment Name: XGBoost-Test1
MLflow Parameters:
 - max_depth: 4
 - min_child_weight: 1


<html lang=en>
<title>404 Not Found</title>
<h1>Not Found</h1>
<p>The requested URL was not found on the server. If you entered the URL manually please check your spelling and try again.</p>
'


Metrics :
- RMSE: 0.6471322795610154
- MSE: 0.4187801872498362
- R2: 0.4592708672785537


# Model Registration

In [38]:
# Log the trained model in a new run and register it in the Model Registry.
# Relies on `tags`, `params`, `rmse`, `mse`, `r2` and `model` from earlier cells.
with mlflow.start_run(run_name="Logged_Model_2"):
    # `tags` is a dict, so use set_tags; set_tag expects a single (key, value) pair.
    mlflow.set_tags(tags)

    mlflow.log_param('max_depth', params['max_depth'])
    mlflow.log_param('min_child_weight', params['min_child_weight'])

    mlflow.log_metric("RMSE", rmse)
    mlflow.log_metric("MSE", mse)
    mlflow.log_metric("R2", r2)

    # Store the model under the run's "artifact" path and register it as
    # "XGBoost-logged-model-2".
    # Ref: https://mlflow.org/docs/latest/python_api/mlflow.xgboost.html#mlflow.xgboost.log_model
    mlflow.xgboost.log_model(model, "artifact", registered_model_name="XGBoost-logged-model-2")

    # Also write a copy of the model to this local filesystem path.
    # NOTE(review): machine-specific absolute path; re-running the cell may
    # fail if the target directory already exists - confirm before re-use.
    mlflow.xgboost.save_model(model, path='/Users/vijaya/DS_workspace/MLflow/mlruns/models/saved_models/model-2')

Successfully registered model 'XGBoost-logged-model-2'.
2023/06/09 23:33:43 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: XGBoost-logged-model-2, version 1
Created version '1' of model 'XGBoost-logged-model-2'.


In [None]:
# Template for logging a scikit-learn run: tags, hyperparameters, metrics,
# the model itself and the training script as a code artifact.
# NOTE(review): `alpha`, `l1_ratio`, `mae` and `lr` are not defined anywhere in
# the visible notebook, so this cell will raise NameError as written; `rmse`
# and `r2` would be the XGBoost values from the earlier run. Presumably the
# elided section below is meant to train and evaluate an ElasticNet - confirm.
with mlflow.start_run(run_name='Sk_Elasticnet'):

        # Attach the shared tag dict to this run.
        mlflow.set_tags(tags)
        #..... (model training / evaluation code elided in the original)
        mlflow.log_param("alpha", alpha)
        mlflow.log_param("l1_ratio", l1_ratio)
        mlflow.log_metric("rmse", rmse)
        mlflow.log_metric("r2", r2)
        mlflow.log_metric("mae", mae)

        # Log the model under the run's "model" artifact path.
        mlflow.sklearn.log_model(lr, "model")
        # Upload ./train.py into the run's "code" artifact folder.
        # NOTE(review): requires ./train.py to exist relative to the working directory.
        mlflow.log_artifact(local_path='./train.py',    
                            artifact_path='code')

# Model Predictions using Registered Model

In [None]:
# Predict with a model loaded from the MLflow Model Registry.
# The URI uses the documented "models:/<name>/<stage>" form.
# NOTE(review): this notebook registers "XGBoost-logged-model-2" and never
# promotes a version to Production; confirm that "XGBoost_Model" has a
# Production version on the tracking server, or update the name/stage here.
model = mlflow.xgboost.load_model('models:/XGBoost_Model/Production')
print("Model:\n", model)
print("="*50)

# The test features from the split cell are named `x_test` (lower-case);
# `X_test` is not defined in this notebook and raised NameError.
prediction = model.predict(x_test)
print("Prediction:\n", prediction)
print("Prediction Done.")

In [35]:
# Close the currently active MLflow run so later cells start from a clean state.
mlflow.end_run()

# Ref:

https://medium.com/analytics-vidhya/manage-your-machine-learning-lifecycle-with-mlflow-in-python-d678d5f3c682