In [6]:
# Model Tracing Enable for MLFlow 
# Enable auto-tracing

In [7]:
# import mlflow
# mlflow.sklearn.autolog()

In [8]:
# Experiment Tracking

In [9]:
import mlflow

from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor

# Enable MLflow's automatic experiment tracking for scikit-learn
mlflow.sklearn.autolog()

# Load the training dataset
db = load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(db.data, db.target)

rf = RandomForestRegressor(n_estimators=100, max_depth=6, max_features=3)
# MLflow triggers logging automatically upon model fitting
rf.fit(X_train, y_train)

2025/04/11 12:01:24 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID 'bbac18a1288d4334844f56bc0597655e', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current sklearn workflow


In [5]:
# Serving Models
## https://github.com/mlflow/mlflow?tab=readme-ov-file#serving-models-doc

In [11]:
import os
import subprocess
model_id="bbac18a1288d4334844f56bc0597655e"
cmd = "mlflow models serve --model-uri runs:/" + model_id + "/model --host 0.0.0.0 --port 5000  --env-manager local"

returned_value = subprocess.call(cmd, shell=True)  # returns the exit code in unix
print('returned value:', returned_value)


Downloading artifacts: 100%|██████████| 5/5 [00:00<00:00, 5143.86it/s] 
2025/04/11 12:11:05 INFO mlflow.models.flavor_backend_registry: Selected backend for flavor 'python_function'
2025/04/11 12:11:05 INFO mlflow.pyfunc.backend: === Running command 'exec uvicorn --host 0.0.0.0 --port 5000 --workers 1 mlflow.pyfunc.scoring_server.app:app'
INFO:     Started server process [2191]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:5000 (Press CTRL+C to quit)
INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [2191]

Aborted!


KeyboardInterrupt: 

In [12]:
#Observability (Doc)

In [13]:
import mlflow
from openai import OpenAI

# Enable tracing for OpenAI
mlflow.openai.autolog()

# Query OpenAI LLM normally
response = OpenAI().chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Hi!"}],
    temperature=0.1,
)

AuthenticationError: Error code: 401 - {'error': {'message': 'Incorrect API key provided: your-api*****here. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}

In [14]:
# Evaluating Models (Doc)

In [15]:
#pip install tiktoken==0.9.0

In [16]:
import mlflow
import pandas as pd

# Evaluation set contains (1) input question (2) model outputs (3) ground truth
df = pd.DataFrame(
    {
        "inputs": ["What is MLflow?", "What is Spark?"],
        "outputs": [
            "MLflow is an innovative fully self-driving airship powered by AI.",
            "Sparks is an American pop and rock duo formed in Los Angeles.",
        ],
        "ground_truth": [
            "MLflow is an open-source platform for managing the end-to-end machine learning (ML) "
            "lifecycle.",
            "Apache Spark is an open-source, distributed computing system designed for big data "
            "processing and analytics.",
        ],
    }
)
eval_dataset = mlflow.data.from_pandas(
    df, predictions="outputs", targets="ground_truth"
)

# Start an MLflow Run to record the evaluation results to
with mlflow.start_run(run_name="evaluate_qa"):
    # Run automatic evaluation with a set of built-in metrics for question-answering models
    results = mlflow.evaluate(
        data=eval_dataset,
        model_type="question-answering",
    )

print(results.tables["eval_results_table"])

2025/04/11 12:12:26 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
Downloading artifacts: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 699.05it/s]

            inputs                                       ground_truth  \
0  What is MLflow?  MLflow is an open-source platform for managing...   
1   What is Spark?  Apache Spark is an open-source, distributed co...   

                                             outputs  token_count  
0  MLflow is an innovative fully self-driving air...           14  
1  Sparks is an American pop and rock duo formed ...           14  





In [17]:
print("MLFlow Model Evaluation , Observability and tracing - sample done!")

MLFlow Model Evaluation , Observability and tracing - sample done!
