In [1]:
import mlflow
import mlflow.sklearn
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
import pandas as pd

In [2]:
# train my machine learning model 
# heart disease prediction model

data = pd.read_csv(r'C:\Users\ankit_aj\Desktop\MLOPS-case_studies\Demo_050725_DVC\SKILLFY_21_JUNE_25\data\heart.csv')
data.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [5]:
from sklearn.metrics import accuracy_score, f1_score, precision_score
from sklearn.ensemble import RandomForestClassifier

# supervise machine learning classfication model 
X = data.drop('target', axis=1)
y = data['target']
# Since RandomForestRegressor is a regression model, we need to convert its predictions to binary for classification metrics
# Predict and threshold at 0.5
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

n_estimators = 2
max_depth=1
random_state=23
model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=random_state)
model.fit(X_test, y_test)
y_pred = model.predict(X_test)

precision = precision_score(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)


print("Accuracy:", accuracy)
print("F1 Score:", f1)
print("Precision:", precision)


# Log the model with MLflow
mlflow.set_experiment("1708_mlops_heart_case")

with mlflow.start_run():
    #mlflow.sklearn.log_model(model, "model")
    mlflow.log_param("n_estimators", n_estimators)
    mlflow.log_param("max_depth", max_depth)
    mlflow.log_param("random_state", random_state)
    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_metric("f1_score", f1)
    mlflow.log_metric("precision", precision)
    mlflow.sklearn.log_model(model, "random_forest_classifier")
    print("Model logged with MLflow")
    print("Run ID:", mlflow.active_run().info.run_id)
    print(f"Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, F1 Score: {f1:.4f}")




Accuracy: 0.8524590163934426
F1 Score: 0.8732394366197183
Precision: 0.7948717948717948




Model logged with MLflow
Run ID: ba51dea241f244668abc3fd2c2dc69a5
Accuracy: 0.8525, Precision: 0.7949, F1 Score: 0.8732


In [6]:
from sklearn.tree import DecisionTreeClassifier

# Train a Decision Tree Classifier
dt_max_depth = 5
dt_random_state = 37
dt_model = DecisionTreeClassifier(max_depth=dt_max_depth, random_state=dt_random_state)
dt_model.fit(X_train, y_train)
dt_y_pred = dt_model.predict(X_test)

dt_precision = precision_score(y_test, dt_y_pred)
dt_accuracy = accuracy_score(y_test, dt_y_pred)
dt_f1 = f1_score(y_test, dt_y_pred)

print("Decision Tree Accuracy:", dt_accuracy)
print("Decision Tree F1 Score:", dt_f1)
print("Decision Tree Precision:", dt_precision)

# Log the Decision Tree model with MLflow
mlflow.set_experiment("1708_dt_mlops_heart_case")

with mlflow.start_run():
    mlflow.sklearn.log_model(dt_model, "decision_tree_classifier")
    mlflow.log_param("max_depth", dt_max_depth)
    mlflow.log_param("random_state", dt_random_state)
    mlflow.log_metric("accuracy", dt_accuracy)
    mlflow.log_metric("f1_score", dt_f1)
    mlflow.log_metric("precision", dt_precision)
    print("Decision Tree model logged with MLflow")
    print("Run ID:", mlflow.active_run().info.run_id)
    print(f"Accuracy: {dt_accuracy:.4f}, Precision: {dt_precision:.4f}, F1 Score: {dt_f1:.4f}")

2025/08/17 08:33:44 INFO mlflow.tracking.fluent: Experiment with name '1708_dt_mlops_heart_case' does not exist. Creating a new experiment.


Decision Tree Accuracy: 0.819672131147541
Decision Tree F1 Score: 0.8307692307692308
Decision Tree Precision: 0.8181818181818182




Decision Tree model logged with MLflow
Run ID: a649e0bb16a249bc9aef89c4f6c26092
Accuracy: 0.8197, Precision: 0.8182, F1 Score: 0.8308


In [7]:
# challenge: run the dataset with XGBoost and log the model with MLflow
# you will have to install xgboost using pip install xgboost
# Train XGBoost model
import xgboost as xgb
xgb_model = xgb.XGBClassifier(
    n_estimators=100,
    max_depth=3,
    learning_rate=0.1,
    random_state=42
)

# Fit the model
xgb_model.fit(X_train, y_train)

# Make predictions
xgb_y_pred = xgb_model.predict(X_test)

# Calculate metrics
xgb_precision = precision_score(y_test, xgb_y_pred)
xgb_accuracy = accuracy_score(y_test, xgb_y_pred)
xgb_f1 = f1_score(y_test, xgb_y_pred)

print("XGBoost Accuracy:", xgb_accuracy)
print("XGBoost F1 Score:", xgb_f1)
print("XGBoost Precision:", xgb_precision)

# Log the XGBoost model with MLflow
mlflow.set_experiment("1708_xgboost_mlops_heart_case")

with mlflow.start_run():
    mlflow.xgboost.log_model(xgb_model, "xgboost_classifier")
    mlflow.log_params({
        "n_estimators": 100,
        "max_depth": 3,
        "learning_rate": 0.1,
        "random_state": 42
    })
    mlflow.log_metrics({
        "accuracy": xgb_accuracy,
        "f1_score": xgb_f1,
        "precision": xgb_precision
    })
    print("XGBoost model logged with MLflow")
    print("Run ID:", mlflow.active_run().info.run_id)
    print(f"Accuracy: {xgb_accuracy:.4f}, Precision: {xgb_precision:.4f}, F1 Score: {xgb_f1:.4f}")



2025/08/17 09:04:31 INFO mlflow.tracking.fluent: Experiment with name '1708_xgboost_mlops_heart_case' does not exist. Creating a new experiment.


XGBoost Accuracy: 0.8360655737704918
XGBoost F1 Score: 0.8387096774193549
XGBoost Precision: 0.8666666666666667


  self.get_booster().save_model(fname)


XGBoost model logged with MLflow
Run ID: 6236e6c407414344bf4d2fd4283dd637
Accuracy: 0.8361, Precision: 0.8667, F1 Score: 0.8387


In [20]:
# ============================================
# Load model from MLflow Experiment and Predict
# ============================================

import mlflow
import mlflow.sklearn
from mlflow.tracking import MlflowClient

# 1. Tracking URI must be the same as training
mlflow.set_tracking_uri("file:./mlruns")


# 2. Replace with your experiment name or ID
experiment_name = "1708_xgboost_mlops_heart_case"
exp = mlflow.get_experiment_by_name(experiment_name)
print(exp)
experiment_id = exp.experiment_id
print("Using Experiment ID:", experiment_id)

# 3. Get latest run from this experiment
client = MlflowClient()
runs = client.search_runs(experiment_id, order_by=["attributes.start_time desc"], max_results=1)

if not runs:
    raise Exception("No runs found in experiment:", experiment_name)

run = runs[0]
run_id = run.info.run_id
print("Latest Run ID:", run_id)

# 4. Construct model path
model_uri = f"runs:/{run_id}/xgboost_classifier"
print("Loading model from:", model_uri)

# 5. Load model
model = mlflow.xgboost.load_model(model_uri)

# 6. Run predictions on X_test (must reuse the same X_test as in training script)
y_pred = model.predict(X_test)

print("Predictions:", y_pred[:10])
print("True Labels:", y_test[:10])


<Experiment: artifact_location='file:///c:/Users/ankit_aj/Desktop/MLOPS-case_studies/Demo_050725_DVC/Skillfy_27_July/skillfy_morn_2707/skillfy_morn_2707/MLFLOW_demo/mlruns/948957668212674834', creation_time=1755401671169, experiment_id='948957668212674834', last_update_time=1755401671169, lifecycle_stage='active', name='1708_xgboost_mlops_heart_case', tags={}>
Using Experiment ID: 948957668212674834
Latest Run ID: 6236e6c407414344bf4d2fd4283dd637
Loading model from: runs:/6236e6c407414344bf4d2fd4283dd637/xgboost_classifier
Predictions: [0 0 1 0 1 1 1 0 0 0]
True Labels: 179    0
228    0
111    1
246    0
60     1
9      1
119    1
223    0
268    0
33     1
Name: target, dtype: int64


In [4]:
import subprocess
import time

# Start the MLflow UI on port 5000
subprocess.Popen(["mlflow", "ui", "--port", "5000"])

# Optional: Give it time to start
time.sleep(3)

print("MLflow UI running at http://localhost:5000")


MLflow UI running at http://localhost:5000


In [None]:
# you will have to install mlflow using pip install mlflow
# you just need to run this script and then open the MLflow UI in your browser at http://localhost:5000
# you can also run the mlflow ui command in your terminal to start the MLflow UI