In [69]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split , cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report , f1_score, make_scorer
import matplotlib.pyplot as plt 
import seaborn as sns 
import warnings
warnings.filterwarnings('ignore')

In [19]:
# Step 1: Create an imbalanced binary classification dataset
X , y = make_classification(n_samples=1000, n_features=10, n_informative=2, n_redundant=8,
                            weights=[0.9, 0.1], random_state=42, n_classes=2, flip_y=0)

np.unique(y,return_counts=True)

(array([0, 1]), array([900, 100]))

In [20]:
# Split the dataset into training and testing sets
X_train , X_test , y_train , y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)


In [21]:
np.unique(y_train,return_counts=True)


(array([0, 1]), array([630,  70]))

In [24]:
import optuna

def objective(trial):
    params= {
        "C":trial.suggest_float("C",1e-1,1),
        "tol":trial.suggest_float("tol",1e-4, 1),
        "class_weight":trial.suggest_categorical("class_weight",[None, "balanced"]),
        "solver":trial.suggest_categorical("solver",['lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'])
    }

    model = LogisticRegression(**params)
   # model.fit(X_train, y_train)

    scorer= make_scorer(f1_score,average='macro')
    scores = cross_val_score( model, X_train, y_train , cv=3, scoring =scorer, )
    
    return np.mean(scores)

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=10)




[I 2025-10-20 12:06:11,054] A new study created in memory with name: no-name-4be7782f-9bf5-45a8-aaf0-f3fb769a766d
[I 2025-10-20 12:06:11,091] Trial 0 finished with value: 0.834686535055572 and parameters: {'C': 0.7279421927004299, 'tol': 0.9824039432884606, 'class_weight': None, 'solver': 'saga'}. Best is trial 0 with value: 0.834686535055572.
[I 2025-10-20 12:06:11,116] Trial 1 finished with value: 0.473684745693775 and parameters: {'C': 0.4687632533122452, 'tol': 0.7266811050582603, 'class_weight': None, 'solver': 'lbfgs'}. Best is trial 0 with value: 0.834686535055572.
[I 2025-10-20 12:06:11,136] Trial 2 finished with value: 0.473684745693775 and parameters: {'C': 0.16310304088124183, 'tol': 0.7984073560450802, 'class_weight': None, 'solver': 'newton-cg'}. Best is trial 0 with value: 0.834686535055572.
[I 2025-10-20 12:06:11,155] Trial 3 finished with value: 0.473684745693775 and parameters: {'C': 0.6678897993470665, 'tol': 0.6646360737980584, 'class_weight': 'balanced', 'solver': '

In [28]:
best_params = study.best_params
best_params

{'C': 0.785868089829688,
 'tol': 0.4007078792924085,
 'class_weight': None,
 'solver': 'saga'}

In [29]:
study.best_value

0.8371203624073024

In [40]:
lr_model=LogisticRegression(**best_params)
lr_model.fit(X_train, y_train)

y_pred= lr_model.predict(X_test)

report = classification_report(y_pred, y_test)
print(report)

              precision    recall  f1-score   support

           0       0.96      0.95      0.95       271
           1       0.57      0.59      0.58        29

    accuracy                           0.92       300
   macro avg       0.76      0.77      0.77       300
weighted avg       0.92      0.92      0.92       300



In [32]:
report_dict = classification_report(y_pred, y_test, output_dict=True)
report_dict

{'0': {'precision': 0.9555555555555556,
  'recall': 0.9485294117647058,
  'f1-score': 0.9520295202952029,
  'support': 272.0},
 '1': {'precision': 0.5333333333333333,
  'recall': 0.5714285714285714,
  'f1-score': 0.5517241379310345,
  'support': 28.0},
 'accuracy': 0.9133333333333333,
 'macro avg': {'precision': 0.7444444444444445,
  'recall': 0.7599789915966386,
  'f1-score': 0.7518768291131187,
  'support': 300.0},
 'weighted avg': {'precision': 0.9161481481481482,
  'recall': 0.9133333333333333,
  'f1-score': 0.9146676846078805,
  'support': 300.0}}

In [37]:
report_dict['macro avg']['f1-score']

0.7518768291131187

In [43]:
best_params

{'C': 0.785868089829688,
 'tol': 0.4007078792924085,
 'class_weight': None,
 'solver': 'saga'}

In [45]:
import mlflow 

mlflow.set_experiment("Fraudulation_detection")
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000/")

with mlflow.start_run(run_name="Logistic_model"):
    mlflow.log_params(best_params)
    mlflow.log_metrics({
        "Accuracy":report_dict['accuracy'],
        "Recall_class_0":report_dict['0']['recall'],
        "Recall_class_1":report_dict['1']['recall'],
        "Macro_avg_f1_score":report_dict['macro avg']['f1-score']

    })
    mlflow.sklearn.log_model(lr_model,"LogisticRegression")



🏃 View run Logistic_model at: http://127.0.0.1:5000/#/experiments/443604195133847130/runs/c9322f481e4848d5ac075dd1aff6958f
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/443604195133847130


<h2 align='center' style="color:blue"> ML Course: ML Flow Tutorial</h2>

### Synthetic datasets

#### Consider the case of fraud detection , 0 as non-fraudulation, 1 as fraudulation

In [2]:
X, y = make_classification(n_samples=1000, n_features=10, n_informative=4, n_redundant=6,
                           weights=[0.9,0.1], flip_y=0, random_state=42)
np.unique(y, return_counts=True)

(array([0, 1]), array([900, 100]))

This is class imbalanced dataset

#### Split the datasets into trian and test set

In [3]:
from sklearn.model_selection import train_test_split
X_train, X_test , y_train, y_test = train_test_split(X,y, test_size=0.3, stratify=y, random_state=42)
print(f"X_train shape = {X_train.shape}, & X_test shape = {X_test.shape}")
print(f"y_train shape = {y_train.shape}, & y_test shape = {y_test.shape}")

X_train shape = (700, 10), & X_test shape = (300, 10)
y_train shape = (700,), & y_test shape = (300,)


In [4]:
np.unique(y_train, return_counts=True)

(array([0, 1]), array([630,  70]))

In [5]:
X_train[:3]

array([[-0.09746084, -0.54713298, -0.45329761, -0.39949257, -1.24317702,
         0.91328287,  0.91112442,  0.3283417 ,  0.05025216,  0.11136209],
       [-0.69880226, -1.74284589,  0.02388394, -1.17066738, -2.21222949,
         1.68975902,  1.74133891,  2.0845216 , -1.29035092,  0.6769703 ],
       [ 0.22778537,  0.76733549, -1.03634499,  2.56103476, -0.13517232,
         1.44036705,  0.63423978, -1.11945503,  0.36707846,  0.44063741]])

In [6]:
y_train[:10]

array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0])

### Experiment 1: Logistic Regression Model

In [24]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

log_reg = LogisticRegression(C=1, solver="liblinear")
log_reg.fit(X_train,y_train)

y_pred_log_reg = log_reg.predict(X_test)
print(classification_report(y_test, y_pred_log_reg))

              precision    recall  f1-score   support

           0       0.91      0.99      0.95       270
           1       0.71      0.17      0.27        30

    accuracy                           0.91       300
   macro avg       0.81      0.58      0.61       300
weighted avg       0.89      0.91      0.88       300



### Experiment 2: Random Forest Classifier

In [25]:
from sklearn.ensemble import RandomForestClassifier

rf_clf = RandomForestClassifier(n_estimators=30, max_depth=5)
rf_clf.fit(X_train,y_train)

y_pred_rf_clf = rf_clf.predict(X_test)
print(classification_report(y_test, y_pred_rf_clf))

              precision    recall  f1-score   support

           0       0.94      1.00      0.97       270
           1       0.92      0.40      0.56        30

    accuracy                           0.94       300
   macro avg       0.93      0.70      0.76       300
weighted avg       0.94      0.94      0.93       300



### Experiment 3: XGBoost Classifier

In [27]:
from xgboost import XGBClassifier
xgb_clf = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
xgb_clf.fit(X_train,y_train)

y_pred_xgb_clf = xgb_clf.predict(X_test)
print(classification_report(y_test, y_pred_xgb_clf))

              precision    recall  f1-score   support

           0       0.98      0.99      0.98       270
           1       0.86      0.80      0.83        30

    accuracy                           0.97       300
   macro avg       0.92      0.89      0.90       300
weighted avg       0.97      0.97      0.97       300



### Experiment 4: Handle class imbalance using SMOTETomek and then Train XGBoost

In [32]:
np.unique(y_train, return_counts=True)

(array([0, 1]), array([630,  70]))

In [34]:
from imblearn.combine import SMOTETomek 
smt= SMOTETomek(random_state=42)

X_train_smt, y_train_smt = smt.fit_resample(X_train, y_train)

np.unique(y_train_smt, return_counts=True)

(array([0, 1]), array([627, 627]))

In [35]:
xgb_clf = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
xgb_clf.fit(X_train_smt, y_train_smt)
y_pred_xgb = xgb_clf.predict(X_test)
print(classification_report(y_test, y_pred_xgb))

              precision    recall  f1-score   support

           0       0.98      0.97      0.98       270
           1       0.78      0.83      0.81        30

    accuracy                           0.96       300
   macro avg       0.88      0.90      0.89       300
weighted avg       0.96      0.96      0.96       300



<h2 align="center" style="color:blue">Track Experiments Using MLFlow</h2>

In [39]:
models = [
    ("Logistic Regression",
     {"C":1, "solver":"liblinear"},
     LogisticRegression(C=1, solver='liblinear'),
     (X_train,y_train),
     (X_test,y_test)
     ),

    ("Random Forest Classifier",
     {"n_estimators":30, "max_depth":5},
     RandomForestClassifier(n_estimators=30, max_depth=5),
     (X_train,y_train),
     (X_test,y_test)
     ),

    ( "XGBoost Classifier",
    {"use_label_encoder": False, "eval_metric": 'logloss'},
     XGBClassifier(use_label_encoder=False, eval_metric='logloss'),
     (X_train,y_train),
     (X_test,y_test)
     ), 
     
    ("XGBoost Classifier with SMOTE",
    {"use_label_encoder": False, "eval_metric": 'logloss'},
     XGBClassifier(use_label_encoder=False, eval_metric='logloss'),
     (X_train_smt,y_train_smt),
     (X_test,y_test)
     ), 
]

In [45]:
results = []
for model_name, params, model, train_set, test_set in models:
  X_train= train_set[0]
  y_train = train_set[1]
  X_test = test_set[0]
  y_test= test_set[1]

  model.fit(X_train,y_train)
  y_pred = model.predict(X_test)
  report = classification_report(y_test,y_pred, output_dict=True)
  results.append(report) 
  
len(results)

4

In [46]:
results[0]

{'0': {'precision': 0.9146757679180887,
  'recall': 0.9925925925925926,
  'f1-score': 0.9520426287744227,
  'support': 270.0},
 '1': {'precision': 0.7142857142857143,
  'recall': 0.16666666666666666,
  'f1-score': 0.2702702702702703,
  'support': 30.0},
 'accuracy': 0.91,
 'macro avg': {'precision': 0.8144807411019015,
  'recall': 0.5796296296296296,
  'f1-score': 0.6111564495223465,
  'support': 300.0},
 'weighted avg': {'precision': 0.8946367625548511,
  'recall': 0.91,
  'f1-score': 0.8838653929240076,
  'support': 300.0}}

In [64]:
import mlflow
import mlflow.sklearn
import mlflow.xgboost

# Initialize the mlflow
mlflow.set_experiment("Anamoly Detection")
mlflow.set_tracking_uri("http://localhost:5000")  #"http://localhost:5000"
for i , element in enumerate(models):
    model_name =element[0]
    params =element[1]
    model = element[2]
    
    result =results[i]

    with mlflow.start_run(run_name=model_name):
        mlflow.log_param("params",params)
        mlflow.log_metric("accuracy",result['accuracy'])
        mlflow.log_metric("recall_class_1",result['1']['recall'])
        mlflow.log_metric("recall_class_0",result['0']['recall'])
        mlflow.log_metric("f1_score_macro",result['macro avg']['f1-score'])

        if "XGB" in model_name:
            mlflow.xgboost.log_model(model," model")

        else:
            mlflow.sklearn.log_model(model, "model")    



    
    

2025/10/21 10:16:23 INFO mlflow.tracking.fluent: Experiment with name 'Anamoly Detection' does not exist. Creating a new experiment.


🏃 View run Logistic Regression at: http://localhost:5000/#/experiments/837861923811052143/runs/b53143a1da0c40a197e2f11f23aaf916
🧪 View experiment at: http://localhost:5000/#/experiments/837861923811052143




🏃 View run Random Forest Classifier at: http://localhost:5000/#/experiments/837861923811052143/runs/83dba2c2a955434ebe307b1125a8dc09
🧪 View experiment at: http://localhost:5000/#/experiments/837861923811052143




🏃 View run XGBoost Classifier at: http://localhost:5000/#/experiments/837861923811052143/runs/19254407ac2946d9b02b8fd4d0213f58
🧪 View experiment at: http://localhost:5000/#/experiments/837861923811052143




🏃 View run XGBoost Classifier with SMOTE at: http://localhost:5000/#/experiments/837861923811052143/runs/ea51ad13158a43419784b849b05be0f8
🧪 View experiment at: http://localhost:5000/#/experiments/837861923811052143


<h1 align='center' style='color:blue' >Model Registry</h1>

In [3]:
## create data like fraud detection i.e class imbalnce
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split , cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report , f1_score, make_scorer
import matplotlib.pyplot as plt 
import seaborn as sns 
import warnings
warnings.filterwarnings('ignore')

X,y = make_classification(n_samples=1000, n_features=10, n_informative=2, n_redundant=8, n_classes=2,
                          weights=[0.9,0.1], flip_y=0, random_state=42)
np.unique(y, return_counts=True)

(array([0, 1]), array([900, 100]))

In [4]:
# split the dataset into train test set with test_size =0.3

X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3,stratify=y,random_state=42)
print(np.unique(y_train,return_counts=True))
print(np.unique(y_test,return_counts=True))

(array([0, 1]), array([630,  70]))
(array([0, 1]), array([270,  30]))


#### Handle Imbalnce class with SMOTETomek


In [5]:
from imblearn.combine import SMOTETomek
smt= SMOTETomek(random_state=42)
X_train_smt, y_train_smt = smt.fit_resample(X_train,y_train)
print(np.unique(y_train_smt,return_counts=True))

(array([0, 1]), array([619, 619]))


### Train  Model With Logistic, RandomForestClassifier , XGBoost Classifier

In [None]:
models = [
    (
        "Logistic_Regression", 
        {"C": 1, "solver": 'liblinear'},
        LogisticRegression(), 
        (X_train, y_train),
        (X_test, y_test)
    ),
    (
        "Random_Forest", 
        {"n_estimators": 30, "max_depth": 3},
        RandomForestClassifier(), 
        (X_train, y_train),
        (X_test, y_test)
    ),
    (
        "XGB_Classifier",
        {"use_label_encoder": False, "eval_metric": 'logloss'},
        XGBClassifier(), 
        (X_train, y_train),
        (X_test, y_test)
    ),
    (
        "XGBClassifier_With_SMOTE",
        {"use_label_encoder": False, "eval_metric": 'logloss'},
        XGBClassifier(), 
        (X_train_smt, y_train_smt),
        (X_test, y_test)
    )
]

results = []

for model_name , params, model, train_set, test_set in models:
    X_train = train_set[0]
    y_train = train_set[1]
    X_test = test_set[0]
    y_test = test_set[1]
    
    model.set_params(**params)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    report= classification_report(y_test, y_pred,output_dict=True)
    results.append(report)


  




In [9]:
results[0]['macro avg']['f1-score']

0.749791492910759

In [55]:

import mlflow
import mlflow.sklearn
import mlflow.xgboost

mlflow.set_experiment("Credit_Risk_Detection")
mlflow.set_tracking_uri("http://localhost:5000")

for i , element in enumerate(models):
    model_name =element[0]
    params =element[1]
    model =element[2]
    report =results[i]

    with mlflow.start_run(run_name=model_name):
        mlflow.log_param("params",params)
        mlflow.log_metrics({
            "accuracy":report['accuracy'],
            "recall_class_0":report['0']['recall'],
            "recall_class_1":report['1']['recall'],
            "f1_score_macro":report['macro avg']['f1-score']
        } )
        
        if 'XGB' in model_name:
            mlflow.xgboost.log_model(model,"model" )
        else:
            mlflow.sklearn.log_model(model, "model")    

    

MlflowException: Cannot set a deleted experiment 'Credit_Risk_Detection' as the active experiment. You can restore the experiment, or permanently delete the experiment to create a new one.

In [58]:
from mlflow.client import MlflowClient

client = MlflowClient()
for exp in client.search_experiments(view_type="ALL"):
    print(exp.experiment_id, exp.name,exp.lifecycle_stage)

273844665434896050 Anamoly_Detection active
854331578283412092 Credit_Risk_Detection deleted
0 Default active


In [45]:
experiment_ids= '854331578283412092'
for run in client.search_runs(experiment_ids=experiment_ids):
    print(run.info.run_name,"::: ",run.info.run_id)

XGBClassifier_With_SMOTE :::  2ba291063c384001b5399d64586a7345
XGB_Classifier :::  f38dffca59374cb09ca0f6069e282ded
Random_Forest :::  10671ead231047f19bf11f897b59767c
Logistic_Regression :::  6b42ee647f5c4c2d950f295090890699


In [47]:
experiment_ids= '854331578283412092'
for run in client.search_runs(experiment_ids=experiment_ids):
    if run.info.run_id =="6b42ee647f5c4c2d950f295090890699": 
       client.delete_run(run.info.run_id)

In [48]:
experiment_ids= '854331578283412092'
for run in client.search_runs(experiment_ids=experiment_ids):
    print(run.info.run_name,"::: ",run.info.run_id)

XGBClassifier_With_SMOTE :::  2ba291063c384001b5399d64586a7345
XGB_Classifier :::  f38dffca59374cb09ca0f6069e282ded
Random_Forest :::  10671ead231047f19bf11f897b59767c


In [None]:
exp_id="854331578283412092"
client.delete_experiment(experiment_id=exp_id)
    

RestException: RESOURCE_DOES_NOT_EXIST: Could not find experiment with ID 854331578283412092

In [59]:
client.restore_experiment(experiment_id=exp_id)

In [60]:
client = MlflowClient()
for exp in client.search_experiments(view_type="ALL"):
    print(exp.experiment_id, exp.name,exp.lifecycle_stage)

273844665434896050 Anamoly_Detection active
854331578283412092 Credit_Risk_Detection active
0 Default active


### Register the Model

In [63]:
for exp in client.search_experiments(view_type="ALL"):
    print(exp.name, "==", exp.experiment_id)

Anamoly_Detection == 273844665434896050
Credit_Risk_Detection == 854331578283412092
Default == 0


In [83]:
for run in client.search_runs(experiment_ids='273844665434896050'):
    print(run.info.run_name,"==",run.info.run_id)

XGBClassifier_With_SMOTE == 5d23bdebad904db8a590a4ebeae4dc65
XGB_Classifier == c739d2fc01534c83a75573c314e5f322
Random_Forest == a10c08c6a35643d6bbca0ac31cf6577c
Logistic_Regression == e28e30c34c9f4334aef5d1be53a887e9


In [84]:
## XGBClassifier_With_SMOTE params, metrics
run=client.get_run(run_id="5d23bdebad904db8a590a4ebeae4dc65")
params =run.data.params
print(params)
print("Metrics ::")
pd.DataFrame(run.data.metrics,index=[0])

{'params': "{'use_label_encoder': False, 'eval_metric': 'logloss'}"}
Metrics ::


Unnamed: 0,accuracy,f1_score_macro,recall_class_0,recall_class_1
0,0.963333,0.899632,0.977778,0.833333


In [85]:

if mlflow.active_run() is not None:
    mlflow.end_run()
mlflow.set_experiment("Anamoly_Detection")
mlflow.set_tracking_uri("http://localhost:5000")    

#### Model Register for XGBClassifier_With_SMOTE

In [89]:
### Register model with high recall_class_1
## XGBClassifier_With_SMOTE model have hig recall_class_1

# Option 1: specify `registered_model_name` parameter when logging a model
# -- mlflow.<flavor>.log_model(..., registered_model_name="<YOUR_MODEL_NAME>")

# Option 2: register a logged model
#XGB-Smote_runid ="5d23bdebad904db8a590a4ebeae4dc65"

mlflow.set_experiment("Anamoly_Detection")
mlflow.set_tracking_uri("http://localhost:5000")   

run_id =input("Enter run_id")
model_name = 'XGB-Smote'
model_uri = f"runs:/{run_id}/model"

with mlflow.start_run(run_id=run_id):
  mlflow.register_model(model_uri=model_uri, name=model_name)


Registered model 'XGB-Smote' already exists. Creating a new version of this model...
2025/10/23 13:18:45 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: XGB-Smote, version 3


🏃 View run XGBClassifier_With_SMOTE at: http://localhost:5000/#/experiments/273844665434896050/runs/5d23bdebad904db8a590a4ebeae4dc65
🧪 View experiment at: http://localhost:5000/#/experiments/273844665434896050


Created version '3' of model 'XGB-Smote'.


#### Model Register for Random_Forest

In [90]:
# andom_Forest == a10c08c6a35643d6bbca0ac31cf6577c

run_id_rf = 'a10c08c6a35643d6bbca0ac31cf6577c'
model_name_rf = 'Random-Forest-Model'
model_uri_rf =f"runs:/{run_id}/model"

with mlflow.start_run(run_id=run_id_rf):
  mlflow.register_model(model_uri=model_uri_rf,name=model_name_rf)





Registered model 'Random-Forest-Model' already exists. Creating a new version of this model...
2025/10/23 13:19:10 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: Random-Forest-Model, version 2


🏃 View run Random_Forest at: http://localhost:5000/#/experiments/273844665434896050/runs/a10c08c6a35643d6bbca0ac31cf6577c
🧪 View experiment at: http://localhost:5000/#/experiments/273844665434896050


Created version '2' of model 'Random-Forest-Model'.


#### List all experiment

In [91]:
client =MlflowClient()

for exp in client.search_experiments(view_type='ALL'):
    print(exp.name," == ",exp.experiment_id, )


Anamoly_Detection  ==  273844665434896050
Credit_Risk_Detection  ==  854331578283412092
Default  ==  0


Now we are list all run id in  Credit_Risk_Detection experiment

In [92]:
for run in client.search_runs(experiment_ids="854331578283412092"):
    print(run.info.run_name," = ",run.info.run_id)

XGBClassifier_With_SMOTE  =  2ba291063c384001b5399d64586a7345
XGB_Classifier  =  f38dffca59374cb09ca0f6069e282ded
Random_Forest  =  10671ead231047f19bf11f897b59767c
Logistic_Regression  =  6b42ee647f5c4c2d950f295090890699
XGBClassifier_With_SMOTE  =  77ae843262ed45daabd7cdd45297b2d3
XGB_Classifier  =  913e856f9f414105b39b20029098993f
Random_Forest  =  5bf32bf8627744289aeeeb99bb96dce7
Logistic_Regression  =  c1ab1bb42ce049ef8b0beb5d3f16e016


Register model:  Logistic_Regression  =  c1ab1bb42ce049ef8b0beb5d3f16e016  belong to Credit_Risk_Detection  

In [93]:
run_id_lr = "c1ab1bb42ce049ef8b0beb5d3f16e016"
model_name_lr= "Logistic-Regression-Model_CreditRisk"

model_ui_lr = f"runs:/{run_id_lr}/model"

registered_model_lr = mlflow.register_model(model_uri= model_ui_lr , name=model_name_lr)

Successfully registered model 'Logistic-Regression-Model_CreditRisk'.
2025/10/23 13:29:48 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: Logistic-Regression-Model_CreditRisk, version 1
Created version '1' of model 'Logistic-Regression-Model_CreditRisk'.


In [95]:
#XGB_Classifier  =  913e856f9f414105b39b20029098993f

run_id_xgb = '913e856f9f414105b39b20029098993f'
model_name_xgb = 'XGB_Classifier-Model'
model_uri_xgb =f"runs:/{run_id}/model"

mlflow.set_experiment("Credit_Risk_Detection")

with mlflow.start_run(run_id=run_id_xgb):
  mlflow.register_model(model_uri=model_uri_xgb,name=model_name_xgb)

Successfully registered model 'XGB_Classifier-Model'.
2025/10/23 13:32:42 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: XGB_Classifier-Model, version 1


🏃 View run XGB_Classifier at: http://localhost:5000/#/experiments/854331578283412092/runs/913e856f9f414105b39b20029098993f
🧪 View experiment at: http://localhost:5000/#/experiments/854331578283412092


Created version '1' of model 'XGB_Classifier-Model'.


### Load the Model

Load model = XGB-Smote model of version 2

In [97]:
# model_uri = f"models:/{model_name}@{model_version_alias}"
# or
# odel_uri = f"models:/{model_name}/{model_version}" 
model_name = "XGB-Smote"
model_version = 2
model_uri = f"models:/{model_name}/{model_version}"

model_xgb = mlflow.xgboost.load_model(model_uri=model_uri)
model_xgb

Downloading artifacts: 100%|██████████| 5/5 [00:02<00:00,  1.74it/s]


0,1,2
,objective,'binary:logistic'
,base_score,'5E-1'
,booster,'gbtree'
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False


Make prediction with loaded model

In [105]:
y_pred_xgb = model_xgb.predict(X_test)
print(classification_report(y_test,y_pred_xgb))

              precision    recall  f1-score   support

           0       0.98      0.98      0.98       270
           1       0.81      0.83      0.82        30

    accuracy                           0.96       300
   macro avg       0.89      0.91      0.90       300
weighted avg       0.96      0.96      0.96       300



Load model = Logistic-Regression-Model_CreditRisk  of version latest

In [103]:


model_name_lr = "Logistic-Regression-Model_CreditRisk"
model_version_lr= "latest"

model_uri_lr= f"models:/{model_name_lr}/{model_version_lr}"

model_lr = mlflow.sklearn.load_model(model_uri=model_uri_lr)
model_lr

Downloading artifacts: 100%|██████████| 5/5 [00:02<00:00,  2.27it/s]


0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'liblinear'
,max_iter,100


In [104]:
y_pred_lr =model_lr.predict(X_test)
print(classification_report(y_test,y_pred_lr))

              precision    recall  f1-score   support

           0       0.95      0.96      0.95       270
           1       0.60      0.50      0.55        30

    accuracy                           0.92       300
   macro avg       0.77      0.73      0.75       300
weighted avg       0.91      0.92      0.91       300



#### Load XGB-Smote model which is champion

In [107]:
model_name_champ = "XGB-Smote"

model_uri_champ = f"models:/{model_name_champ}@champion"

model_xgb_champ = mlflow.xgboost.load_model(model_uri=model_uri_champ)
model_xgb_champ


Downloading artifacts: 100%|██████████| 5/5 [00:02<00:00,  2.00it/s]


0,1,2
,objective,'binary:logistic'
,base_score,'5E-1'
,booster,'gbtree'
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False


In [108]:
model_alias ="champion"
model_name_champ = "XGB-Smote"

model_uri_champ = f"models:/{model_name_champ}@{model_alias}"

model_xgb_champ = mlflow.xgboost.load_model(model_uri=model_uri_champ)
model_xgb_champ

Downloading artifacts: 100%|██████████| 5/5 [00:02<00:00,  2.37it/s]


0,1,2
,objective,'binary:logistic'
,base_score,'5E-1'
,booster,'gbtree'
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False


### Transition the ModelFrom Dev to Production

In [111]:
from mlflow.client import MlflowClient

client = MlflowClient()

dev_model_uri =f"models:/XGB-Smote@challenger"
prod_model_name = "Anamoly-Detection-prod"

client.copy_model_version(src_model_uri=dev_model_uri, dst_name=prod_model_name)

Registered model 'Anamoly-Detection-prod' already exists. Creating a new version of this model...
Copied version '3' of model 'XGB-Smote' to version '3' of model 'Anamoly-Detection-prod'.


<ModelVersion: aliases=[], creation_timestamp=1761218850677, current_stage='None', deployment_job_state=<ModelVersionDeploymentJobState: current_task_name='', job_id='', job_state='DEPLOYMENT_JOB_CONNECTION_STATE_UNSPECIFIED', run_id='', run_state='DEPLOYMENT_JOB_RUN_STATE_UNSPECIFIED'>, description='', last_updated_timestamp=1761218850677, metrics=None, model_id=None, name='Anamoly-Detection-prod', params=None, run_id='5d23bdebad904db8a590a4ebeae4dc65', run_link='', source='models:/XGB-Smote/3', status='READY', status_message=None, tags={}, user_id='', version='3'>

#### Load the final production champion model and do prediction

In [None]:
prod_model_name ="Anamoly-Detection-prod"
prod_model_version = "champion"
prod_model_uri= f"models:/{prod_model_name}@{prod_model_version}"

prod_model_final = mlflow.xgboost.load_model(model_uri=prod_model_uri)
prod_model_final

Downloading artifacts: 100%|██████████| 5/5 [00:02<00:00,  2.27it/s]


0,1,2
,objective,'binary:logistic'
,base_score,'5E-1'
,booster,'gbtree'
,callbacks,
,colsample_bylevel,
,colsample_bynode,
,colsample_bytree,
,device,
,early_stopping_rounds,
,enable_categorical,False


In [113]:
y_pred_final = prod_model_final.predict(X_test)
print(classification_report(y_test, y_pred_final))

              precision    recall  f1-score   support

           0       0.98      0.98      0.98       270
           1       0.81      0.83      0.82        30

    accuracy                           0.96       300
   macro avg       0.89      0.91      0.90       300
weighted avg       0.96      0.96      0.96       300



#### List out all Registered Models

In [118]:
from mlflow.tracking import MlflowClient
mlflow.set_tracking_uri("http://127.0.0.1:5000")
mlflow.set_registry_uri("http://127.0.0.1:5000")
client = MlflowClient()

# List all registered models
for model in client.search_registered_models():
    print(f"📦 Model Name: {model.name}")
    print(f"   Latest Versions: {[v.version for v in model.latest_versions]}")
    print(f"   Description: {model.description}\n")

📦 Model Name: Anamoly-Detection-prod
   Latest Versions: ['3']
   Description: 

📦 Model Name: Logistic-Regression-Model_CreditRisk
   Latest Versions: ['1']
   Description: 

📦 Model Name: XGB-Smote
   Latest Versions: ['3']
   Description: 



In [116]:
client.list_artifacts()

TypeError: MlflowClient.list_artifacts() missing 1 required positional argument: 'run_id'