In [5]:
import sklearn, imblearn
print("scikit-learn:", sklearn.__version__)
print("imbalanced-learn:", imblearn.__version__)

scikit-learn: 1.7.2
imbalanced-learn: 0.14.0


In [7]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report
import warnings
warnings.filterwarnings('ignore')

In [9]:
# Step 1: Create an imbalanced binary classification dataset
X, y = make_classification(n_samples=1000, n_features=10, n_informative=2, n_redundant=8, 
                           weights=[0.9, 0.1], flip_y=0, random_state=42)

np.unique(y, return_counts=True)

(array([0, 1]), array([900, 100], dtype=int64))

In [11]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

In [13]:
X_train.shape

(700, 10)

In [15]:
X_test.shape

(300, 10)

In [39]:
log_reg = LogisticRegression(max_iter=1000,C=1, solver='liblinear')
log_reg.fit(X_train, y_train)
y_pred_log_reg = log_reg.predict(X_test)
print(classification_report(y_test, y_pred_log_reg))

              precision    recall  f1-score   support

           0       0.98      0.88      0.93       270
           1       0.45      0.87      0.59        30

    accuracy                           0.88       300
   macro avg       0.72      0.87      0.76       300
weighted avg       0.93      0.88      0.90       300



In [19]:
rf_clf = RandomForestClassifier(n_estimators=30, max_depth=3)
rf_clf.fit(X_train, y_train)
y_pred_rf = rf_clf.predict(X_test)
print(classification_report(y_test, y_pred_rf))

              precision    recall  f1-score   support

           0       0.96      1.00      0.98       270
           1       0.95      0.67      0.78        30

    accuracy                           0.96       300
   macro avg       0.96      0.83      0.88       300
weighted avg       0.96      0.96      0.96       300



In [25]:
xgb_clf = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
xgb_clf.fit(X_train, y_train)
y_pred_xgb = xgb_clf.predict(X_test)
print(classification_report(y_test, y_pred_xgb))

              precision    recall  f1-score   support

           0       0.98      1.00      0.99       270
           1       0.96      0.80      0.87        30

    accuracy                           0.98       300
   macro avg       0.97      0.90      0.93       300
weighted avg       0.98      0.98      0.98       300



In [23]:
from imblearn.combine import SMOTETomek

smt = SMOTETomek(random_state=42)
X_train_res, y_train_res = smt.fit_resample(X_train, y_train)

np.unique(y_train_res, return_counts=True)

(array([0, 1]), array([619, 619], dtype=int64))

In [29]:
xgb_clf = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
xgb_clf.fit(X_train_res, y_train_res)
y_pred_xgb = xgb_clf.predict(X_test)
print(classification_report(y_test, y_pred_xgb))

              precision    recall  f1-score   support

           0       0.98      0.99      0.98       270
           1       0.86      0.83      0.85        30

    accuracy                           0.97       300
   macro avg       0.92      0.91      0.92       300
weighted avg       0.97      0.97      0.97       300



In [31]:
import mlflow

In [41]:
models = [
    (
        "Logistic Regression", 
        LogisticRegression(max_iter=1000,C=1, solver='liblinear'), 
        (X_train, y_train),
        (X_test, y_test)
    ),
    (
        "Random Forest", 
        RandomForestClassifier(n_estimators=30, max_depth=3), 
        (X_train, y_train),
        (X_test, y_test)
    ),
    (
        "XGBClassifier",
        XGBClassifier(use_label_encoder=False, eval_metric='logloss'), 
        (X_train, y_train),
        (X_test, y_test)
    ),
    (
        "XGBClassifier With SMOTE",
        XGBClassifier(use_label_encoder=False, eval_metric='logloss'), 
        (X_train_res, y_train_res),
        (X_test, y_test)
    )
]

reports = []

for model_name, model, train_set, test_set in models:
    X_train = train_set[0]
    y_train = train_set[1]
    X_test = test_set[0]
    y_test = test_set[1]
    
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    report = classification_report(y_test, y_pred, output_dict=True)
    reports.append(report)

In [55]:
import mlflow
import mlflow.xgboost
import mlflow.sklearn
mlflow.set_experiment("Anamoly Detection")
mlflow.set_tracking_uri("http://127.0.0.1:5000")
for i,element in enumerate(models):
    model_name=element[0]
    model=element[1]
    report=reports[i]
    with mlflow.start_run(run_name=model_name):
         
          mlflow.log_param("model",model_name)
          
          mlflow.log_metric('accuracy', report['accuracy'])
          mlflow.log_metric('recall_class_1', report['1']['recall'])
          mlflow.log_metric('recall_class_0', report['0']['recall'])
          mlflow.log_metric('f1_score_macro', report['macro avg']['f1-score'])  

          if isinstance(model, XGBClassifier):
              mlflow.xgboost.log_model(model,name="model")
          else:
              mlflow.sklearn.log_model(model,artifact_path="model")
        
        
    



üèÉ View run Logistic Regression at: http://127.0.0.1:5000/#/experiments/961996740831763576/runs/a739fe75b08147869f25b0d1b08684a6
üß™ View experiment at: http://127.0.0.1:5000/#/experiments/961996740831763576




üèÉ View run Random Forest at: http://127.0.0.1:5000/#/experiments/961996740831763576/runs/357f6b0753734e7f865b0671c19932bc
üß™ View experiment at: http://127.0.0.1:5000/#/experiments/961996740831763576




üèÉ View run XGBClassifier at: http://127.0.0.1:5000/#/experiments/961996740831763576/runs/8d2b4ebf87c24f279df12321e705ff61
üß™ View experiment at: http://127.0.0.1:5000/#/experiments/961996740831763576




üèÉ View run XGBClassifier With SMOTE at: http://127.0.0.1:5000/#/experiments/961996740831763576/runs/79d8f59f7cb34d9986f6a1558cb15239
üß™ View experiment at: http://127.0.0.1:5000/#/experiments/961996740831763576


In [61]:
model = 'XGB'
run_id=input('Please type RunID')
model_uri = f'runs:/{run_id}/model'

with mlflow.start_run(run_id=run_id):
    mlflow.register_model(model_uri=model_uri, name=model_name)

Please type RunID 8d2b4ebf87c24f279df12321e705ff61


Registered model 'XGB' already exists. Creating a new version of this model...
2025/11/04 18:11:11 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: XGB, version 1


üèÉ View run XGBClassifier at: http://127.0.0.1:5000/#/experiments/961996740831763576/runs/8d2b4ebf87c24f279df12321e705ff61
üß™ View experiment at: http://127.0.0.1:5000/#/experiments/961996740831763576


Created version '1' of model 'XGB'.


In [63]:
model_version = 1
model_uri = f"models:/{model_name}/{model_version}"

loaded_model = mlflow.xgboost.load_model(model_uri)
y_pred = loaded_model.predict(X_test)
y_pred[:4]

array([0, 0, 0, 0])

In [65]:
current_model_uri = f"models:/{model_name}@challenger"
production_model_name = "anomaly-detection-prod"

client = mlflow.MlflowClient()
client.copy_model_version(src_model_uri=current_model_uri, dst_name=production_model_name)

Successfully registered model 'anomaly-detection-prod'.
Copied version '1' of model 'XGB' to version '1' of model 'anomaly-detection-prod'.


<ModelVersion: aliases=[], creation_timestamp=1762260941723, current_stage='None', deployment_job_state=<ModelVersionDeploymentJobState: current_task_name='', job_id='', job_state='DEPLOYMENT_JOB_CONNECTION_STATE_UNSPECIFIED', run_id='', run_state='DEPLOYMENT_JOB_RUN_STATE_UNSPECIFIED'>, description='', last_updated_timestamp=1762260941723, metrics=None, model_id=None, name='anomaly-detection-prod', params=None, run_id='8d2b4ebf87c24f279df12321e705ff61', run_link='', source='models:/XGB/1', status='READY', status_message=None, tags={}, user_id='', version='1'>

In [67]:
model_version = 1
prod_model_uri = f"models:/{production_model_name}@champion"

loaded_model = mlflow.xgboost.load_model(prod_model_uri)
y_pred = loaded_model.predict(X_test)
y_pred[:4]

RestException: INVALID_PARAMETER_VALUE: Registered model alias champion not found.

In [71]:
import os 
os.getcwd()

'C:\\Users\\PC'

In [73]:
pwd

'C:\\Users\\PC'

In [77]:
print(os.getcwd())

C:\Users\PC
