In [11]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score
import warnings
warnings.filterwarnings('ignore')

In [12]:
# Step 1: synthesize an imbalanced binary classification dataset.
# weights=[0.9, 0.1] asks for roughly a 90/10 class split; random_state pins the draw.
dataset_options = dict(
    n_samples=1000,
    n_features=10,
    n_informative=2,
    n_redundant=8,
    weights=[0.9, 0.1],
    flip_y=0,
    random_state=42,
)
X, y = make_classification(**dataset_options)

# Cell output: the distinct labels and how many samples carry each one.
np.unique(y, return_counts=True)

(array([0, 1]), array([900, 100]))

In [13]:
# Hold out 20% of the rows for evaluation; random_state=42 keeps the split repeatable.
# NOTE(review): the split is not stratified, so the minority-class share of the
# test set may differ from the full dataset — confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Default-configured XGBoost classifier.
# NOTE(review): `log` is not referenced by any other visible cell, and the name
# suggests a logistic model although the object is an XGBClassifier — confirm
# whether this cell is still needed.
log=XGBClassifier()

In [6]:
# Hyperparameters for each candidate model. Each dict is later unpacked with **
# into the matching estimator constructor.

# LogisticRegression settings.
params_logistic = dict(
    max_iter=1000,
    solver="lbfgs",
    penalty='l2',
)

# RandomForestClassifier settings.
params_random = dict(
    n_estimators=150,
    criterion='entropy',
    max_depth=10,
    min_samples_split=10,
    min_samples_leaf=4,
    bootstrap=True,
    oob_score=True,
    random_state=888,
)

# XGBClassifier settings.
params_xgboost = dict(
    n_estimators=100,
    max_depth=6,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=888,
)



In [19]:
# Rebalance the training split only (the test split is not touched here) using
# SMOTETomek from imbalanced-learn's `combine` module.
# random_state=42 makes the resampling repeatable.
from imblearn.combine import SMOTETomek
smt=SMOTETomek(random_state=42)
# The resampled arrays get their own names so the original split stays available.
X_train_res, y_train_res = smt.fit_resample(X_train, y_train)

In [20]:
# Cell output: class labels and per-class counts of the resampled training labels.
np.unique(y_train_res, return_counts=True)

(array([0, 1]), array([703, 703]))

In [21]:
# Candidate models as (name, estimator, training set, test set) tuples.
# The two linear/forest baselines train on the original split; only XGBClassifier
# trains on the SMOTETomek-resampled data. All three share the same test set.
original_train = (X_train, y_train)
resampled_train = (X_train_res, y_train_res)
holdout = (X_test, y_test)

models = [
    ('LogisticRegression', LogisticRegression(**params_logistic), original_train, holdout),
    ('RandomForestClassifier', RandomForestClassifier(**params_random), original_train, holdout),
    ('XGBClassifier', XGBClassifier(**params_xgboost), resampled_train, holdout),
]

In [27]:
# Fit every candidate on its own training set and score it on its test set,
# collecting one metrics dict per model (in the same order as `models`).
#
# The loop unpacks into loop-local names (fit_X, fit_y, eval_X, eval_y) on purpose:
# rebinding the notebook-level X_train / y_train / X_test / y_test here would leave
# them pointing at the last model's (resampled) data once the loop ends, so
# re-running the resampling or `models` cells would silently use the wrong split.
reports = []

for model_name, model, (fit_X, fit_y), (eval_X, eval_y) in models:
    model.fit(fit_X, fit_y)
    y_pred = model.predict(eval_X)

    # Keys are read back by name when the metrics are logged to MLflow.
    reports.append({
        'model': model_name,
        'accuracy': accuracy_score(eval_y, y_pred),
        'precision': precision_score(eval_y, y_pred),
        'recall': recall_score(eval_y, y_pred),
        'f1_score': f1_score(eval_y, y_pred),
    })

In [22]:
import mlflow
import mlflow.sklearn
import mlflow.xgboost

from mlflow import MlflowClient

# MLflow client bound explicitly to the DagsHub-hosted tracking server for this repo.
client=MlflowClient(tracking_uri="https://dagshub.com/diwakar.active/MLOps.mlflow")


In [23]:
# Free-text note stored with the experiment under the "mlflow.note.content" tag.
# Adjacent string literals are concatenated by Python with no separator, so every
# fragment that is followed by another must end with a space. The experiment name
# used to be listed here as a second literal and ended up glued onto the sentence
# ("...XGBoost.Imbalanced_Classification_Experiment"); the name is passed to
# create_experiment separately and does not belong in the description.
experiment_description = (
    "This experiment is for classification models on imbalanced dataset "
    "using Logistic Regression, Random Forest, and XGBoost."
)

In [24]:
# Tags attached to the experiment: ownership / release metadata, the MLflow
# version in use, and the free-text description.
experiment_tag = dict(
    owner="data_scientist_team",
    project="Imbalanced_Classification",
    Goal="Evaluate classification models on imbalanced dataset",
    Version="1.0",
    Release_date="2025-10-29",
    mlflow_version=mlflow.__version__,
)
# A dotted key cannot be written as a keyword argument, so it is added separately
# (still last, which keeps the original key order).
experiment_tag["mlflow.note.content"] = experiment_description

In [25]:
# Create the experiment only when it does not exist yet, so this cell survives a
# re-run: create_experiment raises when the name is already taken.
# Note: when the experiment already exists its stored tags are left as they are.
EXPERIMENT_NAME = "Imbalanced_Classification_Experiment"

existing_experiment = client.get_experiment_by_name(EXPERIMENT_NAME)
if existing_experiment is None:
    experiment_id = client.create_experiment(
        name=EXPERIMENT_NAME,
        tags=experiment_tag,
    )
else:
    experiment_id = existing_experiment.experiment_id

# Cell output: the experiment ID (a string), as create_experiment returned before.
experiment_id

'0'

In [28]:
# Cell output: the metrics dict of the first entry in `reports`.
reports[0]

{'model': 'LogisticRegression',
 'accuracy': 0.94,
 'precision': 0.8125,
 'recall': 0.5909090909090909,
 'f1_score': 0.6842105263157895}

In [29]:
# models[1] is the RandomForestClassifier tuple; index 1 of that tuple is the estimator.
para=models[1][1]
# Cell output: every constructor parameter of the estimator, defaults included.
para.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'entropy',
 'max_depth': 10,
 'max_features': 'sqrt',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 4,
 'min_samples_split': 10,
 'min_weight_fraction_leaf': 0.0,
 'monotonic_cst': None,
 'n_estimators': 150,
 'n_jobs': None,
 'oob_score': True,
 'random_state': 888,
 'verbose': 0,
 'warm_start': False}

In [30]:
# NOTE(review): this repeats the earlier `reports[0]` inspection cell verbatim —
# confirm whether the duplicate is still wanted.
reports[0]

{'model': 'LogisticRegression',
 'accuracy': 0.94,
 'precision': 0.8125,
 'recall': 0.5909090909090909,
 'f1_score': 0.6842105263157895}

In [39]:
import mlflow
import os
import shutil

mlflow.set_tracking_uri("https://dagshub.com/diwakar.active/MLOps.mlflow")
mlflow.set_experiment("Imbalanced_Classification_Experiment")

# Log one MLflow run per trained model: hyperparameters, the four evaluation
# metrics, and the serialized model uploaded as a plain artifact folder.

# Reports are paired with models by position, so a stale or partial `reports`
# list would attach the wrong metrics to a run. Fail loudly instead.
if len(reports) != len(models):
    raise RuntimeError("reports and models are out of sync; re-run the training cell first.")

for (model_name, model, _train_set, _test_set), report in zip(models, reports):
    if report['model'] != model_name:
        raise RuntimeError(
            f"report for {report['model']!r} does not match model {model_name!r}; "
            "re-run the training cell first."
        )

    para = model.get_params()
    name = model_name + "_model"

    with mlflow.start_run(run_name=model_name):
        # One MLflow param per hyperparameter. Logging the whole dict under a
        # single "params" key stores one long string that cannot be filtered or
        # compared in the UI and can exceed the per-value length limit.
        mlflow.log_params(para)
        mlflow.log_metric('accuracy', report['accuracy'])
        mlflow.log_metric('Recall', report['recall'])
        mlflow.log_metric('Precision', report['precision'])
        mlflow.log_metric('f1_score', report['f1_score'])

        # save_model refuses to write into an existing, non-empty directory, so
        # clear the output of any previous run of this cell before saving.
        model_dir = f"models/{name}"
        shutil.rmtree(model_dir, ignore_errors=True)
        if "XGB" in model_name:
            mlflow.xgboost.save_model(xgb_model=model, path=model_dir)
        else:
            mlflow.sklearn.save_model(sk_model=model, path=model_dir)

        # log the model directory as an artifact instead of using log_model
        mlflow.log_artifacts(model_dir, artifact_path=name)
        print(f"Logged {model_name} model to MLflow.")
        

Logged LogisticRegression model to MLflow.
🏃 View run LogisticRegression at: https://dagshub.com/diwakar.active/MLOps.mlflow/#/experiments/0/runs/2925bc1885b343c3a6800cf6555be3d9
🧪 View experiment at: https://dagshub.com/diwakar.active/MLOps.mlflow/#/experiments/0
🏃 View run LogisticRegression at: https://dagshub.com/diwakar.active/MLOps.mlflow/#/experiments/0/runs/2925bc1885b343c3a6800cf6555be3d9
🧪 View experiment at: https://dagshub.com/diwakar.active/MLOps.mlflow/#/experiments/0
Logged RandomForestClassifier model to MLflow.
Logged RandomForestClassifier model to MLflow.
🏃 View run RandomForestClassifier at: https://dagshub.com/diwakar.active/MLOps.mlflow/#/experiments/0/runs/75e881725e904c62a8ea9c9f2926969f
🧪 View experiment at: https://dagshub.com/diwakar.active/MLOps.mlflow/#/experiments/0
🏃 View run RandomForestClassifier at: https://dagshub.com/diwakar.active/MLOps.mlflow/#/experiments/0/runs/75e881725e904c62a8ea9c9f2926969f
🧪 View experiment at: https://dagshub.com/diwakar.act

## Register the model (comparing the runs shows that Random Forest performs best, so that is the model we add to the Model Registry)

In [2]:
import mlflow
# Point the module-level MLflow API at the DagsHub-hosted tracking server.
mlflow.set_tracking_uri("https://dagshub.com/diwakar.active/MLOps.mlflow")

In [None]:
# Register the RandomForest artifacts of a chosen run in the MLflow Model Registry.
# The run ID is pasted in by hand, so strip stray whitespace and stop early on an
# empty value instead of sending a malformed "runs:/" URI to the server.
run_id = input("Enter the run ID to register the model: ").strip()
if not run_id:
    raise ValueError("A run ID is required to register the model.")

model_name = "RandomForestClassifier"
# "<model_name>_model" is the artifact folder the logging cell uploads for each run.
model_uri = f"runs:/{run_id}/{model_name}_model"
registered_model = mlflow.register_model(model_uri=model_uri, name=model_name)
print(f"Model registered as {model_name} with version {registered_model.version}")

Registered model 'RandomForestClassifier' already exists. Creating a new version of this model...
2025/10/31 05:47:52 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: RandomForestClassifier, version 1


Model registered as RandomForestClassifier with version 1


Created version '1' of model 'RandomForestClassifier'.


### Load Model

In [8]:
import mlflow
mlflow.set_tracking_uri("https://dagshub.com/diwakar.active/MLOps.mlflow")
# Disabled alternative: build the URI from a run ID instead of hard-coding it.
# run_id = input("Enter the run ID to register the model: ")
# artifact_path = f"models/{model_name}_Registered"

# model_uri = f"runs:/{run_id}/{artifact_path}"
# Hard-coded artifact location of one specific XGBClassifier_model upload.
# NOTE(review): this URI is tied to a single experiment/run; it must be updated
# by hand whenever the model is re-logged.
model_uri="mlflow-artifacts:/323c71440ba34f688cea412f4823f5f8/30a5a3691c6e4f61b5ab07cdca63a975/artifacts/XGBClassifier_model"
loaded_model = mlflow.xgboost.load_model(model_uri=model_uri)

# X_test comes from the train/test split cell; that cell must have run in this kernel.
y_pred = loaded_model.predict(X_test)
# Show the first four predictions as a quick sanity check.
print(y_pred[:4])


Downloading artifacts: 100%|██████████| 5/5 [00:02<00:00,  2.29it/s]

[0 1 0 0]





In [9]:
# Cell output: the first four true test labels, for comparison with the predictions.
y_test[:4]

array([0, 1, 0, 0])

#### Transition to production

In [9]:
from mlflow.tracking import MlflowClient
# No tracking_uri is passed here.
# NOTE(review): presumably this relies on the URI configured earlier via
# mlflow.set_tracking_uri / MLFLOW_TRACKING_URI — confirm.
client = MlflowClient()
# `registered_model` and `model_name` are defined by the registration cell, which
# must have run in this kernel first.
model_version = registered_model.version
# Point the "Production" alias of the registered model at that version.
client.set_registered_model_alias(name=model_name, alias="Production", version=model_version)
print(f"Model version {model_version} promoted to Production ✅")


Model version 1 promoted to Production ✅


In [14]:
# Resolve the registered model through its "Production" alias (models:/<name>@<alias>)
# rather than through a fixed version number or run ID.
prod_model = mlflow.sklearn.load_model(model_uri=f"models:/{model_name}@Production")

# Use it
y_pred = prod_model.predict(X_test)
# Show the first five predictions as a quick sanity check.
print(y_pred[:5])


Downloading artifacts: 100%|██████████| 5/5 [00:02<00:00,  1.92it/s]

[0 1 0 0 0]





In [None]:
# Load a different registered model through an alias and predict on the test split.
production_model_name="XGBClassifier_Registered"
# NOTE(review): model_version is assigned but not used below (the URI resolves by
# alias), and it overwrites the notebook-level model_version set by the promotion cell.
model_version = 1
# NOTE(review): the alias is spelled "challender"; it must match the alias stored
# in the registry exactly, so verify there before "correcting" it to "challenger".
prod_model_uri = f"models:/{production_model_name}@challender"

# NOTE(review): an XGBClassifier-named model is loaded with the sklearn flavor
# here — confirm that matches how it was saved.
loaded_model = mlflow.sklearn.load_model(prod_model_uri)
y_pred = loaded_model.predict(X_test)
# Cell output: the first four predictions.
y_pred[:4]

Downloading artifacts: 100%|██████████| 5/5 [00:00<00:00, 22.79it/s] 


array([0, 1, 0, 0])

## DagsHub

In [6]:
# Point MLflow at the DagsHub tracking server for this repository.
# These overrides are normally unnecessary on a machine that has no previously
# stored DagsHub credentials.
import os
from getpass import getpass

os.environ['MLFLOW_TRACKING_USERNAME'] = 'diwakar.active'
os.environ['MLFLOW_TRACKING_URI'] = 'https://dagshub.com/diwakar.active/MLOps.mlflow'

# Never store the password / access token in the notebook: reuse the value already
# exported in the environment, otherwise prompt for it without echoing.
# NOTE(review): a plaintext password was previously committed in this cell; treat
# it as leaked and rotate it.
if not os.environ.get('MLFLOW_TRACKING_PASSWORD'):
    os.environ['MLFLOW_TRACKING_PASSWORD'] = getpass("DagsHub password or access token: ")


In [7]:
import dagshub
# Initialise the DagsHub integration for the diwakar.active/MLOps repository with
# MLflow support switched on (mlflow=True).
dagshub.init(repo_owner='diwakar.active', repo_name='MLOps', mlflow=True)