In [31]:
import numpy as np 
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import LogisticRegression 
from sklearn.ensemble import RandomForestClassifier 
from xgboost import XGBClassifier 
from sklearn.metrics import classification_report


In [32]:
X,y=make_classification(n_samples=1000,n_features=10,n_informative=2,n_redundant=8,
                        weights=[0.9,0.1],flip_y=0,random_state=42)
np.unique(y,return_counts=True)

(array([0, 1]), array([900, 100]))

In [33]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,stratify=y,random_state=42)

In [34]:
params={
    "solver":"lbfgs",
    "max_iter":1000,
    "multi_class":"auto",
    "random_state":8888
}

lr=LogisticRegression(**params)
lr.fit(X_train,y_train)
y_pred=lr.predict(X_test)
report=classification_report(y_test,y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.95      0.97      0.96       270
           1       0.62      0.50      0.56        30

    accuracy                           0.92       300
   macro avg       0.79      0.73      0.76       300
weighted avg       0.91      0.92      0.92       300





In [35]:
report_dict=classification_report(y_test,y_pred,output_dict=True)
report_dict

{'0': {'precision': 0.9456521739130435,
  'recall': 0.9666666666666667,
  'f1-score': 0.9560439560439561,
  'support': 270.0},
 '1': {'precision': 0.625,
  'recall': 0.5,
  'f1-score': 0.5555555555555556,
  'support': 30.0},
 'accuracy': 0.92,
 'macro avg': {'precision': 0.7853260869565217,
  'recall': 0.7333333333333334,
  'f1-score': 0.7557997557997558,
  'support': 300.0},
 'weighted avg': {'precision': 0.9135869565217392,
  'recall': 0.92,
  'f1-score': 0.9159951159951161,
  'support': 300.0}}

## Random Forest

In [36]:
rf=RandomForestClassifier(n_estimators=30,max_depth=3)
rf.fit(X_train,y_train)
y_pred_rf=rf.predict(X_test)
print(classification_report(y_test,y_pred_rf))

              precision    recall  f1-score   support

           0       0.97      1.00      0.98       270
           1       0.95      0.70      0.81        30

    accuracy                           0.97       300
   macro avg       0.96      0.85      0.89       300
weighted avg       0.97      0.97      0.96       300



In [37]:
xgb=XGBClassifier(use_label_encoder=False,eval_metric='logloss')
xgb.fit(X_train,y_train)
y_pred_xgb=xgb.predict(X_test)
print(classification_report(y_test,y_pred_xgb))

              precision    recall  f1-score   support

           0       0.98      1.00      0.99       270
           1       0.96      0.80      0.87        30

    accuracy                           0.98       300
   macro avg       0.97      0.90      0.93       300
weighted avg       0.98      0.98      0.98       300



Parameters: { "use_label_encoder" } are not used.



In [38]:
from imblearn.combine import SMOTETomek
smt=SMOTETomek(random_state=42)
X_train_res,y_train_res=smt.fit_resample(X_train,y_train)
np.unique(y_train_res,return_counts=True)

(array([0, 1]), array([619, 619]))

In [39]:
xgb_clf=XGBClassifier(use_label_encoder=False,eval_metric='logloss')
xgb.fit(X_train,y_train)
y_pred_xgb=xgb.predict(X_test)
print(classification_report(y_test,y_pred_xgb))

Parameters: { "use_label_encoder" } are not used.



              precision    recall  f1-score   support

           0       0.98      1.00      0.99       270
           1       0.96      0.80      0.87        30

    accuracy                           0.98       300
   macro avg       0.97      0.90      0.93       300
weighted avg       0.98      0.98      0.98       300



In [40]:
models = [
    (
        "Logistic Regression", 
        {"C":1,"solver":"liblinear"},
        LogisticRegression(C=1, solver='liblinear'), 
        (X_train, y_train),
        (X_test, y_test)
    ),
    (
        "Random Forest", 
        {"n_estimators":30,"max_depth":3},
        RandomForestClassifier(n_estimators=30, max_depth=3), 
        (X_train, y_train),
        (X_test, y_test)
    ),
    (
        "XGBClassifier",
        {"use_label_encoder":False,"eval_metric":"logloss"},
        XGBClassifier(use_label_encoder=False, eval_metric='logloss'), 
        (X_train, y_train),
        (X_test, y_test)
    ),
    (
        "XGBClassifier With SMOTE",
        {"use_label_encoder":False,"eval_metric":"logloss"},
        XGBClassifier(use_label_encoder=False, eval_metric='logloss'), 
        (X_train_res, y_train_res),
        (X_test, y_test)
    )
]


In [41]:
reports = []

for model_name, params,model, train_set, test_set in models:
    X_train = train_set[0]
    y_train = train_set[1]
    X_test = test_set[0]
    y_test = test_set[1]
    
    model.set_params(**params)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    report = classification_report(y_test, y_pred, output_dict=True)
    reports.append(report)


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.



In [42]:
import mlflow
import mlflow.sklearn
import mlflow.xgboost


In [43]:
# Initialize MLflow
mlflow.set_experiment("Detection1")
mlflow.set_tracking_uri("http://localhost:5000")

for i, element in enumerate(models):
    model_name = element[0]
    params=element[1]
    model = element[2]
    report = reports[i]
    
    with mlflow.start_run(run_name=model_name): 
        mlflow.log_params(params)       
        mlflow.log_param("model", model_name)
        mlflow.log_metric('accuracy', report['accuracy'])
        mlflow.log_metric('recall_class_1', report['1']['recall'])
        mlflow.log_metric('recall_class_0', report['0']['recall'])
        mlflow.log_metric('f1_score_macro', report['macro avg']['f1-score'])        
        
        if "XGB" in model_name:
            mlflow.xgboost.log_model(model, "model")
        else:
            mlflow.sklearn.log_model(model, "model")  

2025/01/19 06:47:51 INFO mlflow.tracking.fluent: Experiment with name 'Detection1' does not exist. Creating a new experiment.


🏃 View run Logistic Regression at: http://localhost:5000/#/experiments/925271627309144308/runs/5b90eab5b70045d2ac55bd141fce758e
🧪 View experiment at: http://localhost:5000/#/experiments/925271627309144308




🏃 View run Random Forest at: http://localhost:5000/#/experiments/925271627309144308/runs/c634707915b5436caae67546cea42727
🧪 View experiment at: http://localhost:5000/#/experiments/925271627309144308




🏃 View run XGBClassifier at: http://localhost:5000/#/experiments/925271627309144308/runs/16ad9f94b8da4e3b86375d1f9fff8f95
🧪 View experiment at: http://localhost:5000/#/experiments/925271627309144308




🏃 View run XGBClassifier With SMOTE at: http://localhost:5000/#/experiments/925271627309144308/runs/dfc373d33f774ed6bdab8189cb723d2f
🧪 View experiment at: http://localhost:5000/#/experiments/925271627309144308


In [44]:
import mlflow 


In [46]:
import mlflow
import mlflow.sklearn

# Set the MLflow tracking URI and experiment
mlflow.set_tracking_uri("http://127.0.0.1:5000")
mlflow.set_experiment("Detection1")

# Start an MLflow run
with mlflow.start_run():
    # Log model parameters
    mlflow.log_params(params)
    
    # Log metrics (ensure keys exist in report_dict)
    mlflow.log_metrics({
        'accuracy': report_dict['accuracy'],           # Ensure report_dict['accuracy'] exists
        'recall_0': report_dict['0']['recall'],       # Ensure report_dict['0']['recall'] exists
        'recall_1': report_dict['1']['recall'],       # Ensure report_dict['1']['recall'] exists
        'f1_score': report_dict['macro avg']['f1-score']  # Ensure report_dict['macro avg']['f1-score'] exists
    })
    
    # Log the trained model (ensure 'lr' is a trained model object)
    mlflow.sklearn.log_model(lr, artifact_path="Logistic Regression")




🏃 View run bouncy-owl-219 at: http://127.0.0.1:5000/#/experiments/925271627309144308/runs/743cbd501a5c40a7b56ecc4f471e7613
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/925271627309144308


## Model_Register

In [63]:
import mlflow

# Example inputs
run_id = input("Enter Run ID: ")  # e.g., 'dfc373d33f774ed6bdab8189cb723d2f'
model_name = "Random Forest"  # Ensure this matches the artifact path
model_uri = f"runs:/{run_id}/model"  # Correctly formatted URI

# Register the model
result = mlflow.register_model(model_uri=model_uri, name=model_name)
print(f"Model registered successfully: {result}")


Successfully registered model 'Random Forest'.
2025/01/19 07:24:18 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: Random Forest, version 1


Model registered successfully: <ModelVersion: aliases=[], creation_timestamp=1737251658930, current_stage='None', description='', last_updated_timestamp=1737251658930, name='Random Forest', run_id='c634707915b5436caae67546cea42727', run_link='', source='mlflow-artifacts:/925271627309144308/c634707915b5436caae67546cea42727/artifacts/model', status='READY', status_message='', tags={}, user_id='', version='1'>


Created version '1' of model 'Random Forest'.


In [66]:
import mlflow.sklearn

# Define the model URI
model_version = 1
model_name ="Random Forest"  # Replace with your registered model name
model_uri = f"models:/{model_name}/{model_version}"

# Load the model using mlflow.sklearn
loaded_model = mlflow.sklearn.load_model(model_uri)

# Predict using the loaded model
y_pred = loaded_model.predict(X_test)
print(y_pred[:4])


Downloading artifacts: 100%|██████████| 5/5 [00:00<00:00, 54.02it/s]

[0 0 0 0]





In [67]:
import mlflow
logged_model = 'runs:/c634707915b5436caae67546cea42727/model'

# Load model as a PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(logged_model)

# Predict on a Pandas DataFrame.
import pandas as pd
loaded_model.predict(pd.DataFrame(X_test))

Downloading artifacts: 100%|██████████| 5/5 [00:00<00:00, 39.53it/s]
 - scikit-learn (current: 1.2.2, required: scikit-learn==1.6.0)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.


array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0])

In [69]:
dev_model_uri = f"models:/{model_name}/{model_version}"
prod_model='Detection1'
client=mlflow.MlflowClient()
client.copy_model_version(src_model_uri=dev_model_uri,dst_name=prod_model)

Successfully registered model 'Detection1'.
Copied version '1' of model 'Random Forest' to version '1' of model 'Detection1'.


<ModelVersion: aliases=[], creation_timestamp=1737252288788, current_stage='None', description='', last_updated_timestamp=1737252288788, name='Detection1', run_id='c634707915b5436caae67546cea42727', run_link='', source='models:/Random Forest/1', status='READY', status_message='', tags={}, user_id='', version='1'>