In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings("ignore")

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [4]:
# Synthetic binary-classification data: 1000 samples with 10 features
# (2 informative, 8 redundant). Class weights of 0.9/0.1 make the problem
# imbalanced, flip_y=0 disables random label flipping, and the fixed seed
# makes the generated data repeatable.
x, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=2,
    n_redundant=8,
    weights=[0.9, 0.1],
    flip_y=0,
    random_state=42,
)

In [6]:
# Class balance check: the distinct labels in y and the number of samples per label.
np.unique(y, return_counts=True)

(array([0, 1]), array([900, 100]))

In [8]:
# Step 2: hold out 30% of the samples for evaluation. Stratifying on y keeps
# the class proportions the same in the train and test splits, and the fixed
# seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

In [10]:
# Model 1: logistic regression baseline trained on the original
# (imbalanced) training split.
model_log_reg = LogisticRegression(C=1, solver="liblinear").fit(x_train, y_train)
y_pred_model_1 = model_log_reg.predict(x_test)

# Per-class precision / recall / F1 on the held-out split.
report = classification_report(y_test, y_pred_model_1)
print(report)

              precision    recall  f1-score   support

           0       0.95      0.96      0.95       270
           1       0.60      0.50      0.55        30

    accuracy                           0.92       300
   macro avg       0.77      0.73      0.75       300
weighted avg       0.91      0.92      0.91       300



In [12]:
# Model 2: shallow random forest (20 trees, max depth 3) trained on the
# original training split.
# random_state pins the forest's internal randomness so that the printed
# report is reproducible after a kernel restart; 42 matches the seed used
# for data generation and splitting in this notebook.
params = {"n_estimators": 20, "max_depth": 3, "random_state": 42}

model_random_forest = RandomForestClassifier(**params)
model_random_forest.fit(x_train, y_train)

y_pred_random_forest = model_random_forest.predict(x_test)

# Per-class precision / recall / F1 on the held-out split.
report_2 = classification_report(y_test, y_pred_random_forest)
print(report_2)

              precision    recall  f1-score   support

           0       0.97      1.00      0.98       270
           1       0.95      0.70      0.81        30

    accuracy                           0.97       300
   macro avg       0.96      0.85      0.89       300
weighted avg       0.97      0.97      0.96       300



In [14]:
# Model 3: XGBoost classifier trained on the original training split.
# NOTE: the keyword is `eval_metric` (singular). A misspelt name such as
# `eval_metrics` is not a recognised XGBoost parameter, so the requested
# metric would not be applied.
xgb_class = XGBClassifier(use_label_encoder=False, eval_metric="logloss")
xgb_class.fit(x_train, y_train)

y_pred_xgb = xgb_class.predict(x_test)

# Per-class precision / recall / F1 on the held-out split.
report = classification_report(y_test, y_pred_xgb)
print(report)

              precision    recall  f1-score   support

           0       0.98      1.00      0.99       270
           1       0.96      0.80      0.87        30

    accuracy                           0.98       300
   macro avg       0.97      0.90      0.93       300
weighted avg       0.98      0.98      0.98       300



In [16]:
# Model 4, data preparation: rebalance the classes with SMOTETomek.
# Only the training split is resampled; the test split is left untouched.
from imblearn.combine import SMOTETomek

smt = SMOTETomek(random_state=42)
x_train_res, y_train_res = smt.fit_resample(x_train, y_train)

# Class counts after resampling.
np.unique(y_train_res, return_counts=True)

(array([0, 1]), array([619, 619]))

In [18]:
# Model 4: XGBoost classifier trained on the resampled training data and
# evaluated on the untouched test split.
# NOTE: the keyword is `eval_metric` (singular). A misspelt name such as
# `eval_metrics` is not a recognised XGBoost parameter, so the requested
# metric would not be applied.
xgb_class_2 = XGBClassifier(use_label_encoder=False, eval_metric="logloss")
xgb_class_2.fit(x_train_res, y_train_res)

y_pred_xgb_2 = xgb_class_2.predict(x_test)

# Per-class precision / recall / F1 on the held-out split.
report = classification_report(y_test, y_pred_xgb_2)
print(report)

              precision    recall  f1-score   support

           0       0.98      0.98      0.98       270
           1       0.81      0.83      0.82        30

    accuracy                           0.96       300
   macro avg       0.89      0.91      0.90       300
weighted avg       0.96      0.96      0.96       300



In [20]:
# Experiment registry for tracking.
# Each entry is a 5-tuple:
#   (run name, hyper-parameters, unfitted estimator,
#    (train features, train labels), (test features, test labels))
#
# Every estimator is constructed from its own params dict, so the parameters
# recorded for a run cannot drift from the ones the model was built with.

log_reg_params = {"C": 1, "solver": "liblinear"}

# random_state pins the forest's internal randomness so a tracked run can be
# reproduced from its recorded parameters.
# NOTE(review): this entry uses 30 trees, whereas the stand-alone random
# forest cell earlier in the notebook uses 20 - confirm which is intended.
random_forest_params = {"n_estimators": 30, "max_depth": 3, "random_state": 42}

xgb_params = {"use_label_encoder": False, "eval_metric": "logloss"}

models = [
    (
        "Logistic Regression",
        log_reg_params,
        LogisticRegression(**log_reg_params),
        (x_train, y_train),
        (x_test, y_test),
    ),
    (
        "Random Forest",
        random_forest_params,
        RandomForestClassifier(**random_forest_params),
        (x_train, y_train),
        (x_test, y_test),
    ),
    (
        "XGBClassifier",
        dict(xgb_params),  # independent copy per entry
        XGBClassifier(**xgb_params),
        (x_train, y_train),
        (x_test, y_test),
    ),
    (
        "XGBClassifier With SMOTE",
        dict(xgb_params),  # independent copy per entry
        XGBClassifier(**xgb_params),
        (x_train_res, y_train_res),  # trained on the resampled data
        (x_test, y_test),
    ),
]

In [22]:
# Fit every registered model and collect its classification report as a dict.
# reports[i] corresponds to models[i].
reports = []

# The train/test pairs are unpacked into loop-local names (fit_x, fit_y,
# eval_x, eval_y) on purpose: rebinding the notebook-level x_train / y_train /
# x_test / y_test here would leave them pointing at the last entry's data
# (the resampled set), and any cell re-run afterwards would silently use it.
for model_name, params, model, (fit_x, fit_y), (eval_x, eval_y) in models:
    model.set_params(**params)
    model.fit(fit_x, fit_y)
    y_pred = model.predict(eval_x)
    # output_dict=True returns nested dicts instead of a formatted string,
    # so individual metrics can be looked up by key later.
    report = classification_report(eval_y, y_pred, output_dict=True)
    reports.append(report)

In [24]:
import mlflow
import mlflow.sklearn
import mlflow.xgboost

In [28]:
import dagshub

# Initialise the DagsHub client for this repository with its MLflow
# integration switched on.
dagshub.init(
    repo_owner="sangameshgouda1",
    repo_name="ML_Flow_Dagshub-Demo",
    mlflow=True,
)

In [30]:
import os
from getpass import getpass

# SECURITY: never store an access token in the notebook. The token is taken
# from the MLFLOW_TRACKING_PASSWORD environment variable when it is already
# set, otherwise it is requested interactively without being echoed or saved.
# Any token that was previously committed in this file must be treated as
# leaked and revoked in DagsHub.
os.environ["MLFLOW_TRACKING_USERNAME"] = "sangameshgouda1"
if not os.environ.get("MLFLOW_TRACKING_PASSWORD"):
    os.environ["MLFLOW_TRACKING_PASSWORD"] = getpass("DagsHub access token: ")
os.environ["MLFLOW_TRACKING_URI"] = "https://dagshub.com/sangameshgouda1/ML_Flow_Dagshub-Demo.mlflow"

# Initialise MLflow: all runs below are grouped under this experiment.
mlflow.set_experiment("Anomaly Detection")

# reports[i] must belong to models[i]; refuse to log if the two lists are out
# of step (e.g. the training cell was not re-run after editing `models`).
if len(models) != len(reports):
    raise ValueError(
        f"Expected one report per model, got {len(reports)} reports for {len(models)} models"
    )

# One MLflow run per model: hyper-parameters, headline metrics and the
# fitted model itself.
for (model_name, params, model, _train_set, _test_set), report in zip(models, reports):
    with mlflow.start_run(run_name=model_name):
        mlflow.log_params(params)
        mlflow.log_metric("accuracy", report["accuracy"])
        mlflow.log_metric('recall_class_1', report['1']['recall'])
        mlflow.log_metric('recall_class_0', report['0']['recall'])
        mlflow.log_metric('f1_score_macro', report['macro avg']['f1-score'])
        # XGBoost models are logged with the XGBoost flavour; everything else
        # in the registry is a scikit-learn estimator.
        if "XGB" in model_name:
            mlflow.xgboost.log_model(model, "model")
        else:
            mlflow.sklearn.log_model(model, "model")

2025/05/12 13:53:50 INFO mlflow.tracking.fluent: Experiment with name 'Anomaly Detection' does not exist. Creating a new experiment.


🏃 View run Logistic Regression at: https://dagshub.com/sangameshgouda1/ML_Flow_Dagshub-Demo.mlflow/#/experiments/0/runs/eeb34cd547f04c15a953dcd2b5a32439
🧪 View experiment at: https://dagshub.com/sangameshgouda1/ML_Flow_Dagshub-Demo.mlflow/#/experiments/0




🏃 View run Random Forest at: https://dagshub.com/sangameshgouda1/ML_Flow_Dagshub-Demo.mlflow/#/experiments/0/runs/e12db4b7920b426484134ffc8b0d8991
🧪 View experiment at: https://dagshub.com/sangameshgouda1/ML_Flow_Dagshub-Demo.mlflow/#/experiments/0




🏃 View run XGBClassifier at: https://dagshub.com/sangameshgouda1/ML_Flow_Dagshub-Demo.mlflow/#/experiments/0/runs/0e392348c24745f3a77a995652568289
🧪 View experiment at: https://dagshub.com/sangameshgouda1/ML_Flow_Dagshub-Demo.mlflow/#/experiments/0




🏃 View run XGBClassifier With SMOTE at: https://dagshub.com/sangameshgouda1/ML_Flow_Dagshub-Demo.mlflow/#/experiments/0/runs/d47415ed1955447897270d5cc5608011
🧪 View experiment at: https://dagshub.com/sangameshgouda1/ML_Flow_Dagshub-Demo.mlflow/#/experiments/0
