In [20]:
import numpy as numpy
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score,classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier


In [3]:
X,y=make_classification(n_samples=1000,n_features=10,shuffle=True,random_state=18)

In [5]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3)

In [17]:
models= {
    "Logistic Regression": LogisticRegression(C=1.0, solver='liblinear', penalty='l2', max_iter=100, random_state=42),
    "Random Forest": RandomForestClassifier(n_estimators=100, max_depth=10, min_samples_split=2, min_samples_leaf=1, random_state=42),
    "XGBoost Classifier": XGBClassifier(n_estimators=100, max_depth=6, learning_rate=0.1, objective='binary:logistic', random_state=42)
}

print(models)

{'Logistic Regression': LogisticRegression(random_state=42, solver='liblinear'), 'Random Forest': RandomForestClassifier(max_depth=10, random_state=42), 'XGBoost Classifier': XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=0.1, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=6, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=100, n_jobs=None,
              num_parallel_tree=None, random_state=42, ...)}


In [7]:
import mlflow

In [21]:
reports=[]
for model_name, ind_model in models.items():
    model=ind_model
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    report = classification_report(y_test, y_pred, output_dict=True)
    reports.append(report)

In [22]:
print(reports)

[{'0': {'precision': 0.8944099378881988, 'recall': 0.935064935064935, 'f1-score': 0.9142857142857143, 'support': 154.0}, '1': {'precision': 0.9280575539568345, 'recall': 0.8835616438356164, 'f1-score': 0.9052631578947369, 'support': 146.0}, 'accuracy': 0.91, 'macro avg': {'precision': 0.9112337459225166, 'recall': 0.9093132894502758, 'f1-score': 0.9097744360902256, 'support': 300.0}, 'weighted avg': {'precision': 0.9107851110416015, 'recall': 0.91, 'f1-score': 0.9098947368421053, 'support': 300.0}}, {'0': {'precision': 0.8975903614457831, 'recall': 0.9675324675324676, 'f1-score': 0.93125, 'support': 154.0}, '1': {'precision': 0.9626865671641791, 'recall': 0.8835616438356164, 'f1-score': 0.9214285714285714, 'support': 146.0}, 'accuracy': 0.9266666666666666, 'macro avg': {'precision': 0.930138464304981, 'recall': 0.9255470556840419, 'f1-score': 0.9263392857142857, 'support': 300.0}, 'weighted avg': {'precision': 0.9292705148954025, 'recall': 0.9266666666666666, 'f1-score': 0.926470238095

In [23]:
mlflow.set_experiment("Anomaly Detection")
mlflow.set_tracking_uri("http://localhost:5000")

for i, element in enumerate(models):
    model_name = element[0]
    model = element[1]
    report = reports[i]
    
    with mlflow.start_run(run_name=model_name):        
        mlflow.log_param("model", model_name)
        mlflow.log_metric('accuracy', report['accuracy'])
        mlflow.log_metric('recall_class_1', report['1']['recall'])
        mlflow.log_metric('recall_class_0', report['0']['recall'])
        mlflow.log_metric('f1_score_macro', report['macro avg']['f1-score'])        
        
        if "XGBoost Classifier" in model_name:
            mlflow.xgboost.log_model(model, "model")
        elif "Logistic Regression" in model_name:
            mlflow.sklearn.log_model(model, "model")
        else:
            mlflow.sklearn.log_model(model, "model") 

2024/10/09 01:09:38 INFO mlflow.tracking.fluent: Experiment with name 'Anomaly Detection' does not exist. Creating a new experiment.
2024/10/09 01:09:52 INFO mlflow.tracking._tracking_service.client: 🏃 View run L at: http://localhost:5000/#/experiments/697091888028748967/runs/540015171cb544d1850e16d677cf7878.
2024/10/09 01:09:52 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/697091888028748967.
2024/10/09 01:10:03 INFO mlflow.tracking._tracking_service.client: 🏃 View run R at: http://localhost:5000/#/experiments/697091888028748967/runs/78d1e436887d49bcb19db9647a0f508a.
2024/10/09 01:10:03 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/697091888028748967.
2024/10/09 01:10:13 INFO mlflow.tracking._tracking_service.client: 🏃 View run X at: http://localhost:5000/#/experiments/697091888028748967/runs/ff1b45632a8c44fb9e2e5c597d364c49.
2024/10/09 01:10:13 INFO mlflow.tracking._t