In [1]:
import mlflow
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.dummy import DummyClassifier

In [2]:
# Указываем доступ в БД
with open("database.env", "r") as file:

    lines = file.readlines()
    user = lines[0].split("=")[-1][:-1]
    password = lines[1].split("=")[-1][:-1]
    db = lines[2].split("=")[-1]
print(user, password, db)
sql_string = f"postgresql://{user}:{password}@0.0.0.0/{db}"
mlflow.set_tracking_uri(sql_string)

mlflow_user magical_password mlflow_db


In [9]:
random_state = 42

models = []

# Dummy Classifier does not support n_jobs
models.append(
    (
        "Dummy Classifier (mode):",
        DummyClassifier(strategy="most_frequent", random_state=random_state),
    )
)

# Logistic Regression supports n_jobs with certain solvers like 'saga'
models.append(
    (
        "Logistic Regression:",
        LogisticRegression(solver="saga", random_state=random_state, n_jobs=-1),
    )
)

# Naive Bayes does not support n_jobs
models.append(("Naive Bayes:", GaussianNB()))

# KNeighborsClassifier supports n_jobs
models.append(
    (
        "K-Nearest Neighbour:",
        KNeighborsClassifier(n_neighbors=3, n_jobs=-1),
    )
)

# Decision Tree does not support n_jobs
models.append(
    (
        "Decision Tree:",
        DecisionTreeClassifier(random_state=random_state),
    )
)

# # Support Vector Machine (SVC) does not support n_jobs, so no changes here
# models.append(
#     (
#         "Support Vector Machine-linear:",
#         SVC(kernel="linear", random_state=random_state),
#     )
# )
# models.append(
#     (
#         "Support Vector Machine-rbf:",
#         SVC(kernel="rbf", random_state=random_state),
#     )
# )

# RandomForestClassifier supports n_jobs
models.append(
    (
        "Random Forest:",
        RandomForestClassifier(n_estimators=7, random_state=random_state, n_jobs=-1),
    )
)

# MLPClassifier supports n_jobs
models.append(
    (
        "MLP:",
        MLPClassifier(
            hidden_layer_sizes=(45, 30, 15),
            solver="sgd",
            learning_rate_init=0.01,
            max_iter=500,
            random_state=random_state,
        ),
    )
)

# AdaBoostClassifier supports n_jobs
models.append(
    (
        "AdaBoostClassifier:",
        AdaBoostClassifier(random_state=random_state),
    )
)

# GradientBoostingClassifier supports n_jobs
models.append(
    (
        "GradientBoostingClassifier:",
        GradientBoostingClassifier(random_state=random_state),
    )
)

print("Models appended...")

Models appended...


Loading the data:

In [4]:
%store -r X_train X_test y_train y_test

In [10]:
results = []
names = []


experiment_name = "Model Comparison Experiment"

try:
    experiment_id = mlflow.create_experiment(experiment_name)
except mlflow.exceptions.MlflowException:
    experiment_id = mlflow.get_experiment_by_name(experiment_name).experiment_id

for name, model in models:
    kfold = KFold(n_splits=2, random_state=0, shuffle=True)
    cv_result = cross_val_score(
        model, X_train, y_train.values.ravel(), cv=kfold, scoring="accuracy", n_jobs=-1
    )
    names.append(name)
    results.append(cv_result)

    with mlflow.start_run(experiment_id=experiment_id, run_name=name):
        mlflow.log_param("model_name", name)
        mlflow.log_metric("accuracy", results[-1].mean() * 100)

