In [None]:
import mlflow
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.dummy import DummyClassifier

In [None]:
# Configure database access: read the credentials from database.env and point
# MLflow at the PostgreSQL tracking store.
# The first three lines of the file are read as KEY=value pairs, in the order
# user, password, database name.
with open("database.env", "r") as file:
    # strip() drops the trailing newline that would otherwise end up in the URI.
    lines = [line.strip() for line in file]
# Split on the first "=" only, so values that contain "=" themselves stay intact.
user, password, db = (line.split("=", 1)[-1] for line in lines[:3])
sql_string = f"postgresql://{user}:{password}@0.0.0.0/{db}"
mlflow.set_tracking_uri(sql_string)

In [None]:
# Candidate classifiers, kept as (label, estimator) pairs in evaluation order.
# One seed is shared by every estimator that accepts `random_state`.
random_state = 42

models = [
    # Baseline that always predicts the most frequent class.
    (
        "Dummy Classifier (mode):",
        DummyClassifier(strategy="most_frequent", random_state=random_state),
    ),
    ("Logistic Regression:", LogisticRegression(random_state=random_state)),
    # GaussianNB and KNeighborsClassifier are built without a seed.
    ("Naive Bayes:", GaussianNB()),
    ("K-Nearest Neighbour:", KNeighborsClassifier(n_neighbors=3)),
    ("Decision Tree:", DecisionTreeClassifier(random_state=random_state)),
    # Two SVM variants that differ only in the kernel.
    (
        "Support Vector Machine-linear:",
        SVC(kernel="linear", random_state=random_state),
    ),
    (
        "Support Vector Machine-rbf:",
        SVC(kernel="rbf", random_state=random_state),
    ),
    (
        "Random Forest:",
        RandomForestClassifier(n_estimators=7, random_state=random_state),
    ),
    # Three hidden layers (45, 30, 15) trained with plain SGD.
    (
        "MLP:",
        MLPClassifier(
            hidden_layer_sizes=(45, 30, 15),
            solver="sgd",
            learning_rate_init=0.01,
            max_iter=500,
            random_state=random_state,
        ),
    ),
    ("AdaBoostClassifier:", AdaBoostClassifier(random_state=random_state)),
    (
        "GradientBoostingClassifier:",
        GradientBoostingClassifier(random_state=random_state),
    ),
]

print("Models appended...")

Loading the data:

In [None]:
%store -r X_train, X_test, y_train, y_test

In [None]:
# Score every candidate model with the same shuffled 10-fold split and log the
# mean accuracy (in percent) to MLflow, one run per model.

# Fixed seed, so the splitter yields identical folds for every model; it is
# loop-invariant and therefore created once.
kfold = KFold(n_splits=10, random_state=0, shuffle=True)
# The target is restored as `y_train` (lower case). `.values.ravel()` flattens it
# to 1-D; assumes a pandas object — TODO confirm.
y_train_flat = y_train.values.ravel()

results = []  # per-model arrays of fold accuracies
names = []  # model labels, aligned with `results`
for name, model in models:
    cv_result = cross_val_score(
        model, X_train, y_train_flat, cv=kfold, scoring="accuracy"
    )
    names.append(name)
    results.append(cv_result)
    with mlflow.start_run(experiment_id=1, run_name=name):
        # Log the actual label, not the literal string 'name'.
        mlflow.log_param("model_name", name)
        mlflow.log_metric("accuracy", cv_result.mean() * 100)