In [1]:
# Базовые библиотеки для работы с данными и вычислений
import pandas as pd

# MLflow и логирование
import mlflow.tensorflow
from mlflow.exceptions import MlflowException
import dagshub

# Системные библиотеки
import sys
import os
import yaml


# Модули создания, обучения модели и логирования
sys.path.append(os.path.join(os.getcwd(), ".."))
from create_model import create_model
from train_model import train_and_evaluate_model
from mlflow_logger import log_to_mlflow


KeyboardInterrupt



In [2]:
# Load the raw dataset.
# NOTE(review): the same sys.path entry is already appended in the import
# cell; it is repeated here presumably so this cell can run on its own —
# confirm before removing.
sys.path.append(os.path.join(os.getcwd(), ".."))
file_path = os.path.join("..", "..", "data", "raw", "water_potability.csv")

df = pd.read_csv(file_path)
# Separate the feature columns from the 'Potability' target column.
X = df.drop('Potability', axis=1)
y = df['Potability']
feature_names = X.columns.tolist()

# Initialise the DagsHub integration (mlflow=True) before any MLflow call,
# then define the run / experiment names used for logging.
dagshub.init(repo_owner='sever.cpa.general', repo_name='my-first-repo', mlflow=True)
run_name = "ml_baseline"
experiment_name="Water Probability [RF]"

# Look the experiment up first and create it only when it does not exist.
# Catching MlflowException around create_experiment() would also swallow
# unrelated failures (auth, connectivity) and then crash with an
# AttributeError on the None returned by get_experiment_by_name().
experiment = mlflow.get_experiment_by_name(experiment_name)
if experiment is None:
    experiment_id = mlflow.create_experiment(experiment_name)
else:
    experiment_id = experiment.experiment_id

## Загружаем конфиги
def load_config(file_name):
    """Load a YAML file from the ``configs`` directory under the current working directory.

    Args:
        file_name: Name of the file inside ``<cwd>/configs``.

    Returns:
        The parsed document as returned by ``yaml.safe_load``.

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the content is not valid YAML.
    """
    config_path = os.path.join(os.getcwd(), 'configs', file_name)
    # Decode explicitly as UTF-8 so parsing does not depend on the platform's
    # locale encoding (configs with non-ASCII text would otherwise break on
    # systems whose default encoding is not UTF-8).
    with open(config_path, 'r', encoding='utf-8') as file:
        return yaml.safe_load(file)

# Read the config sections consumed by training and logging below.
hyperparams = load_config('hyperparameters.yml')['hyperparameters']
model_config = load_config('model_config.yml')['model']
logging_config = load_config('logging_config.yml')['logging']

# Train and evaluate the model.
results = train_and_evaluate_model(
    X=X,
    y=y,
    model_config=model_config,
    hyperparams=hyperparams,
)

# Log the evaluation results to MLflow.
log_to_mlflow(
    evaluation_results=results,
    # Reuse the name defined earlier in this cell instead of repeating the
    # literal, so the experiment that was looked up / created and the one
    # logged to cannot drift apart.
    experiment_name=experiment_name,
    logging_config=logging_config,
    # NOTE(review): this differs from run_name ("ml_baseline") defined earlier
    # in this cell — confirm which value is intended.
    run_name="ml_baseline_test3",
    save_model=False,
    hyperparams=hyperparams,  # also record the hyperparameters
    model_config=model_config,  # also record the model configuration
)


Epoch 1/10
50/50 - 6s - 125ms/step - AUC: 0.5063 - F1Score: 0.5716 - FalseNegatives: 589.0000 - FalsePositives: 55.0000 - Precision: 0.4211 - Recall: 0.0636 - TrueNegatives: 888.0000 - TruePositives: 40.0000 - accuracy: 0.5903 - loss: 0.6882 - val_AUC: 0.5000 - val_F1Score: 0.5683 - val_FalseNegatives: 156.0000 - val_FalsePositives: 0.0000e+00 - val_Precision: 0.0000e+00 - val_Recall: 0.0000e+00 - val_TrueNegatives: 237.0000 - val_TruePositives: 0.0000e+00 - val_accuracy: 0.6031 - val_loss: 0.6826
Epoch 2/10
50/50 - 0s - 9ms/step - AUC: 0.5273 - F1Score: 0.5716 - FalseNegatives: 629.0000 - FalsePositives: 0.0000e+00 - Precision: 0.0000e+00 - Recall: 0.0000e+00 - TrueNegatives: 943.0000 - TruePositives: 0.0000e+00 - accuracy: 0.5999 - loss: 0.6765 - val_AUC: 0.5000 - val_F1Score: 0.5683 - val_FalseNegatives: 156.0000 - val_FalsePositives: 0.0000e+00 - val_Precision: 0.0000e+00 - val_Recall: 0.0000e+00 - val_TrueNegatives: 237.0000 - val_TruePositives: 0.0000e+00 - val_accuracy: 0.6031 -