In [3]:
# Core libraries for data handling and computation
import pandas as pd

# MLflow and experiment logging
import mlflow.tensorflow
from mlflow.exceptions import MlflowException
import dagshub

# System libraries (plus YAML parsing for the config files)
import sys
import os
import yaml

# Model creation, training, and logging modules
# Make the parent of the current working directory importable before the
# imports below run.
# NOTE(review): presumably create_model / train_model / mlflow_logger live in
# that parent directory — confirm against the repository layout.
sys.path.append(os.path.join(os.getcwd(), ".."))
from create_model import create_model
from train_model import train_and_evaluate_model
from mlflow_logger import log_to_mlflow

In [4]:
# Load the raw dataset and split it into features and target.
# Keep the parent of the working directory importable, but do not append a
# duplicate entry every time this cell is re-run.
parent_dir = os.path.join(os.getcwd(), "..")
if parent_dir not in sys.path:
    sys.path.append(parent_dir)
file_path = os.path.join("..", "..", "data", "raw", "water_potability.csv")

df = pd.read_csv(file_path)
X = df.drop('Potability', axis=1)
y = df['Potability']
feature_names = X.columns.tolist()

# Set up the experiment used for logging (MLflow tracking goes through DagsHub).
dagshub.init(repo_owner='sever.cpa.general', repo_name='my-first-repo', mlflow=True)
run_name = "ml_baseline"
# NOTE(review): the name says "Probability" while the target column is
# "Potability" — confirm this is intended before renaming anything, since the
# experiment is looked up by this exact string.
experiment_name = "Water Probability [RF]"

# Look the experiment up first and create it only when it is missing.
# Catching MlflowException around create_experiment would treat *any* MLflow
# failure (auth, network, ...) as "already exists" and then fail with an
# unrelated AttributeError on a None lookup result.
existing_experiment = mlflow.get_experiment_by_name(experiment_name)
if existing_experiment is None:
    experiment_id = mlflow.create_experiment(experiment_name)
else:
    experiment_id = existing_experiment.experiment_id

## Загружаем конфиги
def load_config(file_name):
    """Load a YAML file from the ``configs`` directory under the current working directory.

    Args:
        file_name: Name of the file inside ``<cwd>/configs``.

    Returns:
        The parsed document, exactly as returned by ``yaml.safe_load``.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        yaml.YAMLError: If the file is not valid YAML.
    """
    config_path = os.path.join(os.getcwd(), 'configs', file_name)
    # Decode explicitly as UTF-8: without it, open() falls back to the
    # platform locale encoding, which mis-reads (or rejects) UTF-8 configs
    # containing non-ASCII text on systems with a different default.
    with open(config_path, 'r', encoding='utf-8') as file:
        return yaml.safe_load(file)

# Each config file keeps its payload under a single top-level key.
hyperparams = load_config('hyperparameters.yml')['hyperparameters']
model_config = load_config('model_config.yml')['model']
logging_config = load_config('logging_config.yml')['logging']

# Train and evaluate the model
results = train_and_evaluate_model(
    X=X,
    y=y,
    model_config=model_config,
    hyperparams=hyperparams
)
# Log the results to MLflow
log_to_mlflow(
    evaluation_results=results,
    # Reuse the name the experiment was resolved with above instead of a
    # second copy of the literal, so the two can never drift apart.
    experiment_name=experiment_name,
    logging_config=logging_config,
    # Intentionally differs from the `run_name` variable defined earlier.
    run_name="ml_baseline_hyper",
    save_model=True,
    hyperparams=hyperparams,  # pass the hyperparameters along for logging
    model_config=model_config  # pass the model configuration along for logging
)


Epoch 1/50
66/66 - 4s - 55ms/step - accuracy: 0.6031 - loss: 0.6839 - val_accuracy: 0.6050 - val_loss: 0.6753
Epoch 2/50
66/66 - 0s - 7ms/step - accuracy: 0.6054 - loss: 0.6729 - val_accuracy: 0.6050 - val_loss: 0.6714
Epoch 3/50
66/66 - 0s - 6ms/step - accuracy: 0.6054 - loss: 0.6714 - val_accuracy: 0.6050 - val_loss: 0.6710
Epoch 4/50
66/66 - 0s - 6ms/step - accuracy: 0.6054 - loss: 0.6727 - val_accuracy: 0.6050 - val_loss: 0.6711
Epoch 5/50
66/66 - 0s - 7ms/step - accuracy: 0.6054 - loss: 0.6713 - val_accuracy: 0.6050 - val_loss: 0.6709
Epoch 6/50
66/66 - 0s - 7ms/step - accuracy: 0.6054 - loss: 0.6722 - val_accuracy: 0.6050 - val_loss: 0.6710
Epoch 7/50
66/66 - 0s - 6ms/step - accuracy: 0.6054 - loss: 0.6701 - val_accuracy: 0.6050 - val_loss: 0.6710
Epoch 8/50
66/66 - 0s - 6ms/step - accuracy: 0.6054 - loss: 0.6702 - val_accuracy: 0.6050 - val_loss: 0.6710
Epoch 9/50
66/66 - 0s - 5ms/step - accuracy: 0.6054 - loss: 0.6715 - val_accuracy: 0.6050 - val_loss: 0.6710
Epoch 10/50
66/66 

Successfully registered model 'ml_baseline_hyper'.
2024/12/09 12:32:54 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: ml_baseline_hyper, version 1
Created version '1' of model 'ml_baseline_hyper'.


🏃 View run ml_baseline_hyper at: https://dagshub.com/sever.cpa.general/my-first-repo.mlflow/#/experiments/2/runs/c43d3bdfb0784b089911ff7a8ecfa587
🧪 View experiment at: https://dagshub.com/sever.cpa.general/my-first-repo.mlflow/#/experiments/2
