In [5]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import mlflow
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import git

In [6]:
# –£–∫–∞–∑—ã–≤–∞–µ–º MLflow, –∫—É–¥–∞ –æ—Ç–ø—Ä–∞–≤–ª—è—Ç—å –¥–∞–Ω–Ω—ã–µ
mlflow.set_tracking_uri("http://213.21.252.250:5000")

# –ó–∞–¥–∞–µ–º –∏–º—è —ç–∫—Å–ø–µ—Ä–∏–º–µ–Ω—Ç–∞
mlflow.set_experiment("LSTM (test)")

# --- –ü–æ–ª—É—á–∞–µ–º —Ö–µ—à –∫–æ–º–º–∏—Ç–∞ Git ---
try:
    repo = git.Repo(search_parent_directories=True)
    git_commit_hash = repo.head.object.hexsha
except Exception as e:
    git_commit_hash = "N/A" # –ù–∞ —Å–ª—É—á–∞–π, –µ—Å–ª–∏ —Å–∫—Ä–∏–ø—Ç –∑–∞–ø—É—â–µ–Ω –Ω–µ –∏–∑ Git-—Ä–µ–ø–æ–∑–∏—Ç–æ—Ä–∏—è
    print(f"Warning: Could not get git commit hash. {e}")

print(f"Current Git Commit Hash: {git_commit_hash}")

# --- –ü–∞—Ä–∞–º–µ—Ç—Ä—ã, –∫–æ—Ç–æ—Ä—ã–µ –Ω—É–∂–Ω–æ –ª–æ–≥–∏—Ä–æ–≤–∞—Ç—å ---
# –ü–∞—Ä–∞–º–µ—Ç—Ä—ã –∏–∑ —Å–∫—Ä–∏–ø—Ç–∞ –Ω–∞—Ä–µ–∑–∫–∏ –¥–∞–Ω–Ω—ã—Ö (sample_creator)
data_params = {
    "window_size": 100,
    "step": 2,
    "sampling_rate": 10
}

# –ì–∏–ø–µ—Ä–ø–∞—Ä–∞–º–µ—Ç—Ä—ã –º–æ–¥–µ–ª–∏
model_params = {
    "epochs": 2,
    "batch_size": 512,
    "validation_split": 0.2,
    "optimizer": "adam",
    "loss": "mean_squared_error"
}

Current Git Commit Hash: 551ac6ab30253e18a55880773ddeee97084809a9


In [7]:
from gc import callbacks

from mlflow.keras.callback import MlflowCallback


with mlflow.start_run():
    print("Starting MLflow run...")

    # --- –õ–æ–≥–∏—Ä—É–µ–º –ø–∞—Ä–∞–º–µ—Ç—Ä—ã ---
    mlflow.log_params(data_params)
    mlflow.log_params(model_params)
    mlflow.set_tag("git_commit", git_commit_hash)
    print("Parameters logged.")

    def load_and_merge_data(npz_units):
      sample_array_lst = []
      label_array_lst = []
      for npz_unit in npz_units:
        loaded = np.load(npz_unit)
        sample_array_lst.append(loaded['sample'])
        label_array_lst.append(loaded['label'])
      sample_array = np.dstack(sample_array_lst)
      label_array = np.concatenate(label_array_lst)
      sample_array = sample_array.transpose(2, 0, 1)
      return sample_array, label_array

    processed_dir = '../data/processed/'

    # –°–æ–±–∏—Ä–∞–µ–º –ø—É—Ç–∏ –∫ —Ñ–∞–π–ª–∞–º –¥–ª—è train –∏ test
    train_files = [os.path.join(processed_dir, f) for f in os.listdir(processed_dir) if f.startswith(('Unit2_', 'Unit5_', 'Unit10_', 'Unit16_', 'Unit18_', 'Unit20_'))]
    test_files = [os.path.join(processed_dir, f) for f in os.listdir(processed_dir) if f.startswith(('Unit11_', 'Unit14_', 'Unit15_'))]
    print(train_files)

    # –ó–∞–≥—Ä—É–∂–∞–µ–º –¥–∞–Ω–Ω—ã–µ
    X_train, y_train = load_and_merge_data(train_files)
    X_test, y_test = load_and_merge_data(test_files)

    print('–†–∞–∑–º–µ—Ä –æ–±—É—á–∞—é—â–µ–π –≤—ã–±–æ—Ä–∫–∏ (X):', X_train.shape)
    print('–†–∞–∑–º–µ—Ä –æ–±—É—á–∞—é—â–µ–π –≤—ã–±–æ—Ä–∫–∏ (y):', y_train.shape)
    print('–†–∞–∑–º–µ—Ä —Ç–µ—Å—Ç–æ–≤–æ–π –≤—ã–±–æ—Ä–∫–∏ (X):', X_test.shape)
    print('–†–∞–∑–º–µ—Ä —Ç–µ—Å—Ç–æ–≤–æ–π –≤—ã–±–æ—Ä–∫–∏ (y):', y_test.shape)

    # –û–ø—Ä–µ–¥–µ–ª—è–µ–º —Ñ–æ—Ä–º—É –≤—Ö–æ–¥–Ω—ã—Ö –¥–∞–Ω–Ω—ã—Ö –∏–∑ X_train
    n_timesteps, n_features = X_train.shape[1], X_train.shape[2]

    # --- –°–æ–∑–¥–∞–µ–º –ø—Ä–æ—Å—Ç—É—é LSTM –º–æ–¥–µ–ª—å ---
    model = Sequential()
    model.add(LSTM(16, input_shape=(n_timesteps, n_features), return_sequences=True)) # return_sequences=True, –µ—Å–ª–∏ —Å–ª–µ–¥—É—é—â–∏–π —Å–ª–æ–π —Ç–æ–∂–µ LSTM
    model.add(Dropout(0.2))
    model.add(LSTM(8))
    model.add(Dropout(0.2))
    model.add(Dense(1)) # –û–¥–∏–Ω –≤—ã—Ö–æ–¥, —Ç–∞–∫ –∫–∞–∫ –ø—Ä–µ–¥—Å–∫–∞–∑—ã–≤–∞–µ–º –æ–¥–Ω–æ —á–∏—Å–ª–æ - RUL

    # –ö–æ–º–ø–∏–ª–∏—Ä—É–µ–º –º–æ–¥–µ–ª—å
    model.compile(optimizer=model_params['optimizer'], loss=model_params['loss'], metrics=['mae'])

    summary_list = []
    model.summary(print_fn=lambda x: summary_list.append(x))
    model_summary_string = "\n".join(summary_list)

    mlflow.log_text(model_summary_string, 'model_summary.txt')

    model.summary()

    # --- –û–±—É—á–∞–µ–º –º–æ–¥–µ–ª—å ---
    history = model.fit(X_train, y_train, 
                        epochs=model_params['epochs'], 
                        batch_size=model_params['batch_size'], 
                        validation_split=model_params['validation_split'], # –ò—Å–ø–æ–ª—å–∑—É–µ–º —á–∞—Å—Ç—å –¥–∞–Ω–Ω—ã—Ö –¥–ª—è –≤–∞–ª–∏–¥–∞—Ü–∏–∏ –Ω–∞ –ª–µ—Ç—É
                        callbacks=[mlflow.keras.MLflowCallback()],
                        verbose=1)

    # --- –û—Ü–µ–Ω–∏–≤–∞–µ–º –Ω–∞ —Ç–µ—Å—Ç–æ–≤—ã—Ö –¥–∞–Ω–Ω—ã—Ö ---
    loss, mae = model.evaluate(X_test, y_test, verbose=0)
    print(f'\nTest MAE: {mae:.2f}')

    metrics = {
        "mae": mae
    }
    mlflow.log_metrics(metrics)
    print(f"Metrics logged: {metrics}")

    # --- –õ–æ–≥–∏—Ä–æ–≤–∞–Ω–∏–µ —Å–∞–º–æ–π –º–æ–¥–µ–ª–∏ ---
    mlflow.keras.log_model(
        model,
        artifact_path="lstm-model", # –ù–∞–∑–≤–∞–Ω–∏–µ –ø–∞–ø–∫–∏ —Å –º–æ–¥–µ–ª—å—é –≤ MLflow
    )
    print("Model logged as an artifact.")

    print("MLflow run finished successfully!")


Starting MLflow run...
Parameters logged.
['../data/processed/Unit20_win100_str2_smp10.npz', '../data/processed/Unit18_win100_str2_smp10.npz', '../data/processed/Unit10_win100_str2_smp10.npz', '../data/processed/Unit16_win100_str2_smp10.npz', '../data/processed/Unit2_win100_str2_smp10.npz', '../data/processed/Unit5_win100_str2_smp10.npz']
–†–∞–∑–º–µ—Ä –æ–±—É—á–∞—é—â–µ–π –≤—ã–±–æ—Ä–∫–∏ (X): (262877, 100, 20)
–†–∞–∑–º–µ—Ä –æ–±—É—á–∞—é—â–µ–π –≤—ã–±–æ—Ä–∫–∏ (y): (262877,)
–†–∞–∑–º–µ—Ä —Ç–µ—Å—Ç–æ–≤–æ–π –≤—ã–±–æ—Ä–∫–∏ (X): (62539, 100, 20)
–†–∞–∑–º–µ—Ä —Ç–µ—Å—Ç–æ–≤–æ–π –≤—ã–±–æ—Ä–∫–∏ (y): (62539,)


  super().__init__(**kwargs)


üèÉ View run trusting-vole-256 at: http://213.21.252.250:5000/#/experiments/1/runs/cfd5eae6b4ab4a1a8df05f325e8fdb15
üß™ View experiment at: http://213.21.252.250:5000/#/experiments/1


KeyboardInterrupt: 