In [95]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import warnings
from utils import (
    prepare_ml_data_from_features,
    cyclical_feature_encoding,
    calculate_horizons,
    generate_time_series_features,
    calculate_metrics,
)

# Игнорировать все предупреждения
warnings.filterwarnings("ignore")

import random

# Fix the random seed for reproducibility: the same value is handed to the
# NumPy, TensorFlow and Python `random` seeding functions.
random_seed = 42
for _seed_rng in (np.random.seed, tf.random.set_seed, random.seed):
    _seed_rng(random_seed)
del _seed_rng  # keep the notebook namespace free of the loop helper

# Load the comma-separated data file.
file_path = "Meteo1-2023-15min(resampled).csv"
df = pd.read_csv(file_path, sep=",")

# Parse the timestamp column with an explicit format, then make it the index.
timestamp_column = "time_YYMMDD_HHMMSS"
df[timestamp_column] = pd.to_datetime(
    df[timestamp_column], format="%Y-%m-%d %H:%M:%S"
)
df = df.set_index(timestamp_column)

# Add calendar features read straight from the datetime index.
time_index = df.index
df["month"], df["day_of_month"], df["hour"] = (
    time_index.month,
    time_index.day,
    time_index.hour,
)

# Encode the cyclical columns with the project helper.
# NOTE(review): presumably this yields `<name>_sin` / `<name>_cos` columns
# (those names are used for WindDirection further down) — confirm in utils.
cyclical_columns = ["WindDirection", "month", "day_of_month", "hour"]
df_transformed = cyclical_feature_encoding(df, cyclical_columns)

# Parameters that gave the best result so far.
# History / forecast lengths in hours (converted to horizons by calculate_horizons).
lookback_hours, forecast_hours = 24, 3
# Units per hidden Dense layer and number of hidden Dense layers.
neurons, layers = 100, 5
# Stride used when enumerating lags and window sizes.
lag_step = 4
# Lower bound and (exclusive) upper bound for rolling / expanding window sizes.
rolling_window_min, rolling_window_max = 2, 10
expanding_window_min, expanding_window_max = 2, 10

# Compute the forecast and lookback horizons from the hour-based settings.
# NOTE(review): presumably these are step counts at the data's sampling
# interval — confirm against calculate_horizons in utils.
forecast_horizon, lookback_horizon = calculate_horizons(
    df_transformed, forecast_hours, lookback_hours
)

# Lags: every `lag_step`-th value starting at 1, stopping before the lookback horizon.
lags = [*range(1, lookback_horizon, lag_step)]

# Window sizes: start at the configured minimum and stop before the smaller of
# the configured maximum and the lookback horizon.
rolling_window_stop = min(rolling_window_max, lookback_horizon)
rolling_window_sizes = [*range(rolling_window_min, rolling_window_stop, lag_step)]

expanding_window_stop = min(expanding_window_max, lookback_horizon)
expanding_window_sizes = [
    *range(expanding_window_min, expanding_window_stop, lag_step)
]

# Generate time-series features for the selected columns, using the lag and
# window-size lists prepared above in this cell's configuration.
base_feature_columns = [
    "WindSpeedMax",
    "AirTemperature",
    "AirPressure",
    "AirHumidity",
    "WindSpeed",
    "WindDirection_sin",
    "WindDirection_cos",
]
df_transformed = generate_time_series_features(
    df_transformed,
    columns=base_feature_columns,
    lags=lags,
    rolling_window_sizes=rolling_window_sizes,
    expanding_window_sizes=expanding_window_sizes,
)

# Split into train and test parts without shuffling, so row order is kept
# and the last 33% of rows become the test set.
# (`random_state` has no effect while `shuffle=False`.)
df_train, df_test = train_test_split(
    df_transformed, test_size=0.33, random_state=42, shuffle=False
)

# Build the (X, y) pairs for machine learning — train part first, then test —
# with the same target variable and forecast horizon for both.
(X_train, y_train), (X_test, y_test) = (
    prepare_ml_data_from_features(
        part, target_variable="WindSpeed", forecast_horizon=forecast_horizon
    )
    for part in (df_train, df_test)
)

# Scale the data: each scaler is fitted on the training part only and then
# applied to both the training and the test part.
scaler_X = StandardScaler().fit(X_train)
X_train_scaled = scaler_X.transform(X_train)
X_test_scaled = scaler_X.transform(X_test)

scaler_y = StandardScaler().fit(y_train)
y_train_scaled = scaler_y.transform(y_train)
y_test_scaled = scaler_y.transform(y_test)

# Data format: the model input is a flat feature vector.
n_features = X_train_scaled.shape[1]
input_shape = (n_features,)

# Build the fully connected network with the chosen hyperparameters.
# The input is declared with an explicit Input object instead of passing
# `input_shape=` to the first Dense layer, which is the deprecated way to
# specify a Sequential model's input.
model = Sequential()
model.add(tf.keras.Input(shape=input_shape))

# `layers` hidden ReLU layers of `neurons` units each; at least one hidden
# layer is always created, even if `layers` is set below 1.
for _ in range(max(layers, 1)):
    model.add(Dense(neurons, activation="relu"))

# Linear output layer: one unit per forecast step.
model.add(Dense(forecast_horizon))

# Compile with a fixed initial learning rate and mean-squared-error loss.
model.compile(optimizer=Adam(learning_rate=0.00005), loss="mse")

# Callback that saves the whole model (not only the weights) each time the
# validation loss reaches a new minimum.
checkpoint = ModelCheckpoint(
    filepath="best_model.keras",
    monitor="val_loss",
    mode="min",
    save_best_only=True,
    save_weights_only=False,
)

# Callback that multiplies the learning rate by 0.2 after 5 epochs without
# validation-loss improvement, never going below 1e-5.
reduce_lr = ReduceLROnPlateau(
    monitor="val_loss", factor=0.2, patience=5, min_lr=0.00001
)

# Train the model with both callbacks; 20% of the training data is held out
# for validation.
fit_options = dict(
    epochs=150,
    batch_size=256,
    validation_split=0.2,
    callbacks=[checkpoint, reduce_lr],
)
model.fit(x=X_train_scaled, y=y_train_scaled, **fit_options)

# Replace the in-memory model with the best checkpoint saved during training
# (the full model is reloaded from disk, not just its weights).
model = tf.keras.models.load_model("best_model.keras")

# Predict on the scaled test features, then map the predictions back to the
# original target scale.
y_pred_scaled = model.predict(X_test_scaled)
y_pred = scaler_y.inverse_transform(y_pred_scaled)

# Evaluate the model and print the metrics returned by the project helper.
metrics_report = calculate_metrics(y_test, y_pred, y_train)
print(metrics_report)


Epoch 1/150
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 1.0410 - val_loss: 0.8979 - learning_rate: 5.0000e-05
Epoch 2/150
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.8617 - val_loss: 0.7057 - learning_rate: 5.0000e-05
Epoch 3/150
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.6394 - val_loss: 0.5608 - learning_rate: 5.0000e-05
Epoch 4/150
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.5184 - val_loss: 0.5155 - learning_rate: 5.0000e-05
Epoch 5/150
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.4731 - val_loss: 0.4969 - learning_rate: 5.0000e-05
Epoch 6/150
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.4492 - val_loss: 0.4854 - learning_rate: 5.0000e-05
Epoch 7/150
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.4334 - val_loss: 0.4795

In [112]:
from utils import plot_sample_with_features

# Usage example: plot a test sample for the WindSpeed column together with
# the model's predictions.
columns_to_plot = ["WindSpeed"]
plot_sample_with_features(
    X_test,
    y_test,
    target_variable="WindSpeed",
    columns=columns_to_plot,
    y_pred_values=y_pred,
)