# In [None]:
# =========================
# IMPORTS
# =========================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

from keras.models import Sequential
from keras.layers import Dense, SimpleRNN, LSTM, GRU

# =========================
# DATA LOADING AND PREPARATION
# =========================
# Read the CSV, then replace the 'Month' column with parsed timestamps and
# promote it to the index.
dataset = pd.read_csv('airline-passengers.csv')
dataset = dataset.assign(Month=pd.to_datetime(dataset['Month'])).set_index('Month')

# Every remaining column as a float32 matrix (one row per index entry).
values = dataset.to_numpy().astype('float32')

# Quick look at the raw series.
fig, ax = plt.subplots(figsize=(16, 6))
ax.plot(dataset['Passengers'])
ax.set_title("Airline passengers")
plt.show()

# =========================
# TRAIN / TEST SPLIT (test set is not truncated)
# =========================
# The first 70% of the rows form the training set, the rest the test set.
train_size = int(len(values) * 0.70)
train_data, test_data = values[:train_size], values[train_size:]

# The scaler is fitted on the training rows only; the test rows are then
# transformed with those same fitted parameters.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(train_data)
train_scaled = scaler.transform(train_data)
test_scaled = scaler.transform(test_data)

# =========================
# DATASET CREATION FUNCTION (test set is not truncated)
# =========================
def _make_windows(series, look_back):
    """Cut column 0 of *series* into (window, next value) pairs.

    Args:
        series: 2-D NumPy array; only column 0 is read.
        look_back: number of consecutive values in each input window.

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape ``(n, 1, look_back)`` and ``y``
        has shape ``(n,)``, with ``n = max(len(series) - look_back, 0)``.
    """
    n_samples = max(len(series) - look_back, 0)
    windows = [series[i:i + look_back, 0] for i in range(n_samples)]
    targets = [series[i + look_back, 0] for i in range(n_samples)]
    # Explicit dtype keeps the (possibly empty) result in the input's dtype.
    X = np.reshape(
        np.array(windows, dtype=series.dtype), (n_samples, 1, look_back)
    )
    return X, np.array(targets, dtype=series.dtype)


def create_dataset_full(train, test, look_back):
    """Build windowed train / validation / test sets from two series.

    The training series is split 80/20 into a training part and a validation
    part. The validation and test parts are each prefixed with the preceding
    ``look_back`` rows, so their first target has a full input window.

    Args:
        train: 2-D array of training rows; only column 0 is used.
        test: 2-D array of test rows with the same number of columns as
            ``train`` (they are stacked with ``np.vstack``).
        look_back: window length, must be >= 1.

    Returns:
        Tuple ``(X_train, y_train, X_val, y_val, X_test, y_test)``. Each ``X``
        has shape ``(n, 1, look_back)`` and each ``y`` has shape ``(n,)``.

    Raises:
        ValueError: if ``look_back`` is smaller than 1.
    """
    if look_back < 1:
        # With look_back == 0, train[-look_back:] would be the whole training
        # set, silently prepending all of it to the test windows.
        raise ValueError(f"look_back must be >= 1, got {look_back}")

    train = np.asarray(train)
    test = np.asarray(test)

    # split train -> train / validation (80/20)
    val_size = int(len(train) * 0.8)
    train_part = train[:val_size]
    # Clamp at 0: a negative start would wrap around and select only the
    # tail of `train` instead of the intended context rows.
    val_part = train[max(val_size - look_back, 0):]

    # TEST (continuity preserved): prepend the last look_back training rows.
    test_full = np.vstack((train[-look_back:], test))

    X_train, y_train = _make_windows(train_part, look_back)
    X_val, y_val = _make_windows(val_part, look_back)
    X_test, y_test = _make_windows(test_full, look_back)

    return (
        X_train, y_train,
        X_val, y_val,
        X_test, y_test
    )

# =========================
# BACKTEST FUNCTION (FIXED)
# =========================
def backtest_plot(model, X_test, y_test, look_back):
    """Plot predictions over the full series and report the RMSE.

    Uses the module-level ``scaler`` to map predictions and targets back to
    the original scale, and the module-level ``dataset`` for the plot.

    Args:
        model: object with a ``predict`` method; its output is passed
            straight to ``scaler.inverse_transform``.
        X_test: model inputs for the evaluated period.
        y_test: scaled targets as a NumPy array; reshaped to one column.
        look_back: not used in the body; kept so existing callers that pass
            it keep working.

    Returns:
        RMSE between targets and predictions on the original scale.
    """
    preds = scaler.inverse_transform(model.predict(X_test))
    y_true = scaler.inverse_transform(y_test.reshape(-1, 1))

    # Predictions are drawn against the last len(preds) timestamps of the
    # series (assumes they cover its tail).
    index = dataset.index[-len(preds):]

    plt.figure(figsize=(16,6))
    plt.plot(dataset['Passengers'], label='True')
    plt.plot(index, preds, label='Prediction')
    plt.legend()
    plt.show()

    # `squared=False` was deprecated in scikit-learn 1.4 and removed in 1.6;
    # taking the square root of the MSE works on every version.
    rmse = float(np.sqrt(mean_squared_error(y_true, preds)))
    print(f"RMSE TEST: {rmse:.3f}")
    return rmse

# =========================
# GRID SEARCH (look_back, units)
# =========================
# Exhaustive search over the window length and the number of LSTM units.
# Each configuration is fitted on the training windows and scored by RMSE on
# the validation windows, on the original (inverse-scaled) value scale.
results = []

for look_back in range(1, 13):
    # The windowed datasets depend only on look_back, so they are built once
    # per window length rather than once per (look_back, units) pair.
    X_tr, y_tr, X_val, y_val, X_te, y_te = create_dataset_full(
        train_scaled, test_scaled, look_back
    )
    y_val_inv = scaler.inverse_transform(y_val.reshape(-1, 1))

    for units in range(1, 13):

        model = Sequential()
        model.add(LSTM(units, input_shape=(1, look_back)))
        model.add(Dense(1))
        model.compile(loss='mse', optimizer='adam')

        model.fit(
            X_tr, y_tr,
            epochs=50,
            batch_size=1,
            verbose=0
        )

        val_pred = scaler.inverse_transform(model.predict(X_val))

        # `squared=False` was deprecated in scikit-learn 1.4 and removed in
        # 1.6; the square root of the MSE works on every version.
        rmse_val = float(np.sqrt(mean_squared_error(y_val_inv, val_pred)))

        # The fitted model is kept so the winner can be reused without
        # retraining.
        results.append({
            "look_back": look_back,
            "units": units,
            "rmse_val": rmse_val,
            "model": model
        })

        print(f"look_back={look_back}, units={units}, RMSE_val={rmse_val:.3f}")

# =========================
# BEST MODEL SELECTION
# =========================
# The configuration with the lowest validation RMSE wins.
best = min(results, key=lambda x: x["rmse_val"])

print("\nNAJLEPSZY MODEL")
print(f"look_back: {best['look_back']}")
print(f"units: {best['units']}")
print(f"RMSE VAL: {best['rmse_val']:.3f}")

# =========================
# BACKTEST + VISUALIZATION
# =========================
# Rebuild the windows for the winning look_back: the arrays left over from
# the grid search belong to the last look_back tried, not the best one.
X_tr, y_tr, X_val, y_val, X_te, y_te = create_dataset_full(
    train_scaled, test_scaled, best["look_back"]
)

backtest_plot(best["model"], X_te, y_te, best["look_back"])