In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd
import os
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import xgboost as xgb
import pickle

In [None]:
# File names of the pickled per-station input tables. They are bare file names:
# prepare_data_for_station() joins them onto the "data/network_input" directory.
# NOTE(review): the "full" variants presumably hold the complete feature set - confirm.
II_112_1_full = "full_II_112_1.pickle"
II_113_1_full = "full_II_113_1.pickle"
II_114_1_full = "full_II_114_1.pickle"
II_131_1_full = "full_II_131_1.pickle"
II_132_1_full = "full_II_132_1.pickle"
II_292_1_full = "full_II_292_1.pickle"
II_297_1_full = "full_II_297_1.pickle"
II_298_1_full = "full_II_298_1.pickle"
II_472_1_full = "full_II_472_1.pickle"
II_924_1_full = "full_II_924_1.pickle"
II_931_1_full = "full_II_931_1.pickle"
II_940_1_full = "full_II_940_1.pickle"
II_952_1_full = "full_II_952_1.pickle"
II_1345_1_full = "full_II_1345_1.pickle"
II_1346_1_full = "full_II_1346_1.pickle"
II_1351_1_full = "full_II_1351_1.pickle"
II_1352_1_full = "full_II_1352_1.pickle"

# Same stations, "grace" variant of the input tables.
# NOTE(review): presumably restricted to GRACE-derived features - confirm.
II_112_1_grace = "grace_II_112_1.pickle"
II_113_1_grace = "grace_II_113_1.pickle"
II_114_1_grace = "grace_II_114_1.pickle"
II_131_1_grace = "grace_II_131_1.pickle"
II_132_1_grace = "grace_II_132_1.pickle"
II_292_1_grace = "grace_II_292_1.pickle"
II_297_1_grace = "grace_II_297_1.pickle"
II_298_1_grace = "grace_II_298_1.pickle"
II_472_1_grace = "grace_II_472_1.pickle"
II_924_1_grace = "grace_II_924_1.pickle"
II_931_1_grace = "grace_II_931_1.pickle"
II_940_1_grace = "grace_II_940_1.pickle"
II_952_1_grace = "grace_II_952_1.pickle"
II_1345_1_grace = "grace_II_1345_1.pickle"
II_1346_1_grace = "grace_II_1346_1.pickle"
II_1351_1_grace = "grace_II_1351_1.pickle"
II_1352_1_grace = "grace_II_1352_1.pickle"


In [None]:
# Directories containing saved model checkpoints. Only `models_dir` is read in
# this notebook (by the evaluation cell at the bottom).
# NOTE(review): what the 300 / 1000 suffixes denote is not visible here - confirm.
models_dir = "data/models"
models_300_dir = "data/models_300"
models_1000_dir = "data/models_1000"

In [None]:
# Every station's "full" input file, in ascending station-id order.
stations_full = [
    II_112_1_full,
    II_113_1_full,
    II_114_1_full,
    II_131_1_full,
    II_132_1_full,
    II_292_1_full,
    II_297_1_full,
    II_298_1_full,
    II_472_1_full,
    II_924_1_full,
    II_931_1_full,
    II_940_1_full,
    II_952_1_full,
    II_1345_1_full,
    II_1346_1_full,
    II_1351_1_full,
    II_1352_1_full,
]

# The matching "grace" input files, in the same station order.
stations_grace = [
    II_112_1_grace,
    II_113_1_grace,
    II_114_1_grace,
    II_131_1_grace,
    II_132_1_grace,
    II_292_1_grace,
    II_297_1_grace,
    II_298_1_grace,
    II_472_1_grace,
    II_924_1_grace,
    II_931_1_grace,
    II_940_1_grace,
    II_952_1_grace,
    II_1345_1_grace,
    II_1346_1_grace,
    II_1351_1_grace,
    II_1352_1_grace,
]

In [None]:
def prepare_data_for_station(station, seq_len=4, train_fraction=0.7, data_path="data/network_input"):
    """Load one station's table and build sliding-window train/test sets.

    The pickled DataFrame must contain exactly one ``target_value`` column; all
    other columns are features. Each sample is a window of ``seq_len``
    consecutive rows and its target is the ``target_value`` of the window's
    last row. The split is chronological (no shuffling): the first
    ``train_fraction`` of the windows is the training set, the rest the test set.

    Args:
        station: File name of the station pickle inside ``data_path``.
        seq_len: Number of consecutive rows per window.
        train_fraction: Share of the windows used for training, in [0, 1].
        data_path: Directory holding the station pickles.

    Returns:
        An 8-tuple
        ``(train_x, train_y, test_x, test_y,
           train_flat_x, train_flat_y, test_flat_x, test_flat_y)``.
        The first four are float32 tensors, with inputs shaped
        ``(n, seq_len, n_features)`` and targets ``(n, 1)``. The last four are
        float32 numpy arrays with every window flattened to one row
        (``(n, seq_len * n_features)``) and targets shaped ``(n, 1)``.

    Raises:
        ValueError: On an invalid ``seq_len`` / ``train_fraction``, when the
            table has fewer rows than ``seq_len``, or when it does not contain
            exactly one ``target_value`` column.
    """
    if seq_len < 1:
        raise ValueError(f"seq_len must be at least 1, got {seq_len}")
    if not 0.0 <= train_fraction <= 1.0:
        raise ValueError(f"train_fraction must be within [0, 1], got {train_fraction}")

    data = pd.read_pickle(os.path.join(data_path, station))

    is_target = data.columns == "target_value"
    feat_arr = data.loc[:, ~is_target].to_numpy()
    target_arr = data.loc[:, is_target].to_numpy()

    n_rows = len(feat_arr)
    if n_rows < seq_len:
        raise ValueError(
            f"{station}: {n_rows} rows are not enough for windows of length {seq_len}"
        )

    target_flat = np.asarray(target_arr, dtype=np.float32).reshape(-1)
    if target_flat.shape[0] != n_rows:
        raise ValueError(f"{station}: expected exactly one 'target_value' column")

    # A cell may hold a scalar or an array, so every row is stacked cell by cell
    # and then flattened into a single float32 feature vector per time step.
    data_flatten = np.array(
        [np.vstack(row) for row in feat_arr], dtype=np.float32
    ).reshape(n_rows, -1)

    n_windows = n_rows - seq_len + 1
    sequences = np.array(
        [data_flatten[start:start + seq_len] for start in range(n_windows)],
        dtype=np.float32,
    )
    # The target of a window is the target of its last row.
    targets = target_flat[seq_len - 1:].reshape(-1, 1)

    # Flat (2-D) view of the same windows for models without a sequence axis.
    flat_seq = sequences.reshape(n_windows, -1)
    flat_targets = targets

    split = int(n_windows * train_fraction)

    data_tensor = torch.tensor(sequences[:split], dtype=torch.float32)
    target_tensor = torch.tensor(targets[:split], dtype=torch.float32)
    test_data_tensor = torch.tensor(sequences[split:], dtype=torch.float32)
    test_target_tensor = torch.tensor(targets[split:], dtype=torch.float32)

    return (
        data_tensor,
        target_tensor,
        test_data_tensor,
        test_target_tensor,
        flat_seq[:split],
        flat_targets[:split],
        flat_seq[split:],
        flat_targets[split:],
    )


In [None]:
def train_xgb(train_flat_sequences, train_flat_targets, test_flat_sequences):
    """Fit a default-configured XGBoost regressor and predict on both splits.

    Args:
        train_flat_sequences: Training features, one flattened window per row.
        train_flat_targets: Training targets aligned with the training rows.
        test_flat_sequences: Test features in the same layout.

    Returns:
        Tuple ``(train_predictions, test_predictions)``.
    """
    regressor = xgb.XGBRegressor().fit(train_flat_sequences, train_flat_targets)
    train_predictions = regressor.predict(train_flat_sequences)
    test_predictions = regressor.predict(test_flat_sequences)
    return train_predictions, test_predictions
    


In [None]:
def denormalize_data(data, min, max):
    """Undo min-max scaling: map values from the [0, 1] scale back to [min, max].

    Args:
        data: Scaled value(s); anything supporting ``*`` and ``+`` with numbers.
        min: Lower bound used when the data was scaled.
        max: Upper bound used when the data was scaled.

    Returns:
        ``data`` rescaled to the original range.
    """
    value_range = max - min
    rescaled = data * value_range + min
    return rescaled

In [None]:
# Dates bracketing the stretch that the plots leave blank.
# NOTE(review): presumably the gap between the GRACE and GRACE-FO missions - confirm.
PLOT_GAP_START = "2017-06-11"
PLOT_GAP_END = "2018-06-16"


def _plot_with_gap(frame, column, ax, color, label):
    """Plot ``frame[column]`` over ``frame["date"]`` as two segments around the gap.

    ``frame`` must be indexed by date. Only the first segment that is actually
    drawn carries ``label``, so the legend shows one entry per series; empty
    segments are skipped because pandas refuses to plot an empty frame.
    """
    pending_label = label
    for segment in (frame.loc[:PLOT_GAP_START], frame.loc[PLOT_GAP_END:]):
        if segment.empty:
            continue
        segment.plot(kind="line", x="date", y=column, ax=ax, c=color, style="-", label=pending_label)
        pending_label = "_nolegend_"


def test_model(model, station, denormalize = False):
    """Evaluate a trained LSTM on one station and compare it with an XGBoost baseline.

    Builds the station's train/test windows, runs ``model`` on both splits,
    fits an XGBoost regressor on the flattened training windows, prints test-set
    MSE/MAE for both models, plots targets and predictions over time and saves
    the figure to ``data/plots/<station without extension>.png``.

    Args:
        model: Trained torch module mapping ``(batch, seq_len, n_features)`` to
            ``(batch, 1)``. It is moved to the CPU and put into eval mode.
        station: Station pickle file name (e.g. ``"full_II_297_1.pickle"``).
        denormalize: When True, targets and predictions are mapped back to the
            original scale using the station's stored min/max before the
            metrics are computed and the plot is drawn.

    Returns:
        DataFrame indexed by date with columns ``date``, ``target``, ``pred``
        (LSTM), ``type`` (``"train"`` / ``"test"``) and ``xgb``.
    """
    # Normalisation values are stored per station, without the dataset prefix.
    station_key = station
    for marker, prefix in (("full", "full_"), ("grace", "grace_")):
        if marker in station:
            station_key = station.replace(prefix, "")
    normalization_data = pd.read_pickle(
        os.path.join("data", "normalization", "normalization_values_" + station_key)
    )

    (train_x, train_y, test_x, test_y,
     train_flat_x, train_flat_y, test_flat_x, test_flat_y) = prepare_data_for_station(station)

    # Each split is small enough for one forward pass, so no DataLoader is needed.
    model.cpu()
    model.eval()
    with torch.no_grad():
        lstm_train = model(train_x)[:, 0].numpy()
        lstm_test = model(test_x)[:, 0].numpy()
    n_train = len(lstm_train)

    target = np.concatenate((train_y.numpy(), test_y.numpy()))[:, 0].astype(np.float64)
    pred = np.concatenate((lstm_train, lstm_test)).astype(np.float64)

    xgb_train, xgb_test = train_xgb(train_flat_x, train_flat_y, test_flat_x)
    xgb_test_true = test_flat_y

    if denormalize:
        min_val = normalization_data.iloc[0]["min"]
        max_val = normalization_data.iloc[0]["max"]
        target = denormalize_data(target, min_val, max_val)
        pred = denormalize_data(pred, min_val, max_val)
        # Promote to float64 first so the rescaling is not done in float32.
        xgb_train = denormalize_data(np.asarray(xgb_train, dtype=np.float64), min_val, max_val)
        xgb_test = denormalize_data(np.asarray(xgb_test, dtype=np.float64), min_val, max_val)
        xgb_test_true = denormalize_data(np.asarray(test_flat_y, dtype=np.float64), min_val, max_val)

    # NOTE(security): pickle executes arbitrary code on load - only use trusted files.
    grace_dates_path = "data/grace_dates.pickle"
    with open(grace_dates_path, 'rb') as f:
        grace_dates = pickle.load(f)

    # NOTE(review): dates are taken from the start of grace_dates, whereas the
    # first window's target is row seq_len - 1 of the station table. Confirm
    # that grace_dates is already offset accordingly.
    data_results = pd.DataFrame({
        "date": grace_dates[:len(target)],
        "target": target,
        "pred": pred
    })
    data_results["date"] = pd.to_datetime(data_results["date"])
    data_results["type"] = ["train"] * n_train + ["test"] * (len(data_results) - n_train)
    data_results["xgb"] = np.concatenate((xgb_train, xgb_test))
    data_results.index = data_results["date"]

    mse = mean_squared_error(target[n_train:], pred[n_train:])
    mae = mean_absolute_error(target[n_train:], pred[n_train:])
    xgb_mse = mean_squared_error(xgb_test_true, xgb_test)
    xgb_mae = mean_absolute_error(xgb_test_true, xgb_test)

    station_name = os.path.splitext(station)[0]

    print()
    print("--------------------")
    print(f"Station: {station_name}")
    print(f"LSTM MSE: {mse:.4f}")
    print(f"LSTM MAE: {mae:.4f}")

    print(f"XGB MSE: {xgb_mse:.4f}")
    print(f"XGB MAE: {xgb_mae:.4f}")

    train_rows = data_results[data_results["type"] == "train"]
    test_rows = data_results[data_results["type"] == "test"]

    fig, ax = plt.subplots()
    _plot_with_gap(data_results, "target", ax, "blue", "Target")
    _plot_with_gap(train_rows, "pred", ax, "green", "LSTM Train set")
    _plot_with_gap(test_rows, "pred", ax, "red", "LSTM Test set")
    _plot_with_gap(train_rows, "xgb", ax, "brown", "XGB Train set")
    _plot_with_gap(test_rows, "xgb", ax, "orange", "XGB Test set")
    ax.xaxis.set_major_locator(mdates.YearLocator())
    ax.legend()

    # savefig does not create missing directories.
    plots_dir = os.path.join("data", "plots")
    os.makedirs(plots_dir, exist_ok=True)
    fig.savefig(os.path.join(plots_dir, f"{station_name}.png"))
    plt.show()

    return data_results

In [None]:
class CustomDataset(Dataset):
    """Index-aligned pairing of input samples and their targets."""

    def __init__(self, data, target):
        """Keep references to the inputs (``data``) and their targets (``target``)."""
        self.data, self.target = data, target

    def __len__(self):
        """Number of samples, taken from ``data``."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the ``(sample, target)`` pair stored at ``index``."""
        sample = self.data[index]
        label = self.target[index]
        return sample, label

In [None]:
class LSTMModel(torch.nn.Module):
    """Stacked LSTM followed by a linear layer applied to the last time step."""

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        """Create the layers.

        Args:
            input_size: Number of features per time step.
            hidden_size: Width of the LSTM hidden state.
            num_layers: Number of stacked LSTM layers.
            output_size: Number of values produced per sequence.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = torch.nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = torch.nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a batch-first input ``(batch, seq, input_size)`` to ``(batch, output_size)``."""
        # Explicit all-zero initial hidden and cell states, created on x's device.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        initial_hidden = torch.zeros(state_shape, device=x.device)
        initial_cell = torch.zeros(state_shape, device=x.device)

        sequence_out, _ = self.lstm(x, (initial_hidden, initial_cell))

        # Only the output at the final time step feeds the linear head.
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)

In [None]:
# Default size (width, height in inches) for every figure created below.
plt.rcParams.update({"figure.figsize": (15, 5)})

In [None]:
# Evaluate the stored LSTM of one station on the original (denormalised) scale.
station = II_297_1_full
model_path = os.path.join(models_dir, "model_" + os.path.splitext(station)[0] + ".pt")
# The checkpoint is a whole pickled model, not a state_dict:
# - weights_only=False is needed on PyTorch >= 2.6, whose default rejects pickled
#   modules (the keyword requires torch >= 1.13);
# - map_location="cpu" lets a checkpoint saved on a GPU load on a CPU-only machine.
# NOTE(security): unpickling executes arbitrary code - only load trusted checkpoints.
model = torch.load(model_path, map_location="cpu", weights_only=False)
test_model(model, station, denormalize = True)