In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
from gluonts.dataset.pandas import PandasDataset
from gluonts.dataset.split import split
from gluonts.torch import DeepAREstimator

In [None]:
# Seed torch's global RNG before anything else runs.
torch.manual_seed(42)

# Read the raw measurement table.
df = pd.read_csv('../../3OEC_current_flow.csv')

# Row-wise mean of the three O2_S* columns, stored as a new column.
df["O2_avg"] = df.loc[:, ["O2_S1", "O2_S2", "O2_S3"]].mean(axis="columns")

from datetime import datetime, timedelta

# Start / end instants of each deployment window.
start_time_11, end_time_11 = datetime(2017, 7, 11, 14), datetime(2017, 7, 12, 8)
start_time_13, end_time_13 = datetime(2017, 7, 13, 11), datetime(2017, 7, 14, 6)
start_time_15, end_time_15 = datetime(2017, 7, 15, 10), datetime(2017, 7, 16, 6)
start_time_16, end_time_16 = datetime(2017, 7, 16, 16), datetime(2017, 7, 17, 6)

# Deployment name -> {"start": ..., "end": ...}, kept in chronological order.
deployments = {
    name: {"start": begin, "end": finish}
    for name, begin, finish in (
        ("3oec_2017_7_11_12", start_time_11, end_time_11),
        ("3oec_2017_7_13_14", start_time_13, end_time_13),
        ("3oec_2017_7_15_16", start_time_15, end_time_15),
        ("3oec_2017_7_16_17", start_time_16, end_time_16),
    )
}

# One Series of timestamps per deployment, collected in deployment order.
date_ranges = []

for deployment_name, deployment_info in deployments.items():
    start_time, end_time = deployment_info["start"], deployment_info["end"]

    # This one deployment is shifted back by a single 0.125 s step.
    if deployment_name == "3oec_2017_7_13_14":
        start_time = start_time - timedelta(seconds=0.125)
    print(start_time)

    # Window length in seconds (plus one 0.125 s step), at 8 measurements per second.
    total_seconds = 0.125 + (end_time - start_time).total_seconds()
    num_measurements = int(8 * total_seconds)

    # Evenly spaced timestamps covering this deployment.
    date_range = pd.date_range(
        start=start_time,
        periods=num_measurements,
        freq=f'{1000/8}ms',
    )
    print(date_range[0], date_range[-1])
    print(len(date_range))
    date_ranges.append(pd.Series(date_range))

# Stack the per-deployment timestamps and use them as the DataFrame index.
complete_index = pd.concat(date_ranges)
df.index = complete_index

In [None]:
# Drop the columns that are not needed, then average what is left into 5-minute bins.
df_resampled = (
    df
    .drop(columns=['deployment', 't', 't_increase', 'Vx', 'Vy', 'Vz', 'P', 'O2_S1', 'O2_S2', 'O2_S3'])
    .resample('5min')
    .mean()
)

# Four label-based time slices of the resampled frame (both endpoints inclusive).
first_piece = df_resampled.loc["2017-07-11":"2017-07-12 06:00:00"]
second_piece = df_resampled.loc["2017-07-13 12:00:00":"2017-07-14 06:00:00"]
third_piece = df_resampled.loc["2017-07-15 12:00:00":"2017-07-16 6:00:00"]
fourth_piece = df_resampled.loc["2017-07-16 16:00:00":"2017-07-17"]

In [None]:
# Expose the DatetimeIndex as a regular "timestamp" column.
# `.assign` returns a new DataFrame instead of writing into a slice of
# df_resampled, which avoids pandas' SettingWithCopyWarning and keeps this
# cell safe to re-run.
first_piece = first_piece.assign(timestamp=first_piece.index)
second_piece = second_piece.assign(timestamp=second_piece.index)

In [None]:
from gluonts.dataset.util import to_pandas
# Sampling frequency string shared by the dataset and period definitions below.
freq = "5min"

In [None]:
# Each piece becomes its own GluonTS dataset; both share the same column mapping.
dataset_options = dict(freq=freq, target="O2_avg", timestamp="timestamp")
train_full = PandasDataset(dataframes=first_piece, **dataset_options)
test_full = PandasDataset(dataframes=second_piece, **dataset_options)

# Forecast horizon in steps: 6 * 5min = 30 minutes.
prediction_length = 6

# The split date is the very first timestamp of second_piece,
# expressed as a Period at the dataset frequency.
test_start_period = pd.Period(second_piece["timestamp"].iloc[0], freq=freq)

# Only the test template is needed; the training half of the split is discarded.
_, test_template = split(test_full, date=test_start_period)

# Ten windows of `prediction_length` steps each, generated from the test template.
test_pairs = test_template.generate_instances(
    prediction_length=prediction_length,
    windows=10,
)

############################################
# 2. Helper: PeriodIndex -> DatetimeIndex
############################################
def _to_datetime_index(s_or_df):
    """Return a copy of a Series/DataFrame whose index is timestamp-based.

    A PeriodIndex is converted with ``to_timestamp()``; any other index is
    left as it is. The argument itself is never modified.
    """
    converted = s_or_df.copy()
    if not isinstance(converted.index, pd.PeriodIndex):
        return converted
    converted.index = converted.index.to_timestamp()
    return converted

############################################
# 3. Shading helper for visualization
############################################
def highlight_entry(entry, color, label=None):
    """Shade the time span covered by a dataset entry on the current axes.

    ``entry["start"]`` is a pandas.Period and ``entry["target"]`` is an
    array-like of values. The shaded band begins at the timestamp of the
    start period and ends at the timestamp of the period that follows the
    last value.
    """
    first_period = entry["start"]
    step_count = int(len(entry["target"]))
    period_after_last = first_period + step_count
    plt.axvspan(
        first_period.to_timestamp(),
        period_after_last.to_timestamp(),
        facecolor=color,
        alpha=0.2,
        label=label,
    )

############################################
# 4. Make windows from the test dataset
############################################
# The windows themselves are created earlier, where test_pairs is built.
# Each window is 30 minutes long; at the 5min frequency that is 6 steps.


############################################
# 5. Plotting
############################################
def plot_train_and_test_windows(train_dataset, test_dataset, test_pairs):
    """Plot the training series and every (context, label) test window.

    Parameters
    ----------
    train_dataset : iterable of dataset entries
        Each entry is plotted in its own figure with its full extent shaded red.
    test_dataset : iterable of dataset entries
        Each entry is used as the background signal for the test-window figures.
    test_pairs : iterable of (test_input, test_label) entry pairs
        For every pair one figure is drawn: the input/context span is shaded
        green and the label span is shaded blue.

    Every plot gets its own explicitly created figure, which is closed after
    being shown. Without that, backends whose ``plt.show()`` does not close
    figures would draw all windows on top of each other on a single axes.
    """
    # Training coverage (red). A single pass over the dataset is enough:
    # the same entry provides both the signal and the shaded range.
    for train_entry in train_dataset:
        series = _to_datetime_index(to_pandas(train_entry))
        fig, ax = plt.subplots()  # new figure; its axes become the current axes
        series.plot(ax=ax)
        # highlight_entry draws on the current axes, i.e. the `ax` created above.
        highlight_entry(train_entry, "red", label="training range")
        ax.legend(["signal", "training range"], loc="upper left")
        ax.set_title("Training dataset coverage")
        plt.show()
        plt.close(fig)

    # One figure per (context, label) window on top of each test series.
    for original_entry in test_dataset:
        series = _to_datetime_index(to_pandas(original_entry))
        for test_input, test_label in test_pairs:
            fig, ax = plt.subplots()
            series.plot(ax=ax)
            highlight_entry(test_input, "green", label="model input/context")
            highlight_entry(test_label, "blue", label="prediction target (30 min)")
            ax.legend(
                ["signal", "model input/context", "prediction target (30 min)"],
                loc="upper left",
            )
            ax.set_title("Test windows")
            plt.show()
            plt.close(fig)

############################################
# 6. Call the plotting function
############################################
# Draw the training coverage figure(s) followed by one figure per test window.
plot_train_and_test_windows(
    train_dataset=train_full,
    test_dataset=test_full,
    test_pairs=test_pairs,
)

In [None]:
# DeepAR estimator configured with the same `freq` that the datasets were
# built with (instead of a second hard-coded "5min") and the shared
# `prediction_length`, so the model and the data cannot drift apart.
estimator = DeepAREstimator(
    freq=freq,
    prediction_length=prediction_length,
    trainer_kwargs={"max_epochs": 50},
)

# Fit on the training dataset; the result is the predictor used for forecasting.
predictor = estimator.train(train_full)

In [None]:
from gluonts.evaluation import make_evaluation_predictions, Evaluator

# Walk the test windows with ONE iterator. Calling next(iter(test_pairs))
# repeatedly creates a fresh iterator each time and therefore keeps
# returning the first window.
pair_iter = iter(test_pairs)
first_pair = next(pair_iter)
second_pair = next(pair_iter)
third_pair = next(pair_iter)

# Forecast every test window from its input/context part.
# A single (input, label) tuple is not a dataset of time series, so it must
# not be handed to make_evaluation_predictions; the windows already carry
# their own input/label split.
forecast_it = predictor.predict(test_pairs.input, num_samples=100)

# Ground truth for each window: the context values followed by the label
# values, as one pandas Series, so the evaluator can look up both the
# forecast horizon and the history that precedes it.
ts_it = (
    pd.concat([to_pandas(test_input), to_pandas(test_label)])
    for test_input, test_label in test_pairs
)

forecasts = list(forecast_it)   # materialize generators
labels = list(ts_it)

# Quantile levels 0.05, 0.10, ..., 0.95.
evaluator = Evaluator(quantiles=(np.arange(20)/20.0)[1:])
agg_metrics, item_metrics = evaluator(labels, forecasts)


In [None]:
# Display the object returned by generate_instances (iterated elsewhere as (input, label) pairs).
test_pairs

In [None]:
# Display the forecast iterator; it was already consumed by list(forecast_it) when `forecasts` was built.
forecast_it