In [1]:
import numpy as np
import pandas as pd
from pytorch_forecasting import TimeSeriesDataSet

  import pkg_resources


In [2]:
mvtseries = pd.read_csv(
    "../assets/datasets/time_series_smf1.csv",
    parse_dates=["datetime"],
    index_col="datetime",
)

stat_by_variable = {
    "Incoming Solar": "sum",
    "Wind Dir": "mean",
    "Snow Depth": "sum",
    "Wind Speed": "mean",
    "Dewpoint": "mean",
    "Precipitation": "sum",
    "Vapor Pressure": "mean",
    "Relative Humidity": "mean",
    "Air Temp": "max",
}

mvtseries = mvtseries.resample("D").agg(stat_by_variable)
mvtseries = mvtseries.ffill()

mvtseries.to_csv("../assets/daily_multivariate_timeseries.csv")

In [3]:
mvtseries.tail()

Unnamed: 0_level_0,Incoming Solar,Wind Dir,Snow Depth,Wind Speed,Dewpoint,Precipitation,Vapor Pressure,Relative Humidity,Air Temp
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2013-09-26,1034.2,137.083333,0.0,0.55,1.645833,12.3,689.458333,0.97,3.9
2013-09-27,2182.3,108.958333,0.0,0.608333,0.529167,0.0,634.625,0.85,8.3
2013-09-28,3384.5,97.25,0.0,0.708333,0.670833,0.0,642.125,0.60625,16.5
2013-09-29,478.2,108.666667,0.0,1.0375,2.129167,0.6,719.083333,0.48625,13.9
2013-09-30,2554.8,115.25,0.0,0.9625,4.7875,2.0,863.416667,0.747083,12.2


In [16]:
TARGET = "Incoming Solar"
N_LAGS = 3
HORIZON = 1

input_data = []
output_data = []
for i in range(N_LAGS, mvtseries.shape[0] - HORIZON + 1):
    input_data.append(mvtseries.iloc[i - N_LAGS : i].values)
    output_data.append(mvtseries.iloc[i : (i + HORIZON)][TARGET])

input_data, output_data = np.array(input_data), np.array(output_data)
input_data.shape, output_data.shape

((2189, 3, 11), (2189, 1))

In [17]:
mvtseries.T.head(5)

datetime,2007-10-01,2007-10-02,2007-10-03,2007-10-04,2007-10-05,2007-10-06,2007-10-07,2007-10-08,2007-10-09,2007-10-10,...,2013-09-21,2013-09-22,2013-09-23,2013-09-24,2013-09-25,2013-09-26,2013-09-27,2013-09-28,2013-09-29,2013-09-30
Incoming Solar,1381.5,3953.2,3098.1,2213.9,1338.8,3671.5,4193.7,4213.8,4195.5,1340.2,...,2581.0,1592.0,4113.6,2134.2,1250.2,1034.2,2182.3,3384.5,478.2,2554.8
Wind Dir,281.583333,173.416667,263.875,209.227273,234.409091,248.375,238.130435,159.583333,128.833333,224.291667,...,104.875,144.833333,51.888889,51.888889,157.095238,137.083333,108.958333,97.25,108.666667,115.25
Snow Depth,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Wind Speed,0.604167,0.595833,0.758333,0.591667,0.795833,0.8,0.645833,0.629167,0.845833,0.670833,...,0.795833,0.6625,0.654167,0.641667,0.529167,0.55,0.608333,0.708333,1.0375,0.9625
Dewpoint,1.145833,-0.704167,0.541667,-0.754167,-2.558333,-4.525,-0.404167,-2.583333,-5.325,-2.583333,...,0.1125,3.554167,2.433333,2.529167,1.104167,1.645833,0.529167,0.670833,2.129167,4.7875


In [18]:
mvtseries["time_index"] = np.arange(mvtseries.shape[0])
mvtseries["group_id"] = 0
mvtseries.tail()

Unnamed: 0_level_0,Incoming Solar,Wind Dir,Snow Depth,Wind Speed,Dewpoint,Precipitation,Vapor Pressure,Relative Humidity,Air Temp,time_index,group_id
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2013-09-26,1034.2,137.083333,0.0,0.55,1.645833,12.3,689.458333,0.97,3.9,2187,0
2013-09-27,2182.3,108.958333,0.0,0.608333,0.529167,0.0,634.625,0.85,8.3,2188,0
2013-09-28,3384.5,97.25,0.0,0.708333,0.670833,0.0,642.125,0.60625,16.5,2189,0
2013-09-29,478.2,108.666667,0.0,1.0375,2.129167,0.6,719.083333,0.48625,13.9,2190,0
2013-09-30,2554.8,115.25,0.0,0.9625,4.7875,2.0,863.416667,0.747083,12.2,2191,0


In [19]:
# create the dataset from the pandas dataframe
dataset = TimeSeriesDataSet(
    data=mvtseries,
    group_ids=["group_id"],
    target="Incoming Solar",
    time_idx="time_index",
    max_encoder_length=7,
    max_prediction_length=1,
    time_varying_unknown_reals=[
        "Incoming Solar",
        "Wind Dir",
        "Snow Depth",
        "Wind Speed",
        "Dewpoint",
        "Precipitation",
        "Vapor Pressure",
        "Relative Humidity",
        "Air Temp",
    ],
)

In [20]:
# convert the dataset to a dataloader
data_loader = dataset.to_dataloader(batch_size=1, shuffle=False)

x, y = next(iter(data_loader))

x["encoder_cont"]
y


(tensor([[4213.7998]]), None)