# Data Import

In [7]:
import pandas as pd

# Load the raw CSV. The first column is parsed as "dd/mm/YYYY HH:MM"
# timestamps and becomes the (sorted) index of the frame.
solar_energy_raw = pd.read_csv("../ML_week3/solarenergy.csv",
                               delimiter=",",
                               index_col=0,
                               date_format="%d/%m/%Y %H:%M",
                               parse_dates=True).sort_index()

# Discard rows with missing values, then place the data on a regular hourly
# grid; hours created by the resampling are filled by linear interpolation.
solar_energy_hourly = (
    solar_energy_raw
    .dropna()
    .resample("1H")
    .interpolate("linear")
)

# Standardise every column: subtract its mean and divide by its standard
# deviation.
# NOTE(review): the mean/std are computed over the complete series, i.e.
# including the part that is later held out for testing. Consider fitting
# these statistics on the training portion only to avoid leakage.
solar_energy_df = \
  (solar_energy_hourly - solar_energy_hourly.mean()) / solar_energy_hourly.std()

solar_energy_df.head()

Unnamed: 0_level_0,solar_mw,wind-direction,wind-speed,humidity,average-wind-speed-(period),average-pressure-(period),temperature
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-05-03 02:00:00,-1.460457,0.663274,0.718079,0.096867,1.464472,-1.076407,0.641897
2020-05-03 03:00:00,-1.5829,0.663274,0.718079,-1.431309,1.190072,-1.144714,0.641897
2020-05-03 04:00:00,-1.672379,0.663274,0.718079,-1.112939,1.052872,-0.939792,0.641897
2020-05-03 05:00:00,-1.640198,0.663274,0.718079,-1.240287,0.641272,-1.0081,0.641897
2020-05-03 06:00:00,-1.440834,0.663274,0.718079,0.287889,-0.044728,-1.0081,0.641897


# Pandas to GluonTS

In [8]:
from gluonts.dataset.pandas import PandasDataset
from gluonts.dataset.split import split

# Sampling frequency of the series; reused for the dataset and the split point
# so the two can never disagree.
freq = "1H"

# Wrap the frame as a GluonTS dataset with "solar_mw" as the forecast target.
# No feature columns are declared here, so the remaining columns are not
# passed to the model as covariates.
solar_energy_ds = PandasDataset(
    solar_energy_df, target="solar_mw", freq=freq
  )

prediction_length = 24   # forecast horizon, in steps of `freq`
train_fraction = 0.7     # share of the timeline kept for training
num_test_windows = 10    # number of evaluation windows drawn after the split

# Timestamp located `train_fraction` of the way through the index; everything
# up to it is training data, everything after it feeds the test windows.
split_date = solar_energy_df.index[int(len(solar_energy_df)*train_fraction)]
(solar_energy_train,
 solar_energy_test_template) = split(solar_energy_ds,
                                      date=pd.Period(split_date,
                                                     freq=freq))
solar_energy_test = \
  solar_energy_test_template.generate_instances(
      prediction_length=prediction_length,
      windows=num_test_windows
    )

solar_energy_train

TrainingDataset(dataset=PandasDataset<size=1, freq=1H, num_feat_dynamic_real=0, num_past_feat_dynamic_real=0, num_feat_static_real=0, num_feat_static_cat=0, static_cardinalities=[]>, splitter=DateSplitter(date=Period('2020-07-06 10:00', 'H')))

# Model Instantiation

In [9]:
from gluonts.time_feature import (get_lags_for_frequency,
                                  time_features_from_frequency_str)
from transformers import AutoformerConfig, AutoformerForPrediction

# Time features and lag indices are both derived from the sampling frequency.
time_features = time_features_from_frequency_str(freq)
lags_sequence = get_lags_for_frequency(freq)

# One slot more than the frequency-derived time features.
# NOTE(review): presumably the extra slot is for an additional non-calendar
# feature added during preprocessing - confirm against the data pipeline.
n_time_feats = 1 + len(time_features)

# The context window is twice as long as the forecast horizon.
lookback = 2 * prediction_length

config = AutoformerConfig(
    d_model=32,
    decoder_layers=4,
    encoder_layers=4,
    num_time_features=n_time_feats,
    lags_sequence=lags_sequence,
    context_length=lookback,
    prediction_length=prediction_length,
  )

# Build a fresh model from the configuration (no pretrained checkpoint is
# loaded here).
model = AutoformerForPrediction(config)