# Imports

In [1]:
import os
import pandas as pd
import numpy as np

In [2]:
import seaborn as sns
import matplotlib.pyplot as plt

In [3]:
%matplotlib inline

In [4]:
data_dir = os.getcwd() + '/Data/'

# Parse dates and set TIMESTAMP as index. Will be useful later
traindata = pd.read_csv(data_dir + 'TrainData.csv', parse_dates=['TIMESTAMP'])

# Removing WS100 related data
traindata = traindata.drop(['U100', 'V100', 'WS100'], axis=1)
forecast_input = pd.read_csv(data_dir + "WeatherForecastInput.csv", parse_dates=['TIMESTAMP'], index_col=0)
solution = pd.read_csv(data_dir + 'Solution.csv', parse_dates=['TIMESTAMP'])

In [5]:
traindata = traindata.filter(['POWER'], axis=1)
traindata.head(25)

Unnamed: 0,POWER
0,0.273678
1,0.086796
2,0.006811
3,0.018646
4,0.034812
5,0.021917
6,0.018233
7,0.009642
8,0.005535
9,0.017217


In [6]:
traindata.shape

(16080, 1)

In [7]:
# traindata = traindata.squeeze()

In [8]:
traindata

Unnamed: 0,POWER
0,0.273678
1,0.086796
2,0.006811
3,0.018646
4,0.034812
...,...
16075,0.101254
16076,0.105047
16077,0.145079
16078,0.180933


In [9]:
solution.head()

Unnamed: 0,TIMESTAMP,POWER
0,2013-11-01 01:00:00,0.167215
1,2013-11-01 02:00:00,0.063998
2,2013-11-01 03:00:00,0.039035
3,2013-11-01 04:00:00,0.036232
4,2013-11-01 05:00:00,0.064888


In [10]:
# solution = solution.squeeze()

In [11]:
all_data = traindata.append(solution)
all_data

Unnamed: 0,POWER,TIMESTAMP
0,0.273678,NaT
1,0.086796,NaT
2,0.006811,NaT
3,0.018646,NaT
4,0.034812,NaT
...,...,...
715,0.963328,2013-11-30 20:00:00
716,0.948834,2013-11-30 21:00:00
717,0.925991,2013-11-30 22:00:00
718,0.773840,2013-11-30 23:00:00


---

# Time Encoding

In order to forecast wind power production using the time series data, we are required to reduce the task to the simpler task of tabular regression. This allows to apply any regression algorithm to the forecasting problem. 


In [12]:
def transform_data(data, in_num, fh): 
    """
    Splits the series into train and test sets. Each step takes multiple points as inputs
    :param data: an individual TS
    :param fh: number of out of sample points
    :param in_num: number of input points for the forecast
    :return:
    """
    x, y = d[:-1], np.roll(d, -in_num)[:-in_num]
    x = np.reshape(x, (-1, 1))
    
    tmp = np.roll(x, -1)
    
    for i in range(1, in_num):
        x = np.concatenate((x[:-1], tmp[:-1]), 1)
        tmp = np.roll(tmp, -1)[:-1]
        
    return x, y
    

In [13]:
traindata = traindata.index

In [14]:
print(traindata.shape)

(16080,)


In [15]:
fh = np.arange(1, len(solution) + 1)
print(fh.shape)
in_num = 24
#print(fh)

train = traindata.values[:-24] # 24 out of sample due to fh
print(train)
print(train.shape)
x = train # ?
y = np.roll(train, -in_num)[:-in_num]

(720,)
[    0     1     2 ... 16053 16054 16055]
(16056,)


In [16]:
print(x.shape)
x

(16056,)


array([    0,     1,     2, ..., 16053, 16054, 16055])

In [17]:
x = np.reshape(x, (-1, 1))
print(x.shape)
print(x)

(16056, 1)
[[    0]
 [    1]
 [    2]
 ...
 [16053]
 [16054]
 [16055]]


In [18]:
print(y.shape)
y

(16032,)


array([   24,    25,    26, ..., 16053, 16054, 16055])

In [19]:
tmp = np.roll(x, -1)
print(tmp.shape)
print(tmp)
for i in range(1, in_num):
    x = np.concatenate((x[:-1], tmp[:-1]), 1)
    tmp = np.roll(tmp, -1)[:-1]
    
print(x.shape)
x

(16056, 1)
[[    1]
 [    2]
 [    3]
 ...
 [16054]
 [16055]
 [    0]]
(16033, 24)


array([[    0,     1,     2, ...,    21,    22,    23],
       [    1,     2,     3, ...,    22,    23,    24],
       [    2,     3,     4, ...,    23,    24,    25],
       ...,
       [16030, 16031, 16032, ..., 16051, 16052, 16053],
       [16031, 16032, 16033, ..., 16052, 16053, 16054],
       [16032, 16033, 16034, ..., 16053, 16054, 16055]])

In [20]:
tmp = np.roll(x, -1)
print(tmp.shape)
tmp

(16033, 24)


array([[    1,     2,     3, ...,    22,    23,     1],
       [    2,     3,     4, ...,    23,    24,     2],
       [    3,     4,     5, ...,    24,    25,     3],
       ...,
       [16031, 16032, 16033, ..., 16052, 16053, 16031],
       [16032, 16033, 16034, ..., 16053, 16054, 16032],
       [16033, 16034, 16035, ..., 16054, 16055,     0]])

In [21]:
#for i in range(1, in_num):
 #   x = np.concatenate((x[:-1], tmp[:-1]), 1)

In [22]:
print(x[:-1].shape)
x[:-1]

(16032, 24)


array([[    0,     1,     2, ...,    21,    22,    23],
       [    1,     2,     3, ...,    22,    23,    24],
       [    2,     3,     4, ...,    23,    24,    25],
       ...,
       [16029, 16030, 16031, ..., 16050, 16051, 16052],
       [16030, 16031, 16032, ..., 16051, 16052, 16053],
       [16031, 16032, 16033, ..., 16052, 16053, 16054]])

In [23]:
print(tmp[:-1].shape)
tmp[:-1]

(16032, 24)


array([[    1,     2,     3, ...,    22,    23,     1],
       [    2,     3,     4, ...,    23,    24,     2],
       [    3,     4,     5, ...,    24,    25,     3],
       ...,
       [16030, 16031, 16032, ..., 16051, 16052, 16030],
       [16031, 16032, 16033, ..., 16052, 16053, 16031],
       [16032, 16033, 16034, ..., 16053, 16054, 16032]])

In [24]:
b = np.concatenate((x[:-1], tmp[:-1]), 1)
print(b.shape)
b

(16032, 48)


array([[    0,     1,     2, ...,    22,    23,     1],
       [    1,     2,     3, ...,    23,    24,     2],
       [    2,     3,     4, ...,    24,    25,     3],
       ...,
       [16029, 16030, 16031, ..., 16051, 16052, 16030],
       [16030, 16031, 16032, ..., 16052, 16053, 16031],
       [16031, 16032, 16033, ..., 16053, 16054, 16032]])

In [25]:
tmp = np.roll(tmp, -1)[:-1]

In [26]:
print(tmp)
print(tmp.shape)

[[    2     3     4 ...    23     1     2]
 [    3     4     5 ...    24     2     3]
 [    4     5     6 ...    25     3     4]
 ...
 [16031 16032 16033 ... 16052 16030 16031]
 [16032 16033 16034 ... 16053 16031 16032]
 [16033 16034 16035 ... 16054 16032 16033]]
(16032, 24)


In [27]:
b = np.concatenate((b[:-1], tmp[:-1]), 1)
print(b.shape)
b

(16031, 72)


array([[    0,     1,     2, ...,    23,     1,     2],
       [    1,     2,     3, ...,    24,     2,     3],
       [    2,     3,     4, ...,    25,     3,     4],
       ...,
       [16028, 16029, 16030, ..., 16051, 16029, 16030],
       [16029, 16030, 16031, ..., 16052, 16030, 16031],
       [16030, 16031, 16032, ..., 16053, 16031, 16032]])

In [28]:
x[24]

array([24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
       41, 42, 43, 44, 45, 46, 47])

In [29]:
y

array([   24,    25,    26, ..., 16053, 16054, 16055])

In [30]:
l = np.roll([0,1,2,3,4,5], -1)[:-1]
print(l)
k = np.array([0,1,2,3,4,5])[1:]
print(k)

[1 2 3 4 5]
[1 2 3 4 5]


In [31]:
print(traindata.shape)

(16080,)


In [32]:
x[:5,:]

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
        16, 17, 18, 19, 20, 21, 22, 23],
       [ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
        17, 18, 19, 20, 21, 22, 23, 24],
       [ 2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
        18, 19, 20, 21, 22, 23, 24, 25],
       [ 3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
        19, 20, 21, 22, 23, 24, 25, 26],
       [ 4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
        20, 21, 22, 23, 24, 25, 26, 27]])

In [33]:
x.shape

(16033, 24)

In [34]:
y.shape

(16032,)

In [35]:
def transform_test_data(data, in_num, fh): 
    """
    Splits the series into train and test sets. Each step takes multiple points as inputs
    :param data: an individual TS
    :param fh: number of out of sample points
    :param in_num: number of input points for the forecast
    :return:
    """
    d = data[-(fh + in_num):]
    x, y = d[:-1], np.roll(d, -in_num)[:-in_num]
    x = np.reshape(x, (-1, 1))
    
    tmp = np.roll(x, -1)
    
    for i in range(1, in_num):
        x = np.concatenate((x[:-1], tmp[:-1]), 1)
        tmp = np.roll(tmp, -1)[:-1]
        
    return x, y

In [36]:
x_test, y_test = transform_test_data(solution.values, 24, 24)

In [37]:
x_test.shape

(71, 24)

In [38]:
x_test

array([[Timestamp('2013-11-29 01:00:00'), 0.8750853672,
        Timestamp('2013-11-29 02:00:00'), ..., 0.6788487114,
        Timestamp('2013-11-29 12:00:00'), 0.6757934654],
       [0.8750853672, Timestamp('2013-11-29 02:00:00'), 0.6945562704,
        ..., Timestamp('2013-11-29 12:00:00'), 0.6757934654,
        Timestamp('2013-11-29 13:00:00')],
       [Timestamp('2013-11-29 02:00:00'), 0.6945562704,
        Timestamp('2013-11-29 03:00:00'), ..., 0.6757934654,
        Timestamp('2013-11-29 13:00:00'), 0.6970184393],
       ...,
       [Timestamp('2013-11-30 11:00:00'), 0.5097498293,
        Timestamp('2013-11-30 12:00:00'), ..., 0.9488336149,
        Timestamp('2013-11-30 22:00:00'), 0.9259911578],
       [0.5097498293, Timestamp('2013-11-30 12:00:00'), 0.6541820927,
        ..., Timestamp('2013-11-30 22:00:00'), 0.9259911578,
        Timestamp('2013-11-30 23:00:00')],
       [Timestamp('2013-11-30 12:00:00'), 0.6541820927,
        Timestamp('2013-11-30 13:00:00'), ..., 0.9259911578,
 

In [39]:
from sklearn.neural_network import MLPRegressor

In [40]:
ann = MLPRegressor(solver='lbfgs', activation='tanh', hidden_layer_sizes=[100, 100])

In [41]:
ann.fit(x,y)

ValueError: Found input variables with inconsistent numbers of samples: [16033, 16032]

In [42]:
test = traindata.values[-(24 + in_num):] # 24 out of sample due to fh

In [43]:
print(test.shape)
print(test)

(48,)
[16032 16033 16034 16035 16036 16037 16038 16039 16040 16041 16042 16043
 16044 16045 16046 16047 16048 16049 16050 16051 16052 16053 16054 16055
 16056 16057 16058 16059 16060 16061 16062 16063 16064 16065 16066 16067
 16068 16069 16070 16071 16072 16073 16074 16075 16076 16077 16078 16079]


In [44]:
print(train.shape)
print(train)

(16056,)
[    0     1     2 ... 16053 16054 16055]


In [48]:
print(x.shape)
print(x)
print(x[-1, :].shape)
print(x[-1, :])
print(x[-1, :].reshape(1, -1).shape)
print(x[-1, :].reshape(1, -1))

(16033, 24)
[[    0     1     2 ...    21    22    23]
 [    1     2     3 ...    22    23    24]
 [    2     3     4 ...    23    24    25]
 ...
 [16030 16031 16032 ... 16051 16052 16053]
 [16031 16032 16033 ... 16052 16053 16054]
 [16032 16033 16034 ... 16053 16054 16055]]
(24,)
[16032 16033 16034 16035 16036 16037 16038 16039 16040 16041 16042 16043
 16044 16045 16046 16047 16048 16049 16050 16051 16052 16053 16054 16055]
(1, 24)
[[16032 16033 16034 16035 16036 16037 16038 16039 16040 16041 16042 16043
  16044 16045 16046 16047 16048 16049 16050 16051 16052 16053 16054 16055]]


In [None]:
preds = []

last_window = x[-1, :].reshape(1, -1)
last_pred = 

In [None]:
pred = ann.predict(x_test)

In [None]:
ann.score(x_test, y_test)

In [62]:
from sktime.performance_metrics.forecasting import smape_loss
y_test= pd.Series(y_test, index=solution.index[-24:])
smape_loss(pd.Series(pred, index=y_test.index), y_test)

ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [63]:
from sktime.datasets import load_airline 
from sktime.performance_metrics.forecasting import make_forecasting_scorer

In [None]:
make_forecasting_scorer

In [65]:
d = pd.DataFrame(load_airline())
d

Unnamed: 0_level_0,Number of airline passengers
Period,Unnamed: 1_level_1
1949-01,112.0
1949-02,118.0
1949-03,132.0
1949-04,129.0
1949-05,121.0
...,...
1960-08,606.0
1960-09,508.0
1960-10,461.0
1960-11,390.0


In [None]:
d.tail(36)

In [None]:


# slightly modified code from the M4 competition
def split_into_train_test(data, in_num, fh):
    """
    Splits the series into train and test sets. Each step takes multiple points as inputs
    :param data: an individual TS
    :param fh: number of out of sample points
    :param in_num: number of input points for the forecast
    :return:
    """
    train, test = data[:-fh], data[-(fh + in_num):]
    x_train, y_train = train[:-1], np.roll(train, -in_num)[:-in_num]
    x_test, y_test = test[:-1], np.roll(test, -in_num)[:-in_num]
#     x_test, y_test = train[-in_num:], np.roll(test, -in_num)[:-in_num]

    # reshape input to be [samples, time steps, features] (N-NF samples, 1 time step, 1 feature)
    x_train = np.reshape(x_train, (-1, 1))
    x_test = np.reshape(x_test, (-1, 1))
    temp_test = np.roll(x_test, -1)
    temp_train = np.roll(x_train, -1)
    for x in range(1, in_num):
        x_train = np.concatenate((x_train[:-1], temp_train[:-1]), 1)
        x_test = np.concatenate((x_test[:-1], temp_test[:-1]), 1)
        temp_test = np.roll(temp_test, -1)[:-1]
        temp_train = np.roll(temp_train, -1)[:-1]

    return x_train, y_train, x_test, y_test

In [None]:
fh = np.arange(1, 36 + 1)  # we add 1 because the `stop` value is exclusive in `np.arange`
fh

In [None]:
x_train, y_train, x_test, y_test = split_into_train_test(d.index.values, 10, len(fh))
print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)

In [None]:
x_train

In [None]:
print(x_test.shape)
print(x_test)

In [None]:
y_test

In [None]:
y_test = pd.Series(y_test, index=d.index[-len(fh):])

In [None]:
y_test

In [None]:
all_data = all_data.squeeze()

In [None]:
x_train, y_train, x_test, y_test = split_into_train_test(all_data.values, 24, 672)

In [None]:
x_train.shape

In [None]:
ann2 = MLPRegressor(solver='lbfgs', activation='tanh', hidden_layer_sizes=[100, 100])

In [None]:
ann2.fit(x_train, y_train)

In [None]:
ann2.get_params().keys()

In [None]:
p = ann2.predict(x_test)


In [None]:
from sktime.performance_metrics.forecasting import smape_loss
y_test = pd.Series(y_test, index=all_data.index[-672:])
smape_loss(pd.Series(p, index=y_test.index), y_test)

In [None]:
p

In [None]:
from scripts._rmse import rmse_score, rmse

ann2_rmse = rmse(y_test, p)

In [None]:
ann2_rmse

In [None]:
ann2.score(x_test, y_test)

In [None]:
fig, ax = plt.subplot()

sns.lineplot(ax=ax, x=p.index, y=p, )

In [50]:
from sktime.forecasting.model_selection import temporal_train_test_split

In [49]:
traindata = pd.read_csv(data_dir + 'TrainData.csv', parse_dates=['TIMESTAMP'])
traindata = traindata.filter(['POWER'], axis=1)

In [57]:
y_train, y_test = temporal_train_test_split(traindata, fh=24)

In [59]:
print(y_train.shape)
print(y_train)

(16056, 1)
          POWER
0      0.273678
1      0.086796
2      0.006811
3      0.018646
4      0.034812
...         ...
16051  0.163680
16052  0.159726
16053  0.232109
16054  0.081647
16055  0.009372

[16056 rows x 1 columns]


In [60]:
print(y_test.shape)
print(y_test)

(1, 1)
          POWER
16079  0.236826


In [61]:
d

NameError: name 'd' is not defined