Source/Reference:
https://machinelearningmastery.com/convert-time-series-supervised-learning-problem-python/

In [1]:
from keras.models import Sequential
from keras.layers import Dense, Activation, LSTM

import numpy as np
import pandas as pd

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# A time series is a sequence of numbers ordered by a time index,
# i.e. a list or column of *ordered* values. Minimal example:
df = pd.DataFrame({'t': list(range(10))})
print(df)

   t
0  0
1  1
2  2
3  3
4  4
5  5
6  6
7  7
8  8
9  9


In [3]:
# A supervised learning problem has inputs (X) and outputs (y);
# the goal is to learn to predict the output pattern from the input.
# rename() returns a new frame instead of mutating in place, and y is derived
# from X (rather than from a second hard-coded range) so the two columns
# always stay the same length and stay in step with each other.
df = df.rename(columns={'t': 'X'}).assign(y=lambda frame: frame['X'] + 1)
print(df)

   X   y
0  0   1
1  1   2
2  2   3
3  3   4
4  4   5
5  5   6
6  6   7
7  7   8
8  8   9
9  9  10


In [4]:
# pandas' shift() is the key tool for turning time series data into a supervised learning problem:
# it creates a copy of a column with its values shifted forward or backward.

"This is the behavior required to create columns of lag observations as well as columns of forecast observations for a time series dataset in a supervised learning format."

In [5]:
from pandas import DataFrame

# Build the series, then add a lag-1 column: shift(1) moves every value
# down one row, so each row also carries the previous observation.
df = DataFrame({'t': list(range(10))})
df['t-1'] = df['t'].shift(periods=1)
print(df)

# Row 0 has no previous observation (NaN), so it is ignored.
# From row 1 on: t-1 is the input (X) and t is the output (y).

   t  t-1
0  0  NaN
1  1  0.0
2  2  1.0
3  3  2.0
4  4  3.0
5  5  4.0
6  6  5.0
7  7  6.0
8  8  7.0
9  9  8.0


In [6]:
# Relabel the lag column as the input X and the current value as the output y,
# and put X first so each row reads as (input, output).
df = df.rename(columns={'t-1': 'X', 't': 'y'})[['X', 'y']]
print(df)

     X  y
0  NaN  0
1  0.0  1
2  1.0  2
3  2.0  3
4  3.0  4
5  4.0  5
6  5.0  6
7  6.0  7
8  7.0  8
9  8.0  9


In [7]:
# Series.shift() also accepts negative integers: shift(-1) pulls every
# observation up one row, which leaves NaN in the last row.
df_negative_shift = pd.DataFrame({'t': list(range(10))})
df_negative_shift['t+1'] = df_negative_shift['t'].shift(periods=-1)
print(df_negative_shift)

# The current value (t) is the input used to forecast the next value (t+1).

   t  t+1
0  0  1.0
1  1  2.0
2  2  3.0
3  3  4.0
4  4  5.0
5  5  6.0
6  6  7.0
7  7  8.0
8  8  9.0
9  9  NaN


In [None]:
# "This permits not only classical X -> y prediction, 
# but also X -> Y where both input and output can be sequences."
# shift() also works on multivariate time series problems,
# i.e. when several variables are observed at each time step instead of one.