In [1]:
from pandas import DataFrame
# Build a single-column frame whose 't' column holds the time steps 0..9.
df = DataFrame({'t': range(10)})
print(df)

   t
0  0
1  1
2  2
3  3
4  4
5  5
6  6
7  7
8  8
9  9


In [2]:
# Shifting observations by one time step

from pandas import DataFrame
# Lag the series by one step: row i of 't-1' holds the value of 't' at row i-1.
# The first row has no predecessor, so shift() fills it with NaN.
df = DataFrame({'t': range(10)})
df['t-1'] = df['t'].shift(periods=1)
print(df)

   t  t-1
0  0  NaN
1  1  0.0
2  2  1.0
3  3  2.0
4  4  3.0
5  5  4.0
6  6  5.0
7  7  6.0
8  8  7.0
9  9  8.0


In [3]:
from pandas import DataFrame
# Pull the series back by one step: row i of 't+1' holds the value of 't' at
# row i+1. The last row has no successor, so shift() fills it with NaN.
df = DataFrame({'t': range(10)})
df['t+1'] = df['t'].shift(periods=-1)
print(df)

   t  t+1
0  0  1.0
1  1  2.0
2  2  3.0
3  3  4.0
4  4  5.0
5  5  6.0
6  6  7.0
7  7  8.0
8  8  9.0
9  9  NaN


## series_to_supervised()

In this section, we will define a new Python function named series_to_supervised() that takes a univariate or multivariate time series and frames it as a supervised learning dataset.

The function takes four arguments:

 - data: Sequence of observations as a list or 2D NumPy array. Required.
 - n_in: Number of lag observations as input (X). Values may be between [1..len(data)]. Optional. Defaults to 1.
 - n_out: Number of observations as output (y). Values may be between [0..len(data)-1]. Optional. Defaults to 1.
 - dropnan: Boolean whether or not to drop rows with NaN values. Optional. Defaults to True.

The function returns a single value:

return: Pandas DataFrame of series framed for supervised learning.

In [6]:
from pandas import DataFrame
from pandas import concat

def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """
    Frame a time series as a supervised learning dataset.

    Arguments:
        data: Sequence of observations. It is passed to ``DataFrame(data)``,
            so a flat list or 1-D array is treated as a single variable and
            a list of rows or 2-D NumPy array as one column per variable.
        n_in: Number of lag observations as input (X).
        n_out: Number of observations as output (y).
        dropnan: Boolean whether or not to drop rows with NaN values.
    Returns:
        Pandas DataFrame of series framed for supervised learning. Columns
        are named ``var<j>(t-<i>)`` for the lagged inputs, followed by
        ``var<j>(t+<i>)`` for the outputs (the current step is ``t+0``).
    """
    df = DataFrame(data)
    # Take the variable count from the frame itself rather than from the
    # input's type: this also handles lists of rows and 1-D arrays, which
    # the type-based check either miscounted or crashed on.
    n_vars = df.shape[1]
    cols, names = [], []

    # Input sequence: t-n_in, ..., t-1 (oldest lag first).
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += ['var%d(t-%d)' % (j + 1, i) for j in range(n_vars)]

    # Forecast sequence: t+0, t+1, ..., t+(n_out-1).
    for i in range(n_out):
        cols.append(df.shift(-i))
        names += ['var%d(t+%d)' % (j + 1, i) for j in range(n_vars)]

    # Put it all together side by side.
    agg = concat(cols, axis=1)
    agg.columns = names

    # Shifting leaves NaN at the edges; drop those incomplete rows on request.
    if dropnan:
        agg = agg.dropna()
    return agg


# One-step univariate forecasting

In [8]:
# Default framing: one lag column as input, the current step as output.
values = list(range(10))
data = series_to_supervised(values)
print(data)

   var1(t-1)  var1(t+0)
1        0.0          1
2        1.0          2
3        2.0          3
4        3.0          4
5        4.0          5
6        5.0          6
7        6.0          7
8        7.0          8
9        8.0          9


In [10]:
# Use the three previous time steps as input features.
data = series_to_supervised(values, n_in=3)
data

Unnamed: 0,var1(t-3),var1(t-2),var1(t-1),var1(t+0)
3,0.0,1.0,2.0,3
4,1.0,2.0,3.0,4
5,2.0,3.0,4.0,5
6,3.0,4.0,5.0,6
7,4.0,5.0,6.0,7
8,5.0,6.0,7.0,8
9,6.0,7.0,8.0,9


In [12]:
# Multi-step (sequence) forecasting: two lag steps in, two steps out.
data = series_to_supervised(values, n_in=2, n_out=2)
print(data)

   var1(t-2)  var1(t-1)  var1(t+0)  var1(t+1)
2        0.0        1.0          2        3.0
3        1.0        2.0          3        4.0
4        2.0        3.0          4        5.0
5        3.0        4.0          5        6.0
6        4.0        5.0          6        7.0
7        5.0        6.0          7        8.0
8        6.0        7.0          8        9.0


In [13]:
# Multivariate Forecasting

In [15]:
# Two parallel observation series: ob1 counts 0..9, ob2 counts 50..59.
raw = DataFrame({'ob1': range(10), 'ob2': range(50, 60)})
values = raw.values
# Default framing applied to both variables at once.
data = series_to_supervised(values)
print(data)

   var1(t-1)  var2(t-1)  var1(t+0)  var2(t+0)
1        0.0       50.0          1         51
2        1.0       51.0          2         52
3        2.0       52.0          3         53
4        3.0       53.0          4         54
5        4.0       54.0          5         55
6        5.0       55.0          6         56
7        6.0       56.0          7         57
8        7.0       57.0          8         58
9        8.0       58.0          9         59


In [16]:
# Same two-variable series as before: ob1 counts 0..9, ob2 counts 50..59.
raw = DataFrame({'ob1': range(10), 'ob2': range(50, 60)})
values = raw.values
# One lag step as input, two steps (t+0 and t+1) as output for each variable.
data = series_to_supervised(values, n_in=1, n_out=2)
print(data)

   var1(t-1)  var2(t-1)  var1(t+0)  var2(t+0)  var1(t+1)  var2(t+1)
1        0.0       50.0          1         51        2.0       52.0
2        1.0       51.0          2         52        3.0       53.0
3        2.0       52.0          3         53        4.0       54.0
4        3.0       53.0          4         54        5.0       55.0
5        4.0       54.0          5         55        6.0       56.0
6        5.0       55.0          6         56        7.0       57.0
7        6.0       56.0          7         57        8.0       58.0
8        7.0       57.0          8         58        9.0       59.0
