### 时间序列转换成适用于监督学习的数据

> 参考链接：https://machinelearningmastery.com/convert-time-series-supervised-learning-problem-python/

In [1]:
import pandas as pd

In [8]:
# Toy univariate series: a single column 't' holding the values 0..9.
df = pd.DataFrame({'t': list(range(10))})
print(df)

   t
0  0
1  1
2  2
3  3
4  4
5  5
6  6
7  7
8  8
9  9


In [3]:
# Lag the series by one step so each row also holds the previous observation.
df['t-1'] = df['t'].shift(periods=1)
print(df)
# Row 0 contains NaN, so that row is discarded. In row 1, the 't-1' value (0)
# is the input X and the 't' value (1) is the output y.

   t  t-1
0  0  NaN
1  1  0.0
2  2  1.0
3  3  2.0
4  4  3.0
5  5  4.0
6  6  5.0
7  7  6.0
8  8  7.0
9  9  8.0


In [4]:
# Lag the series by two steps; the first two rows of 't-2' become NaN.
df['t-2'] = df['t'].shift(periods=2)
print(df)

   t  t-1  t-2
0  0  NaN  NaN
1  1  0.0  NaN
2  2  1.0  0.0
3  3  2.0  1.0
4  4  3.0  2.0
5  5  4.0  3.0
6  6  5.0  4.0
7  7  6.0  5.0
8  8  7.0  6.0
9  9  8.0  7.0


In [6]:
# A negative shift pulls future values backwards: 't+1' is the next observation,
# so the last row has no value and becomes NaN.
df['t+1'] = df['t'].shift(periods=-1)
df

Unnamed: 0,t,t-1,t-2,t+1
0,0,,,1.0
1,1,0.0,,2.0
2,2,1.0,0.0,3.0
3,3,2.0,1.0,4.0
4,4,3.0,2.0,5.0
5,5,4.0,3.0,6.0
6,6,5.0,4.0,7.0
7,7,6.0,5.0,8.0
8,8,7.0,6.0,9.0
9,9,8.0,7.0,


In [17]:
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a supervised-learning table of shifted columns.

    With the default arguments the result pairs each observation at t-1
    (input) with the observation at t (output).

    Parameters
    ----------
    data : list, nested list, numpy array, pandas Series or DataFrame
        Observations ordered in time. Anything accepted by ``pd.DataFrame``;
        each resulting column is treated as one variable.
    n_in : int, default 1
        Number of lagged steps (t-n_in, ..., t-1) to include as inputs.
    n_out : int, default 1
        Number of steps (t, t+1, ..., t+n_out-1) to include as outputs.
    dropnan : bool, default True
        If True, drop every row that contains a NaN value.

    Returns
    -------
    pandas.DataFrame
        Columns are named ``var<j>(t-<i>)``, ``var<j>(t)`` and
        ``var<j>(t+<i>)``, where ``j`` is the 1-based variable number.

    Raises
    ------
    ValueError
        If ``n_in`` and ``n_out`` together request no columns at all.
    """
    df = pd.DataFrame(data)
    # Count variables from the constructed frame rather than from the input
    # type, so 1-D arrays, Series and nested lists are all handled correctly.
    n_vars = df.shape[1]
    cols, names = [], []
    # Input sequence: t-n_in, ..., t-1 (all variables are shifted together).
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += ['var%d(t-%d)' % (j + 1, i) for j in range(n_vars)]
    # Output sequence: t, t+1, ..., t+n_out-1.
    for i in range(n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            names += ['var%d(t+%d)' % (j + 1, i) for j in range(n_vars)]
    if not cols:
        raise ValueError('n_in and n_out must request at least one time step')
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    if dropnan:
        # Shifting introduces NaN at the edges; return a new frame without them.
        agg = agg.dropna()
    return agg

In [13]:
# Pass only the original 't' column: by this point df also carries the helper
# columns 't-1', 't-2' and 't+1', which would otherwise be treated as extra variables.
series_to_supervised(df[['t']])

['var1(t-1)']
['var1(t-1)', 'var1(t)']


Unnamed: 0,var1(t-1),var1(t)
1,0.0,1
2,1.0,2
3,2.0,3
4,3.0,4
5,4.0,5
6,5.0,6
7,6.0,7
8,7.0,8
9,8.0,9


### 多元时间序列

In [18]:
# Two parallel observation series: 'ob1' runs 0..9 and 'ob2' runs 50..59.
raw = pd.DataFrame({
    'ob1': list(range(10)),
    'ob2': list(range(50, 60)),
})
# Hand the bare 2-D array to the helper; each column is treated as one variable.
values = raw.values
data = series_to_supervised(values)
print(data)

   var1(t-1)  var2(t-1)  var1(t)  var2(t)
1        0.0       50.0        1       51
2        1.0       51.0        2       52
3        2.0       52.0        3       53
4        3.0       53.0        4       54
5        4.0       54.0        5       55
6        5.0       55.0        6       56
7        6.0       56.0        7       57
8        7.0       57.0        8       58
9        8.0       58.0        9       59
