In [1]:
import pandas as pd
import numpy as np
import datetime

In [41]:
# Build a 5x9 demo frame: 5 consecutive daily rows starting 2022-01-01,
# 9 columns, filled row by row with the integers 0..44.
df = pd.DataFrame(
    data=np.arange(45).reshape(5, 9),
    index=pd.date_range(start='20220101', periods=5),
    columns=['O', 'H', 'L', 'C', 'V', 'AP', 'AV', 'BP', 'BV'],
)
# The trailing comma makes the cell value a 1-tuple, so the frame is shown
# inside a tuple repr rather than as a standalone table.
df,

(             O   H   L   C   V  AP  AV  BP  BV
 2022-01-01   0   1   2   3   4   5   6   7   8
 2022-01-02   9  10  11  12  13  14  15  16  17
 2022-01-03  18  19  20  21  22  23  24  25  26
 2022-01-04  27  28  29  30  31  32  33  34  35
 2022-01-05  36  37  38  39  40  41  42  43  44,)

In [26]:
# DataFrame.shift(periods, freq, axis, fill_value) overview:
#     periods    - number of steps; positive moves down/right, negative moves up/left
#     freq       - offset string for one step, e.g. '1d', '2min'
#     axis       - 0 shifts along the rows, 1 shifts along the columns
#     fill_value - value written into the cells vacated by the shift

# With freq given, only the index labels move (by periods * freq);
# the values stay attached to their rows.
df_1 = df.shift(periods=-2, freq='2d')  # index labels move 4 days earlier
df_2 = df.shift(periods=-1, freq='2d')  # index labels move 2 days earlier

# Without freq, the values move and the index stays in place.
df_10 = df.shift(periods=-1, axis=1, fill_value=-100)  # one column to the left
df_11 = df.shift(periods=-1, axis=0, fill_value=-100)  # one row up
df_12 = df.shift(periods=1, axis=1, fill_value=-100)   # one column to the right
df_13 = df.shift(periods=1, axis=0, fill_value=-100)   # one row down

df_1, df_2, df_10, df_11, df_12, df_13

(             O   H   L   C   V  AP  AV  BP  BV
 2021-12-28   0   1   2   3   4   5   6   7   8
 2021-12-29   9  10  11  12  13  14  15  16  17
 2021-12-30  18  19  20  21  22  23  24  25  26
 2021-12-31  27  28  29  30  31  32  33  34  35
 2022-01-01  36  37  38  39  40  41  42  43  44,
              O   H   L   C   V  AP  AV  BP  BV
 2021-12-30   0   1   2   3   4   5   6   7   8
 2021-12-31   9  10  11  12  13  14  15  16  17
 2022-01-01  18  19  20  21  22  23  24  25  26
 2022-01-02  27  28  29  30  31  32  33  34  35
 2022-01-03  36  37  38  39  40  41  42  43  44,
              O   H   L   C   V  AP  AV  BP   BV
 2022-01-01   1   2   3   4   5   6   7   8 -100
 2022-01-02  10  11  12  13  14  15  16  17 -100
 2022-01-03  19  20  21  22  23  24  25  26 -100
 2022-01-04  28  29  30  31  32  33  34  35 -100
 2022-01-05  37  38  39  40  41  42  43  44 -100,
               O    H    L    C    V   AP   AV   BP   BV
 2022-01-01    9   10   11   12   13   14   15   16   17
 2022-01-02  

In [27]:
# Move every value two rows down; the first two rows are left as NaN
df_2 = df.shift(periods=2)
df_2

Unnamed: 0,O,H,L,C,V,AP,AV,BP,BV
2022-01-01,,,,,,,,,
2022-01-02,,,,,,,,,
2022-01-03,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0
2022-01-04,9.0,10.0,11.0,12.0,13.0,14.0,15.0,16.0,17.0
2022-01-05,18.0,19.0,20.0,21.0,22.0,23.0,24.0,25.0,26.0


In [28]:
# Goal: place the following row's values alongside the current row.
# Step 1: pull every row up by one and tag the shifted columns with a '_1' suffix
df_1 = df.shift(periods=-1).add_suffix('_1')
df_1

Unnamed: 0,O_1,H_1,L_1,C_1,V_1,AP_1,AV_1,BP_1,BV_1
2022-01-01,9.0,10.0,11.0,12.0,13.0,14.0,15.0,16.0,17.0
2022-01-02,18.0,19.0,20.0,21.0,22.0,23.0,24.0,25.0,26.0
2022-01-03,27.0,28.0,29.0,30.0,31.0,32.0,33.0,34.0,35.0
2022-01-04,36.0,37.0,38.0,39.0,40.0,41.0,42.0,43.0,44.0
2022-01-05,,,,,,,,,


In [35]:
# Step 2: put the shifted O_1 / C_1 columns in front of the original columns
df_2 = pd.concat(objs=[df_1.loc[:, ['O_1', 'C_1']], df], axis='columns')
df_2

Unnamed: 0,O_1,C_1,O,H,L,C,V,AP,AV,BP,BV
2022-01-01,9.0,12.0,0,1,2,3,4,5,6,7,8
2022-01-02,18.0,21.0,9,10,11,12,13,14,15,16,17
2022-01-03,27.0,30.0,18,19,20,21,22,23,24,25,26
2022-01-04,36.0,39.0,27,28,29,30,31,32,33,34,35
2022-01-05,,,36,37,38,39,40,41,42,43,44


In [49]:
# Re-display the source frame; it shows the same values as when it was first built
df

Unnamed: 0,O,H,L,C,V,AP,AV,BP,BV
2022-01-01,0,1,2,3,4,5,6,7,8
2022-01-02,9,10,11,12,13,14,15,16,17
2022-01-03,18,19,20,21,22,23,24,25,26
2022-01-04,27,28,29,30,31,32,33,34,35
2022-01-05,36,37,38,39,40,41,42,43,44


In [50]:
def concat_shift(df, periods, columns) -> pd.DataFrame:
    """
    Prepend row-shifted copies of selected columns to a frame.

    The selected columns are shifted along the rows by ``periods`` and
    renamed to ``<column>_<periods>``. A positive ``periods`` brings values
    from earlier rows into the current row; a negative one brings values
    from later rows. Rows with no source value hold NaN.

    Parameters
    ----------
    df : pd.DataFrame
        Source frame. It is not modified.
    periods : int
        Number of rows to shift by; also used verbatim in the column suffix.
    columns : str or list
        Label(s) of the columns to shift. A single string is treated as one
        column name.

    Returns
    -------
    pd.DataFrame
        A new frame with the shifted columns first, followed by all columns
        of ``df``. Existing columns with the same name are not de-duplicated.
    """
    # A bare string would select a Series (no ``columns`` to rename) and the
    # suffix loop would iterate over its characters, so wrap it in a list.
    if isinstance(columns, str):
        columns = [columns]

    suffix = f"_{periods}"

    shifted = df[columns].shift(periods)
    # Format instead of ``+`` so non-string column labels also get a suffix.
    shifted.columns = [f"{label}{suffix}" for label in columns]

    return pd.concat([shifted, df], axis=1)

In [51]:
# Prepend the previous row's O and H values as O_1 / H_1
df_3 = concat_shift(df, periods=1, columns=['O', 'H'])
df_3

Unnamed: 0,O_1,H_1,O,H,L,C,V,AP,AV,BP,BV
2022-01-01,,,0,1,2,3,4,5,6,7,8
2022-01-02,0.0,1.0,9,10,11,12,13,14,15,16,17
2022-01-03,9.0,10.0,18,19,20,21,22,23,24,25,26
2022-01-04,18.0,19.0,27,28,29,30,31,32,33,34,35
2022-01-05,27.0,28.0,36,37,38,39,40,41,42,43,44


In [53]:
# Add the O and H values from two rows earlier as O_2 / H_2, on top of df_3
df_4 = concat_shift(df_3, periods=2, columns=['O', 'H'])
df_4

Unnamed: 0,O_2,H_2,O_1,H_1,O,H,L,C,V,AP,AV,BP,BV
2022-01-01,,,,,0,1,2,3,4,5,6,7,8
2022-01-02,,,0.0,1.0,9,10,11,12,13,14,15,16,17
2022-01-03,0.0,1.0,9.0,10.0,18,19,20,21,22,23,24,25,26
2022-01-04,9.0,10.0,18.0,19.0,27,28,29,30,31,32,33,34,35
2022-01-05,18.0,19.0,27.0,28.0,36,37,38,39,40,41,42,43,44


In [54]:
# Add the O and H values from three rows earlier as O_3 / H_3, on top of df_4
df_5 = concat_shift(df_4, periods=3, columns=['O', 'H'])
df_5

Unnamed: 0,O_3,H_3,O_2,H_2,O_1,H_1,O,H,L,C,V,AP,AV,BP,BV
2022-01-01,,,,,,,0,1,2,3,4,5,6,7,8
2022-01-02,,,,,0.0,1.0,9,10,11,12,13,14,15,16,17
2022-01-03,,,0.0,1.0,9.0,10.0,18,19,20,21,22,23,24,25,26
2022-01-04,0.0,1.0,9.0,10.0,18.0,19.0,27,28,29,30,31,32,33,34,35
2022-01-05,9.0,10.0,18.0,19.0,27.0,28.0,36,37,38,39,40,41,42,43,44


In [55]:
# Build the lag-1, lag-2 and lag-3 columns for O and H in a loop,
# starting from a copy of the source frame.
df_ = df.copy()
for i in (1, 2, 3):
    df_ = concat_shift(df_, periods=i, columns=['O', 'H'])
df_

Unnamed: 0,O_3,H_3,O_2,H_2,O_1,H_1,O,H,L,C,V,AP,AV,BP,BV
2022-01-01,,,,,,,0,1,2,3,4,5,6,7,8
2022-01-02,,,,,0.0,1.0,9,10,11,12,13,14,15,16,17
2022-01-03,,,0.0,1.0,9.0,10.0,18,19,20,21,22,23,24,25,26
2022-01-04,0.0,1.0,9.0,10.0,18.0,19.0,27,28,29,30,31,32,33,34,35
2022-01-05,9.0,10.0,18.0,19.0,27.0,28.0,36,37,38,39,40,41,42,43,44
