<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#向下采样" data-toc-modified-id="向下采样-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>向下采样</a></span></li><li><span><a href="#向上采样与插值" data-toc-modified-id="向上采样与插值-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>向上采样与插值</a></span></li><li><span><a href="#使用区间进行重新采样" data-toc-modified-id="使用区间进行重新采样-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>使用区间进行重新采样</a></span></li></ul></div>

In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [2]:
# Daily DatetimeIndex: 100 consecutive timestamps starting at 2000-01-01.
rng = pd.date_range(start='2000-01-01', periods=100, freq='D')

In [3]:
# Seed NumPy's global generator so the random values drawn in this notebook
# are identical on every Restart & Run All.
np.random.seed(42)
# One standard-normal draw per timestamp in `rng`, indexed by those timestamps.
ts = Series(np.random.randn(len(rng)),
            index=rng)

In [4]:
# Display the series (the recorded output below is a truncated view).
ts

2000-01-01    0.165914
2000-01-02    1.701454
2000-01-03   -0.230502
2000-01-04   -0.704661
2000-01-05    0.308662
2000-01-06    0.070235
2000-01-07   -1.114565
2000-01-08   -1.033986
2000-01-09    0.940699
2000-01-10   -0.796636
2000-01-11    0.005425
2000-01-12    0.452792
2000-01-13    0.530195
2000-01-14    0.310744
2000-01-15    0.456068
2000-01-16   -1.487182
2000-01-17    0.505272
2000-01-18   -0.261878
2000-01-19    1.464194
2000-01-20   -0.368198
2000-01-21   -0.006928
2000-01-22    0.735924
2000-01-23   -0.251471
2000-01-24   -0.942872
2000-01-25    0.562959
2000-01-26   -0.144462
2000-01-27    0.319910
2000-01-28   -0.030146
2000-01-29   -0.693563
2000-01-30    1.089756
                ...   
2000-03-11   -0.460000
2000-03-12   -0.421539
2000-03-13    0.332769
2000-03-14    0.271628
2000-03-15   -1.036599
2000-03-16    0.947106
2000-03-17   -1.407054
2000-03-18    0.654411
2000-03-19   -0.456031
2000-03-20   -0.261975
2000-03-21    0.702271
2000-03-22    3.323726
2000-03-23 

In [5]:
# Downsample to month-end ('M') buckets and take the mean of each bucket.
# No `kind` is passed; the recorded result is labelled with month-end timestamps.
ts.resample(rule='M').mean()

2000-01-31    0.019204
2000-02-29   -0.017022
2000-03-31   -0.004152
2000-04-30   -0.011003
Freq: M, dtype: float64

In [6]:
# Same monthly mean, explicitly requesting a timestamp-labelled result.
ts.resample(rule='M', kind='timestamp').mean()

2000-01-31    0.019204
2000-02-29   -0.017022
2000-03-31   -0.004152
2000-04-30   -0.011003
Freq: M, dtype: float64

In [7]:
# Same monthly mean, but labelled with monthly periods (e.g. 2000-01)
# instead of month-end timestamps.
ts.resample(rule='M', kind='period').mean()

2000-01    0.019204
2000-02   -0.017022
2000-03   -0.004152
2000-04   -0.011003
Freq: M, dtype: float64

# 向下采样

In [8]:
# Twelve one-minute timestamps starting at 2000-01-01 00:00.
# 'min' is used instead of the 'T' alias: 'T' is deprecated in recent pandas,
# while 'min' is accepted by both old and new versions.
rng = pd.date_range('2000-01-01',  # start time
                    periods=12,    # number of timestamps
                    freq='min')    # one per minute

In [9]:
# Integer series 0 .. len(rng)-1, one value per timestamp in `rng`.
ts = Series(data=np.arange(len(rng)), index=rng)

In [10]:
# Display the one-minute integer series.
ts

2000-01-01 00:00:00     0
2000-01-01 00:01:00     1
2000-01-01 00:02:00     2
2000-01-01 00:03:00     3
2000-01-01 00:04:00     4
2000-01-01 00:05:00     5
2000-01-01 00:06:00     6
2000-01-01 00:07:00     7
2000-01-01 00:08:00     8
2000-01-01 00:09:00     9
2000-01-01 00:10:00    10
2000-01-01 00:11:00    11
Freq: T, dtype: int32

In [11]:
# Sum within 5-minute bins. By default bins are left-closed / right-open:
# [00:00, 00:05), [00:05, 00:10), [00:10, 00:15)
ts.resample(rule='5min').sum()

2000-01-01 00:00:00    10
2000-01-01 00:05:00    35
2000-01-01 00:10:00    21
Freq: 5T, dtype: int32

In [12]:
# Left-open / right-closed bins:
# (23:55, 00:00], (00:00, 00:05], (00:05, 00:10], (00:10, 00:15]
ts.resample(rule='5min', closed='right').sum()

1999-12-31 23:55:00     0
2000-01-01 00:00:00    15
2000-01-01 00:05:00    40
2000-01-01 00:10:00    11
Freq: 5T, dtype: int32

In [13]:
# label='right' names each bin by its right edge instead of its left edge.
ts.resample(rule='5min', label='right', closed='right').sum()

2000-01-01 00:00:00     0
2000-01-01 00:05:00    15
2000-01-01 00:10:00    40
2000-01-01 00:15:00    11
Freq: 5T, dtype: int32

In [14]:
# Right-closed, right-labelled 5-minute sums with every label moved back by
# one second (e.g. 00:05:00 -> 00:04:59).
# `resample(loffset=...)` was deprecated in pandas 1.1 and removed in 2.0, so
# the offset is applied to the result's index with `shift(freq=...)`, which
# moves the labels without touching the values.
ts.resample('5min',
            closed='right',   # left-open, right-closed bins
            label='right'     # label each bin by its right edge
            ).sum().shift(periods=-1, freq=pd.Timedelta(seconds=1))

1999-12-31 23:59:59     0
2000-01-01 00:04:59    15
2000-01-01 00:09:59    40
2000-01-01 00:14:59    11
Freq: 5T, dtype: int32

In [15]:
# Open / high / low / close summary of each 5-minute bin.
ts.resample(rule='5min').ohlc()

Unnamed: 0,open,high,low,close
2000-01-01 00:00:00,0,4,0,4
2000-01-01 00:05:00,5,9,5,9
2000-01-01 00:10:00,10,11,10,11


# 向上采样与插值

In [16]:
# 2x4 frame of standard-normal draws, indexed by two consecutive Wednesdays.
frame = DataFrame(
    data=np.random.randn(2, 4),
    index=pd.date_range(start='1/1/2000', periods=2, freq='W-WED'),
    columns=['Colorado', 'Texas', 'New York', 'Ohio'],
)

In [17]:
# Display the two-row weekly frame.
frame

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,1.193767,-0.493337,0.709608,-0.980933
2000-01-12,1.115139,-0.835155,0.870269,-0.069021


In [18]:
# Daily-frequency resampler over `frame`; this is a Resampler object
# (see its repr in the next cell), not yet a DataFrame.
df_daily = frame.resample(rule='D')

In [19]:
# Display the resampler's repr.
df_daily

DatetimeIndexResampler [freq=<Day>, axis=0, closed=left, label=left, convention=start, base=0]

In [20]:
# Materialise the daily index without filling: in the recorded output the
# newly introduced days have no values.
df_daily.asfreq()

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,1.193767,-0.493337,0.709608,-0.980933
2000-01-06,,,,
2000-01-07,,,,
2000-01-08,,,,
2000-01-09,,,,
2000-01-10,,,,
2000-01-11,,,,
2000-01-12,1.115139,-0.835155,0.870269,-0.069021


In [21]:
# Forward fill: each new day takes the most recent earlier observation.
frame.resample(rule='D').ffill()

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,1.193767,-0.493337,0.709608,-0.980933
2000-01-06,1.193767,-0.493337,0.709608,-0.980933
2000-01-07,1.193767,-0.493337,0.709608,-0.980933
2000-01-08,1.193767,-0.493337,0.709608,-0.980933
2000-01-09,1.193767,-0.493337,0.709608,-0.980933
2000-01-10,1.193767,-0.493337,0.709608,-0.980933
2000-01-11,1.193767,-0.493337,0.709608,-0.980933
2000-01-12,1.115139,-0.835155,0.870269,-0.069021


In [22]:
# Forward fill, but carry each observation forward by at most two rows.
frame.resample(rule='D').ffill(limit=2)

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,1.193767,-0.493337,0.709608,-0.980933
2000-01-06,1.193767,-0.493337,0.709608,-0.980933
2000-01-07,1.193767,-0.493337,0.709608,-0.980933
2000-01-08,,,,
2000-01-09,,,,
2000-01-10,,,,
2000-01-11,,,,
2000-01-12,1.115139,-0.835155,0.870269,-0.069021


In [23]:
# Re-label onto a weekly grid anchored on Thursdays, forward-filling values.
frame.resample(rule='W-THU').ffill()

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-06,1.193767,-0.493337,0.709608,-0.980933
2000-01-13,1.115139,-0.835155,0.870269,-0.069021


# 使用区间进行重新采样

In [24]:
# 24x4 frame of standard-normal draws, indexed by monthly periods
# from January 2000 through December 2001.
frame = DataFrame(
    data=np.random.randn(24, 4),
    index=pd.period_range(start='1-2000', end='12-2001', freq='M'),
    columns=['Colorado', 'Texas', 'New York', 'Ohio'],
)

In [25]:
# Display the monthly-period frame.
frame

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01,0.362781,0.200059,0.092981,-1.318042
2000-02,1.362751,1.058781,0.482718,-1.055999
2000-03,-0.862085,-0.315595,-0.63752,-0.321935
2000-04,-0.713523,-0.192767,0.682412,-1.767096
2000-05,0.747428,0.070078,0.600444,0.445344
2000-06,1.338336,0.711442,0.60571,0.081706
2000-07,-0.841926,-0.845048,0.423556,-0.504845
2000-08,-1.668009,-0.675016,-1.903676,-0.677681
2000-09,1.304135,-0.085456,0.354473,1.127472
2000-10,-1.340947,0.68185,1.048449,-0.268838


In [26]:
# Downsample monthly periods to annual periods (year ending in December),
# averaging the months of each year.
annual_frame = frame.resample(rule='A-DEC').mean()

In [27]:
# Display the annual means.
annual_frame

Unnamed: 0,Colorado,Texas,New York,Ohio
2000,-0.036321,0.159291,0.355778,-0.329985
2001,-0.030628,-0.094573,0.693025,0.160532


In [28]:
# Upsample the annual frame to quarters (fiscal year ending in December),
# forward-filling each year's value across its quarters.
annual_frame.resample(rule='Q-DEC').ffill()

Unnamed: 0,Colorado,Texas,New York,Ohio
2000Q1,-0.036321,0.159291,0.355778,-0.329985
2000Q2,-0.036321,0.159291,0.355778,-0.329985
2000Q3,-0.036321,0.159291,0.355778,-0.329985
2000Q4,-0.036321,0.159291,0.355778,-0.329985
2001Q1,-0.030628,-0.094573,0.693025,0.160532
2001Q2,-0.030628,-0.094573,0.693025,0.160532
2001Q3,-0.030628,-0.094573,0.693025,0.160532
2001Q4,-0.030628,-0.094573,0.693025,0.160532


In [29]:
# Same upsampling with convention='end'; in the recorded output each annual
# value is placed on Q4 of its year before forward-filling.
annual_frame.resample(rule='Q-DEC', convention='end').ffill()

Unnamed: 0,Colorado,Texas,New York,Ohio
2000Q4,-0.036321,0.159291,0.355778,-0.329985
2001Q1,-0.036321,0.159291,0.355778,-0.329985
2001Q2,-0.036321,0.159291,0.355778,-0.329985
2001Q3,-0.036321,0.159291,0.355778,-0.329985
2001Q4,-0.030628,-0.094573,0.693025,0.160532


In [30]:
# Upsample to quarters of a fiscal year that ends in March, forward-filling.
annual_frame.resample(rule='Q-MAR').ffill()

Unnamed: 0,Colorado,Texas,New York,Ohio
2000Q4,-0.036321,0.159291,0.355778,-0.329985
2001Q1,-0.036321,0.159291,0.355778,-0.329985
2001Q2,-0.036321,0.159291,0.355778,-0.329985
2001Q3,-0.036321,0.159291,0.355778,-0.329985
2001Q4,-0.030628,-0.094573,0.693025,0.160532
2002Q1,-0.030628,-0.094573,0.693025,0.160532
2002Q2,-0.030628,-0.094573,0.693025,0.160532
2002Q3,-0.030628,-0.094573,0.693025,0.160532
