In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from datetime import datetime

In [2]:
# Daily series of 100 random draws starting on 2000-01-01 (unseeded, so the
# values differ on every run).
rng = pd.date_range('2000-01-01', periods=100, freq='D')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
display(ts)

# Monthly means, labelled with month-end timestamps.
# NOTE(review): pandas >= 2.2 prefers the 'ME' alias for month-end timestamps;
# 'M' is kept because older pandas versions do not accept 'ME'.
display(ts.resample('M').mean())

# The same monthly means, labelled with monthly periods. The `kind` keyword of
# resample() is deprecated in pandas >= 2.2, so the index of the result is
# converted with to_period() instead.
display(ts.resample('M').mean().to_period('M'))

2000-01-01    0.268939
2000-01-02    1.509141
2000-01-03    0.328840
2000-01-04    1.084822
2000-01-05   -0.414199
                ...   
2000-04-05    1.130602
2000-04-06   -0.159774
2000-04-07    1.431308
2000-04-08    1.588526
2000-04-09   -1.319086
Freq: D, Length: 100, dtype: float64

2000-01-31    0.216489
2000-02-29   -0.200507
2000-03-31   -0.097416
2000-04-30    0.158622
Freq: M, dtype: float64

2000-01    0.216489
2000-02   -0.200507
2000-03   -0.097416
2000-04    0.158622
Freq: M, dtype: float64

### 向下采样

In [3]:
# Twelve one-minute observations holding the integers 0..11. 'min' is the
# minute alias that the '5min' resampling rule in this notebook also uses; the
# single-letter 'T' alias is deprecated in pandas >= 2.2.
rng = pd.date_range('2000-01-01', periods=12, freq='min')
ts = pd.Series(np.arange(12), index=rng)
display(ts)

2000-01-01 00:00:00     0
2000-01-01 00:01:00     1
2000-01-01 00:02:00     2
2000-01-01 00:03:00     3
2000-01-01 00:04:00     4
2000-01-01 00:05:00     5
2000-01-01 00:06:00     6
2000-01-01 00:07:00     7
2000-01-01 00:08:00     8
2000-01-01 00:09:00     9
2000-01-01 00:10:00    10
2000-01-01 00:11:00    11
Freq: T, dtype: int64

In [4]:
# By default each 5-minute bin is closed on the left and open on the right.
display(ts.resample('5min').sum())
# Close the bins on the right edge instead.
display(ts.resample('5min', closed='right').sum())
# Additionally label each aggregate with the right edge of its bin.
display(ts.resample('5min', closed='right', label='right').sum())

# Shift the labels back by one second. The `loffset` argument of resample() is
# deprecated (it triggers a FutureWarning), so the offset is applied to the
# index of the aggregated result instead.
right_labeled = ts.resample('5min', closed='right', label='right').sum()
right_labeled.index = right_labeled.index - pd.Timedelta(seconds=1)
display(right_labeled)

2000-01-01 00:00:00    10
2000-01-01 00:05:00    35
2000-01-01 00:10:00    21
Freq: 5T, dtype: int64

1999-12-31 23:55:00     0
2000-01-01 00:00:00    15
2000-01-01 00:05:00    40
2000-01-01 00:10:00    11
Freq: 5T, dtype: int64

2000-01-01 00:00:00     0
2000-01-01 00:05:00    15
2000-01-01 00:10:00    40
2000-01-01 00:15:00    11
Freq: 5T, dtype: int64


>>> df.resample(freq="3s", loffset="8H")

becomes:

>>> from pandas.tseries.frequencies import to_offset
>>> df = df.resample(freq="3s").mean()
>>> df.index = df.index.to_timestamp() + to_offset("8H")

  after removing the cwd from sys.path.


1999-12-31 23:59:59     0
2000-01-01 00:04:59    15
2000-01-01 00:09:59    40
2000-01-01 00:14:59    11
Freq: 5T, dtype: int64

#### OHLC重采样

金融领域中有一种无所不在的时间序列聚合方式，即计算各面元的四个值：第一个值（open，开盘）、最后一个值（close，收盘）、最大值（high，最高）以及最小值（low，最低）。对重采样结果调用ohlc()方法即可得到一个含有这四种聚合值的DataFrame。

In [5]:
# Open / high / low / close of every 5-minute bin, returned as one DataFrame.
ts.resample(rule='5min').ohlc()

Unnamed: 0,open,high,low,close
2000-01-01 00:00:00,0,4,0,4
2000-01-01 00:05:00,5,9,5,9
2000-01-01 00:10:00,10,11,10,11


### 向上采样与插值

In [6]:
# Two weekly (Wednesday-anchored) rows of random values, one column per state.
frame = pd.DataFrame(
    data=np.random.randn(2, 4),
    columns=['Colorado', 'Texas', 'New York', 'Ohio'],
    index=pd.date_range(start='1/1/2000', periods=2, freq='W-WED'),
)
frame

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,-0.200305,-0.227567,-0.042122,-0.570252
2000-01-12,0.868788,-0.735632,0.523501,-0.495852


In [7]:
# Upsample the weekly rows to daily frequency without filling: the days that
# had no observation show up as missing values.
frame.resample(rule='D').asfreq()

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,-0.200305,-0.227567,-0.042122,-0.570252
2000-01-06,,,,
2000-01-07,,,,
2000-01-08,,,,
2000-01-09,,,,
2000-01-10,,,,
2000-01-11,,,,
2000-01-12,0.868788,-0.735632,0.523501,-0.495852


In [8]:
# Three forward-fill variants, shown one after another:
#   1. daily upsampling, carrying each observation forward without limit;
#   2. daily upsampling, carrying each observation forward at most two rows;
#   3. re-anchoring the weekly data on Thursdays.
display(
    frame.resample(rule='D').ffill(),
    frame.resample(rule='D').ffill(limit=2),
    frame.resample(rule='W-THU').ffill(),
)

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,-0.200305,-0.227567,-0.042122,-0.570252
2000-01-06,-0.200305,-0.227567,-0.042122,-0.570252
2000-01-07,-0.200305,-0.227567,-0.042122,-0.570252
2000-01-08,-0.200305,-0.227567,-0.042122,-0.570252
2000-01-09,-0.200305,-0.227567,-0.042122,-0.570252
2000-01-10,-0.200305,-0.227567,-0.042122,-0.570252
2000-01-11,-0.200305,-0.227567,-0.042122,-0.570252
2000-01-12,0.868788,-0.735632,0.523501,-0.495852


Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,-0.200305,-0.227567,-0.042122,-0.570252
2000-01-06,-0.200305,-0.227567,-0.042122,-0.570252
2000-01-07,-0.200305,-0.227567,-0.042122,-0.570252
2000-01-08,,,,
2000-01-09,,,,
2000-01-10,,,,
2000-01-11,,,,
2000-01-12,0.868788,-0.735632,0.523501,-0.495852


Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-06,-0.200305,-0.227567,-0.042122,-0.570252
2000-01-13,0.868788,-0.735632,0.523501,-0.495852


### 使用区间进行重新采样

In [9]:
# Twenty-four monthly periods (January 2000 through December 2001) of random
# values, one column per state; preview the first five rows.
frame = pd.DataFrame(
    data=np.random.randn(24, 4),
    columns=['Colorado', 'Texas', 'New York', 'Ohio'],
    index=pd.period_range(start='1-2000', end='12-2001', freq='M'),
)
frame.head(5)

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01,0.153053,0.672842,0.609391,-0.164491
2000-02,-0.856949,0.884225,-0.31307,0.797493
2000-03,0.613424,1.416959,0.935741,-1.078349
2000-04,-1.153335,-0.11927,2.645766,-0.649923
2000-05,1.182496,1.015557,0.844596,-0.325186


In [10]:
# Downsample the monthly periods to annual means; 'A-DEC' is the annual
# frequency whose year ends in December.
annual_frame = frame.resample(rule='A-DEC').mean()
annual_frame

Unnamed: 0,Colorado,Texas,New York,Ohio
2000,-0.519514,0.31287,0.644986,-0.090517
2001,0.405931,-0.098521,0.187099,0.043117


In [11]:
# Upsample the annual values to quarters ('Q-DEC': quarterly, year ending in
# December) and forward-fill them. The first frame uses the default
# convention; the second passes convention='end'.
display(
    annual_frame.resample(rule='Q-DEC').ffill(),
    annual_frame.resample(rule='Q-DEC', convention='end').ffill(),
)

Unnamed: 0,Colorado,Texas,New York,Ohio
2000Q1,-0.519514,0.31287,0.644986,-0.090517
2000Q2,-0.519514,0.31287,0.644986,-0.090517
2000Q3,-0.519514,0.31287,0.644986,-0.090517
2000Q4,-0.519514,0.31287,0.644986,-0.090517
2001Q1,0.405931,-0.098521,0.187099,0.043117
2001Q2,0.405931,-0.098521,0.187099,0.043117
2001Q3,0.405931,-0.098521,0.187099,0.043117
2001Q4,0.405931,-0.098521,0.187099,0.043117


Unnamed: 0,Colorado,Texas,New York,Ohio
2000Q4,-0.519514,0.31287,0.644986,-0.090517
2001Q1,-0.519514,0.31287,0.644986,-0.090517
2001Q2,-0.519514,0.31287,0.644986,-0.090517
2001Q3,-0.519514,0.31287,0.644986,-0.090517
2001Q4,0.405931,-0.098521,0.187099,0.043117


In [12]:
# Same quarterly upsampling, but with quarters of a fiscal year ending in March.
annual_frame.resample(rule='Q-MAR').ffill()

Unnamed: 0,Colorado,Texas,New York,Ohio
2000Q4,-0.519514,0.31287,0.644986,-0.090517
2001Q1,-0.519514,0.31287,0.644986,-0.090517
2001Q2,-0.519514,0.31287,0.644986,-0.090517
2001Q3,-0.519514,0.31287,0.644986,-0.090517
2001Q4,0.405931,-0.098521,0.187099,0.043117
2002Q1,0.405931,-0.098521,0.187099,0.043117
2002Q2,0.405931,-0.098521,0.187099,0.043117
2002Q3,0.405931,-0.098521,0.187099,0.043117
