In [1]:
import pandas as pd
import numpy as np

In [2]:
rng = pd.date_range('2000-01-01', periods=100, freq='D')
ts = pd.Series(np.random.randn(len(rng)), index = rng)
ts

2000-01-01   -1.561772
2000-01-02    0.478524
2000-01-03    1.963176
2000-01-04   -0.743876
2000-01-05    0.382321
2000-01-06    1.715468
2000-01-07   -0.356577
2000-01-08   -0.734760
2000-01-09   -0.304790
2000-01-10    0.019643
2000-01-11   -0.495947
2000-01-12   -0.078512
2000-01-13   -0.189683
2000-01-14   -0.605152
2000-01-15    0.102942
2000-01-16    0.934787
2000-01-17    1.010423
2000-01-18    0.519345
2000-01-19    0.535831
2000-01-20    1.296309
2000-01-21    0.814563
2000-01-22    0.116430
2000-01-23   -0.376174
2000-01-24   -0.819260
2000-01-25   -0.889245
2000-01-26   -0.403437
2000-01-27   -1.258456
2000-01-28    0.552229
2000-01-29   -0.786767
2000-01-30    0.430016
                ...   
2000-03-11    0.598910
2000-03-12   -1.542863
2000-03-13    0.558032
2000-03-14    1.177056
2000-03-15   -0.199608
2000-03-16    0.726741
2000-03-17   -0.755342
2000-03-18   -1.405652
2000-03-19   -1.348662
2000-03-20   -0.475254
2000-03-21    0.357171
2000-03-22   -1.922042
2000-03-23 

In [3]:
ts.resample('M').mean()

2000-01-31    0.069119
2000-02-29   -0.154838
2000-03-31    0.107246
2000-04-30   -0.192166
Freq: M, dtype: float64

In [4]:
ts.resample('M', kind='period').mean()

2000-01    0.069119
2000-02   -0.154838
2000-03    0.107246
2000-04   -0.192166
Freq: M, dtype: float64

# Resample method arguments
![Resample method arguments](./figs/11-5.jpg)

### Downsampling

In [5]:
rng = pd.date_range('2000-01-01', periods=12, freq='T')
ts = pd.Series(np.arange(12), index=rng)
ts

2000-01-01 00:00:00     0
2000-01-01 00:01:00     1
2000-01-01 00:02:00     2
2000-01-01 00:03:00     3
2000-01-01 00:04:00     4
2000-01-01 00:05:00     5
2000-01-01 00:06:00     6
2000-01-01 00:07:00     7
2000-01-01 00:08:00     8
2000-01-01 00:09:00     9
2000-01-01 00:10:00    10
2000-01-01 00:11:00    11
Freq: T, dtype: int32

In [6]:
# suppose you want to aggregate this data into five-minute chunks or bars by taking the sum of each group
ts.resample('5min', closed='right').sum()

1999-12-31 23:55:00     0
2000-01-01 00:00:00    15
2000-01-01 00:05:00    40
2000-01-01 00:10:00    11
Freq: 5T, dtype: int32

In [7]:
# change the interval to be colosed on the right
ts.resample('5min', closed='right').sum()

1999-12-31 23:55:00     0
2000-01-01 00:00:00    15
2000-01-01 00:05:00    40
2000-01-01 00:10:00    11
Freq: 5T, dtype: int32

In [8]:
ts.resample('5min', closed='right', label='right').sum()

2000-01-01 00:00:00     0
2000-01-01 00:05:00    15
2000-01-01 00:10:00    40
2000-01-01 00:15:00    11
Freq: 5T, dtype: int32

In [9]:
ts.resample('5min', closed='right',
             label='right', loffset='-1s').sum()

1999-12-31 23:59:59     0
2000-01-01 00:04:59    15
2000-01-01 00:09:59    40
2000-01-01 00:14:59    11
Freq: 5T, dtype: int32

#### Open-High-Low-Close (OHLC) resampling

In [10]:
ts.resample('5min').ohlc()

Unnamed: 0,open,high,low,close
2000-01-01 00:00:00,0,4,0,4
2000-01-01 00:05:00,5,9,5,9
2000-01-01 00:10:00,10,11,10,11


## Upsampling and Interpolation

In [11]:
frame = pd.DataFrame(np.random.randn(2, 4),
                      index=pd.date_range('1/1/2000', periods=2,
                                          freq='W-WED'),
                      columns=['Colorado', 'Texas', 'New York', 'Ohio'])
frame

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,0.438614,2.355068,-1.138644,-1.676226
2000-01-12,0.357043,-1.124317,0.532454,-1.258297


In [12]:
# we use the asfreq method to convert to the higher frequency without any aggregation
df_daily = frame.resample('D').asfreq()
df_daily

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,0.438614,2.355068,-1.138644,-1.676226
2000-01-06,,,,
2000-01-07,,,,
2000-01-08,,,,
2000-01-09,,,,
2000-01-10,,,,
2000-01-11,,,,
2000-01-12,0.357043,-1.124317,0.532454,-1.258297


In [13]:
frame.resample('D').ffill()

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,0.438614,2.355068,-1.138644,-1.676226
2000-01-06,0.438614,2.355068,-1.138644,-1.676226
2000-01-07,0.438614,2.355068,-1.138644,-1.676226
2000-01-08,0.438614,2.355068,-1.138644,-1.676226
2000-01-09,0.438614,2.355068,-1.138644,-1.676226
2000-01-10,0.438614,2.355068,-1.138644,-1.676226
2000-01-11,0.438614,2.355068,-1.138644,-1.676226
2000-01-12,0.357043,-1.124317,0.532454,-1.258297


In [14]:
frame.resample('D').ffill(limit=2)

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-05,0.438614,2.355068,-1.138644,-1.676226
2000-01-06,0.438614,2.355068,-1.138644,-1.676226
2000-01-07,0.438614,2.355068,-1.138644,-1.676226
2000-01-08,,,,
2000-01-09,,,,
2000-01-10,,,,
2000-01-11,,,,
2000-01-12,0.357043,-1.124317,0.532454,-1.258297


In [15]:
frame.resample('W-THU').ffill()

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01-06,0.438614,2.355068,-1.138644,-1.676226
2000-01-13,0.357043,-1.124317,0.532454,-1.258297


### Resampling with Periods

In [16]:
frame = pd.DataFrame(np.random.randn(24, 4),
                      index=pd.period_range('1-2000', '12-2001', freq='M'),
                      columns=['Colorado', 'Texas', 'New York', 'Ohio'])
frame[:5]

Unnamed: 0,Colorado,Texas,New York,Ohio
2000-01,-0.545682,-0.453409,-1.912279,0.170939
2000-02,-0.737916,0.263383,0.750169,0.947824
2000-03,-1.249072,-2.364334,-0.714026,-0.634967
2000-04,-0.281855,2.160967,0.155179,0.140633
2000-05,-0.730502,0.691775,-0.840013,0.277469


In [17]:
annual_frame = frame.resample('A-DEC').mean()
annual_frame

Unnamed: 0,Colorado,Texas,New York,Ohio
2000,-0.202347,0.284667,-0.09806,-0.253191
2001,0.037116,0.219956,-0.110031,-0.089001


In [18]:
annual_frame.resample('Q-DEC').ffill()

Unnamed: 0,Colorado,Texas,New York,Ohio
2000Q1,-0.202347,0.284667,-0.09806,-0.253191
2000Q2,-0.202347,0.284667,-0.09806,-0.253191
2000Q3,-0.202347,0.284667,-0.09806,-0.253191
2000Q4,-0.202347,0.284667,-0.09806,-0.253191
2001Q1,0.037116,0.219956,-0.110031,-0.089001
2001Q2,0.037116,0.219956,-0.110031,-0.089001
2001Q3,0.037116,0.219956,-0.110031,-0.089001
2001Q4,0.037116,0.219956,-0.110031,-0.089001


In [19]:
annual_frame.resample('Q-DEC', convention='end').ffill()

Unnamed: 0,Colorado,Texas,New York,Ohio
2000Q4,-0.202347,0.284667,-0.09806,-0.253191
2001Q1,-0.202347,0.284667,-0.09806,-0.253191
2001Q2,-0.202347,0.284667,-0.09806,-0.253191
2001Q3,-0.202347,0.284667,-0.09806,-0.253191
2001Q4,0.037116,0.219956,-0.110031,-0.089001


In [20]:
annual_frame.resample('Q-MAR').ffill()

Unnamed: 0,Colorado,Texas,New York,Ohio
2000Q4,-0.202347,0.284667,-0.09806,-0.253191
2001Q1,-0.202347,0.284667,-0.09806,-0.253191
2001Q2,-0.202347,0.284667,-0.09806,-0.253191
2001Q3,-0.202347,0.284667,-0.09806,-0.253191
2001Q4,0.037116,0.219956,-0.110031,-0.089001
2002Q1,0.037116,0.219956,-0.110031,-0.089001
2002Q2,0.037116,0.219956,-0.110031,-0.089001
2002Q3,0.037116,0.219956,-0.110031,-0.089001
