# Resampling Time Series DataFrames

In [81]:
import pandas as pd
import numpy as np

# Synthetic demo data: 90 consecutive daily timestamps starting 2018-12-25, plus one
# standard-normal column and one uniform [0, 1) column.
# The generator is seeded so the values are identical after every kernel restart.
rng = np.random.default_rng(42)

df = pd.DataFrame(data={'datetime': pd.date_range('12/25/2018', periods=90, freq='D'),
                        'feature_1': rng.standard_normal(90),
                        'feature_2': rng.random(90)})

df.head()

Unnamed: 0,datetime,feature_1,feature_2
0,2018-12-25,-0.042305,0.802248
1,2018-12-26,-0.50332,0.048699
2,2018-12-27,-0.140212,0.14335
3,2018-12-28,0.933979,0.736463
4,2018-12-29,-0.431505,0.765205


In [82]:
# Promote the 'datetime' column to the index so the frame can be resampled by time.
# Reassigning (instead of inplace=True) behind the column check keeps this cell safe to
# re-run: on a second run 'datetime' is already the index, so the call is skipped
# rather than raising KeyError.
if 'datetime' in df.columns:
    df = df.set_index('datetime')
df.head()

Unnamed: 0_level_0,feature_1,feature_2
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-12-25,-0.042305,0.802248
2018-12-26,-0.50332,0.048699
2018-12-27,-0.140212,0.14335
2018-12-28,0.933979,0.736463
2018-12-29,-0.431505,0.765205


### Daily Resampling

In [84]:
# Group the rows into one-day bins and total each column per bin,
# then preview the first few bins.
daily_totals = df.resample('1D').sum()
daily_totals.head()

Unnamed: 0_level_0,feature_1,feature_2
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-12-25,-0.042305,0.802248
2018-12-26,-0.50332,0.048699
2018-12-27,-0.140212,0.14335
2018-12-28,0.933979,0.736463
2018-12-29,-0.431505,0.765205


### Weekly Resampling

In [85]:
# Group the rows into one-week bins and keep the largest value of each
# column within every bin, then preview the first few bins.
weekly_max = df.resample('1W').max()
weekly_max.head()

Unnamed: 0_level_0,feature_1,feature_2
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-12-30,0.933979,0.802248
2019-01-06,1.90184,0.908213
2019-01-13,1.302976,0.819024
2019-01-20,0.820549,0.95485
2019-01-27,1.362464,0.83966


### Monthly Resampling

In [34]:
# Smallest value of each column per calendar month (bins end on the last day of the month).
# pd.offsets.MonthEnd() is the frequency the string alias 'M' used to name; that alias is
# deprecated since pandas 2.2 (the string form is now 'ME'), while the offset object is
# accepted by both older and newer pandas releases.
df.resample(pd.offsets.MonthEnd()).min().head(5)

Unnamed: 0_level_0,feature_1,feature_2
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-12-31,-1.481009,0.00643
2019-01-31,-2.043958,0.026396
2019-02-28,-1.80581,0.126203
2019-03-31,-1.834423,0.025337


### Yearly Resampling

In [59]:
# Number of distinct values of each column per calendar year (bins end on 31 December).
# pd.offsets.YearEnd() is the frequency the string alias 'Y' used to name; that alias is
# deprecated since pandas 2.2 (the string form is now 'YE'), while the offset object is
# accepted by both older and newer pandas releases.
df.resample(pd.offsets.YearEnd()).nunique()

Unnamed: 0_level_0,feature_1,feature_2
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-12-31,7,7
2019-12-31,83,83


### Resampling by the Hour

In [73]:
# Rebuild the demo frame at hourly resolution: 90 consecutive hours starting
# 2018-12-25 00:00. The generator is seeded so the values are reproducible.
# Lowercase 'h' replaces the 'H' alias, which is deprecated since pandas 2.2.
rng = np.random.default_rng(42)

df = pd.DataFrame(data={'datetime': pd.date_range('12/25/2018', periods=90, freq='h'),
                        'feature_1': rng.standard_normal(90),
                        'feature_2': rng.random(90)})

# Index by timestamp, then total each column over two-hour bins.
df = df.set_index('datetime')
df.resample('2h').sum().head()

Unnamed: 0_level_0,feature_1,feature_2
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-12-25 00:00:00,0.039395,0.429378
2018-12-25 02:00:00,-0.304945,1.618242
2018-12-25 04:00:00,-0.999194,0.579101
2018-12-25 06:00:00,0.250126,0.659269
2018-12-25 08:00:00,-0.767738,0.534183


### Resampling by the Minute

In [76]:
# Rebuild the demo frame at one-minute resolution: 90 consecutive minutes starting
# 2018-12-25 00:00. The generator is seeded so the values are reproducible.
# 'min' replaces the 'T' alias, which is deprecated since pandas 2.2.
rng = np.random.default_rng(42)

df = pd.DataFrame(data={'datetime': pd.date_range('12/25/2018', periods=90, freq='min'),
                        'feature_1': rng.standard_normal(90),
                        'feature_2': rng.random(90)})

# Index by timestamp, then total each column over three-minute bins.
df = df.set_index('datetime')
df.resample('3min').sum().head()

Unnamed: 0_level_0,feature_1,feature_2
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-12-25 00:00:00,1.771967,1.497531
2018-12-25 00:03:00,-1.505933,1.013679
2018-12-25 00:06:00,-1.036548,1.209083
2018-12-25 00:09:00,1.538142,1.335962
2018-12-25 00:12:00,2.369288,1.230875


### Resampling by the Second

In [79]:
# Rebuild the demo frame at one-second resolution: 90 consecutive seconds starting
# 2018-12-25 00:00:00. The generator is seeded so the values are reproducible.
rng = np.random.default_rng(42)

df = pd.DataFrame(data={'datetime': pd.date_range('12/25/2018', periods=90, freq='s'),
                        'feature_1': rng.standard_normal(90),
                        'feature_2': rng.random(90)})

# Index by timestamp, then total each column over 30-second bins (90 rows -> 3 bins).
df = df.set_index('datetime')
df.resample('30s').sum().head()

Unnamed: 0_level_0,feature_1,feature_2
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-12-25 00:00:00,4.627835,13.92253
2018-12-25 00:00:30,7.560767,15.331102
2018-12-25 00:01:00,4.648913,15.442222
