# Resampling Datetime

<span>When plotting time series data, it is often advantageous to reframe and aggregate the data by a period of time. Data scientists who deal with time series data prefer Pandas because resampling becomes really easy when the data is formatted correctly. To resample a dataframe, your datetime values should have a datetime data type and be set as the index of the dataframe.</span>
    
### Import Preliminaries

In [1]:
# Import modules
import pandas as pd
import numpy as np

# Build 90 consecutive daily timestamps starting on 25 December 2018
n_days = 90
dates = pd.date_range(start='12/25/2018', periods=n_days, freq='D')

# Assemble the dataframe: one datetime column plus two random features
# (feature_1 from a standard normal, feature_2 uniform on [0, 1))
df = pd.DataFrame({
    'datetime': dates,
    'feature_1': np.random.randn(n_days),
    'feature_2': np.random.random(n_days),
})

# Preview the first five rows
df.head()

Unnamed: 0,datetime,feature_1,feature_2
0,2018-12-25,-1.400024,0.396009
1,2018-12-26,2.150836,0.643915
2,2018-12-27,0.890622,0.523227
3,2018-12-28,-0.339172,0.651509
4,2018-12-29,-1.309172,0.417152


In [2]:
# Resampling needs a datetime index, so promote the 'datetime'
# column to the index (the dataframe is modified in place)
df.set_index(keys='datetime', inplace=True)

# Preview the first five rows of the re-indexed dataframe
df.head(5)

Unnamed: 0_level_0,feature_1,feature_2
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-12-25,-1.400024,0.396009
2018-12-26,2.150836,0.643915
2018-12-27,0.890622,0.523227
2018-12-28,-0.339172,0.651509
2018-12-29,-1.309172,0.417152


### Daily Resampling

In [3]:
# Group the rows into one-day bins ...
daily_bins = df.resample('1D')

# ... then total each bin and show the first five days
daily_bins.sum().head()

Unnamed: 0_level_0,feature_1,feature_2
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-12-25,-1.400024,0.396009
2018-12-26,2.150836,0.643915
2018-12-27,0.890622,0.523227
2018-12-28,-0.339172,0.651509
2018-12-29,-1.309172,0.417152


### Weekly Resampling

In [7]:
# Group the rows into one-week bins ...
weekly_bins = df.resample('1W')

# ... then take the largest value in each bin and show the first five weeks
weekly_bins.max().head()

Unnamed: 0_level_0,feature_1,feature_2
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-12-30,2.150836,0.651509
2019-01-06,2.266355,0.880982
2019-01-13,1.458251,0.897223
2019-01-20,0.912754,0.737018
2019-01-27,1.246409,0.987816


### Monthly Resampling

In [11]:
# Resample the dataframe by month, and return the min value.
# The month-end offset object is passed instead of the string alias:
# the alias was renamed from 'M' to 'ME' in pandas 2.2, whereas
# pd.offsets.MonthEnd() is accepted by releases on both sides of the rename.
df.resample(pd.offsets.MonthEnd()).min().head(5)

Unnamed: 0_level_0,feature_1,feature_2
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-12-31,-1.400024,0.251083
2019-01-31,-1.696377,0.095065
2019-02-28,-1.554629,0.014236
2019-03-31,-1.882866,0.048322


### Yearly Resampling

In [12]:
# Resample the dataframe by year, and return the number of
# unique values in each year.
# The year-end offset object is passed instead of the string alias:
# the alias was renamed from 'Y' to 'YE' in pandas 2.2, whereas
# pd.offsets.YearEnd() is accepted by releases on both sides of the rename.
df.resample(pd.offsets.YearEnd()).nunique()

Unnamed: 0_level_0,feature_1,feature_2
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-12-31,7,7
2019-12-31,83,83


### Resampling by the Hour

In [13]:
# Create another dataframe that includes time as well.
# The lowercase 'h' alias is used for hours; the uppercase 'H'
# spelling is deprecated in recent pandas releases.
df = pd.DataFrame(data={'datetime': pd.date_range('12/25/2018', periods=90, freq='h'),
                        'feature_1': np.random.randn(90),
                        'feature_2': np.random.random(90)})

# Set the index to the datetime value
df.set_index('datetime', inplace=True)

# Resample the data by 2 hour intervals
df.resample('2h').sum().head()

Unnamed: 0_level_0,feature_1,feature_2
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-12-25 00:00:00,-1.147581,1.189723
2018-12-25 02:00:00,1.187094,1.60225
2018-12-25 04:00:00,1.040407,1.584479
2018-12-25 06:00:00,0.242816,0.888224
2018-12-25 08:00:00,-0.193563,0.790089


### Resampling by the Minute

In [15]:
# Create another dataframe that includes time with minutes.
# The 'min' alias is used for minutes; the single-letter 'T'
# spelling is deprecated in recent pandas releases.
df = pd.DataFrame(data={'datetime': pd.date_range('12/25/2018', periods=90, freq='min'),
                        'feature_1': np.random.randn(90),
                        'feature_2': np.random.random(90)})

# Set the index to the datetime value
df.set_index('datetime', inplace=True)

# Resample the data by 3 minute intervals
df.resample('3min').sum().head()

Unnamed: 0_level_0,feature_1,feature_2
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-12-25 00:00:00,-0.035448,1.556011
2018-12-25 00:03:00,2.939115,1.636258
2018-12-25 00:06:00,-2.695364,1.306642
2018-12-25 00:09:00,-2.656164,0.897577
2018-12-25 00:12:00,-0.13254,0.407403


### Resampling by the Second

In [16]:
# Build 90 consecutive one-second timestamps starting at midnight
n_seconds = 90
stamps = pd.date_range('12/25/2018', periods=n_seconds, freq='s')

# Assemble a dataframe with second-level timestamps and two random features
df = pd.DataFrame({
    'datetime': stamps,
    'feature_1': np.random.randn(n_seconds),
    'feature_2': np.random.random(n_seconds),
})

# Use the datetime column as the index so the frame can be resampled
df.set_index(keys='datetime', inplace=True)

# Sum the data over 30 second intervals and show the resulting bins
half_minute_bins = df.resample('30s')
half_minute_bins.sum().head()

Unnamed: 0_level_0,feature_1,feature_2
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-12-25 00:00:00,2.084843,13.144497
2018-12-25 00:00:30,2.507993,14.292305
2018-12-25 00:01:00,-0.854539,16.713357


Author: Kavi Sekhon