In [1]:
import pandas as pd
import numpy as np

In [9]:
# Build the reference series: 24 hourly timestamps starting 2011-01-01 00:00,
# with values 0..23 so each value equals its position in the series.
# The lowercase 'h' alias is used because the uppercase 'H' spelling is
# deprecated since pandas 2.2.
rng = pd.date_range('1/1/2011', periods=24, freq='h')
ts = pd.Series(range(24), index=rng)
ts.head()

2011-01-01 00:00:00    0
2011-01-01 01:00:00    1
2011-01-01 02:00:00    2
2011-01-01 03:00:00    3
2011-01-01 04:00:00    4
Freq: H, dtype: int64

In [11]:
# Re-index the hourly series onto a 45-minute grid; with method='ffill' each new
# timestamp takes the value of the most recent hourly point at or before it.
converted = ts.asfreq(freq='45Min', method='ffill')
converted.head(n=6)

2011-01-01 00:00:00    0
2011-01-01 00:45:00    0
2011-01-01 01:30:00    1
2011-01-01 02:15:00    2
2011-01-01 03:00:00    3
2011-01-01 03:45:00    3
Freq: 45T, dtype: int64

In [12]:
# Does asfreq change the number of rows? Compare the shapes before and after conversion.
print(ts.shape, converted.shape, sep='\n')

(24,)
(31,)


In [13]:
# What do the different methods do?
# method : {‘backfill’, ‘bfill’, ‘pad’, ‘ffill’, None}

# 'backfill/bfill': fill each gap with the next (future) valid data point
# 'pad/ffill': fill each gap with the most recent (past) valid data point. This is the more frequently used option.

In [17]:
# What's the difference between going to a higher frequency and a lower frequency?

In [20]:
# Move to a coarser 90-minute grid without any fill method: grid timestamps that
# do not coincide with an original hourly point (e.g. 01:30) come back as NaN.
converted = ts.asfreq(freq='90Min', method=None)
converted

2011-01-01 00:00:00     0.0
2011-01-01 01:30:00     NaN
2011-01-01 03:00:00     3.0
2011-01-01 04:30:00     NaN
2011-01-01 06:00:00     6.0
2011-01-01 07:30:00     NaN
2011-01-01 09:00:00     9.0
2011-01-01 10:30:00     NaN
2011-01-01 12:00:00    12.0
2011-01-01 13:30:00     NaN
2011-01-01 15:00:00    15.0
2011-01-01 16:30:00     NaN
2011-01-01 18:00:00    18.0
2011-01-01 19:30:00     NaN
2011-01-01 21:00:00    21.0
2011-01-01 22:30:00     NaN
Freq: 90T, dtype: float64

In [None]:
# What's different logically about going to a higher frequency vs a lower frequency? 
# What do you want to do when switching to a lower frequency that is not logical when switching to a higher frequency?

In [22]:
# Downsample by aggregation: group the hourly points into 90-minute bins and
# report the mean of each bin instead of picking single points.
(
    ts
    .resample(rule='90Min')
    .mean()
)

2011-01-01 00:00:00     0.5
2011-01-01 01:30:00     2.0
2011-01-01 03:00:00     3.5
2011-01-01 04:30:00     5.0
2011-01-01 06:00:00     6.5
2011-01-01 07:30:00     8.0
2011-01-01 09:00:00     9.5
2011-01-01 10:30:00    11.0
2011-01-01 12:00:00    12.5
2011-01-01 13:30:00    14.0
2011-01-01 15:00:00    15.5
2011-01-01 16:30:00    17.0
2011-01-01 18:00:00    18.5
2011-01-01 19:30:00    20.0
2011-01-01 21:00:00    21.5
2011-01-01 22:30:00    23.0
Freq: 90T, dtype: float64

In [41]:
# What if you want to downsample and you don't want to ffill or bfill?

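# Leave `method` at its default (None): `asfreq` then inserts NaN for the missing timestamps instead of filling them.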

In [35]:
# Pick 10 distinct positions out of the hourly series to simulate irregularly
# spaced observations.
# .iloc makes the positional lookup explicit: passing integer keys to plain []
# on a datetime-indexed Series relies on a positional fallback that pandas has
# deprecated.
# NOTE(review): the draw is unseeded, so the sampled rows differ on every run;
# seed NumPy beforehand if a reproducible sample is needed.
irreg_ts = ts.iloc[np.random.choice(len(ts), size=10, replace=False)]
irreg_ts.sort_index()

2011-01-01 02:00:00     2
2011-01-01 06:00:00     6
2011-01-01 11:00:00    11
2011-01-01 14:00:00    14
2011-01-01 17:00:00    17
2011-01-01 18:00:00    18
2011-01-01 19:00:00    19
2011-01-01 20:00:00    20
2011-01-01 21:00:00    21
2011-01-01 23:00:00    23
dtype: int64

In [36]:
# `asfreq` and `resample` cannot work on unsorted timestamps
# Sort first, then lay a 90-minute grid starting at the earliest observation;
# without a fill method only grid timestamps that hit a sampled point keep a value.
(
    irreg_ts
    .sort_index()
    .asfreq(freq='90Min')
)

2011-01-01 02:00:00     2.0
2011-01-01 03:30:00     NaN
2011-01-01 05:00:00     NaN
2011-01-01 06:30:00     NaN
2011-01-01 08:00:00     NaN
2011-01-01 09:30:00     NaN
2011-01-01 11:00:00    11.0
2011-01-01 12:30:00     NaN
2011-01-01 14:00:00    14.0
2011-01-01 15:30:00     NaN
2011-01-01 17:00:00    17.0
2011-01-01 18:30:00     NaN
2011-01-01 20:00:00    20.0
2011-01-01 21:30:00     NaN
2011-01-01 23:00:00    23.0
Freq: 90T, dtype: float64

In [54]:
# Same sorted series, but resampled into 90-minute bins and forward-filled;
# a bin with no earlier observation to carry forward stays NaN.
(
    irreg_ts
    .sort_index()
    .resample(rule='90Min')
    .ffill()
)

2011-01-01 01:30:00     NaN
2011-01-01 03:00:00     2.0
2011-01-01 04:30:00     2.0
2011-01-01 06:00:00     6.0
2011-01-01 07:30:00     6.0
2011-01-01 09:00:00     6.0
2011-01-01 10:30:00     6.0
2011-01-01 12:00:00    11.0
2011-01-01 13:30:00    11.0
2011-01-01 15:00:00    14.0
2011-01-01 16:30:00    14.0
2011-01-01 18:00:00    18.0
2011-01-01 19:30:00    19.0
2011-01-01 21:00:00    21.0
2011-01-01 22:30:00    21.0
Freq: 90T, dtype: float64

In [42]:
# What is the difference between .resample() and .asfreq()?
# `asfreq()` without a fill method puts NaN wherever no original data point lands exactly on the new timestamp.
# `asfreq()` is the more limited of the two; `resample()` is more powerful because you can apply different aggregation methods on top of it.

In [None]:
# What are some special things you can do with .resample() you can't do with .asfreq()?
# `resample()` is more powerful and can apply window functions

In [51]:
# How can I forward fill with only a few data points? (hint: fillna / ffill with `limit`)
# `limit=5` caps the number of consecutive NaNs filled after each valid value.
# `.ffill(limit=...)` is used rather than `fillna(method='ffill', ...)` because
# the `method` keyword of `fillna` is deprecated in recent pandas.
irreg_ts.sort_index().asfreq('90Min').ffill(limit=5)

2011-01-01 02:00:00     2.0
2011-01-01 03:30:00     2.0
2011-01-01 05:00:00     2.0
2011-01-01 06:30:00     2.0
2011-01-01 08:00:00     2.0
2011-01-01 09:30:00     2.0
2011-01-01 11:00:00    11.0
2011-01-01 12:30:00    11.0
2011-01-01 14:00:00    14.0
2011-01-01 15:30:00    14.0
2011-01-01 17:00:00    17.0
2011-01-01 18:30:00    17.0
2011-01-01 20:00:00    20.0
2011-01-01 21:30:00    20.0
2011-01-01 23:00:00    23.0
Freq: 90T, dtype: float64