In [1]:
import pandas as pd
import numpy as np

In [2]:
rng = pd.date_range('1/1/2011', periods=72, freq='H')
ts = pd.Series(np.random.randn(len(rng)), index=rng)

In [3]:
ts.head()

2011-01-01 00:00:00    0.366281
2011-01-01 01:00:00    1.140965
2011-01-01 02:00:00   -0.490499
2011-01-01 03:00:00   -1.394605
2011-01-01 04:00:00   -0.792934
Freq: H, dtype: float64

In [4]:
ts.shape

(72,)

In [5]:
converted = ts.asfreq('45Min', method='ffill')
converted.head()

2011-01-01 00:00:00    0.366281
2011-01-01 00:45:00    0.366281
2011-01-01 01:30:00    1.140965
2011-01-01 02:15:00   -0.490499
2011-01-01 03:00:00   -1.394605
Freq: 45T, dtype: float64

In [6]:
converted.shape

(95,)

In [7]:
# Does asfreq change the # of rows?

Yes

In [8]:
# What do the different methods do?
# method : {‘backfill’, ‘bfill’, ‘pad’, ‘ffill’, None}

- 'backfill' == 'bfill'
- 'pad' == 'ffill'

In [9]:
ts.asfreq('45Min', method=None)

2011-01-01 00:00:00    0.366281
2011-01-01 00:45:00         NaN
2011-01-01 01:30:00         NaN
2011-01-01 02:15:00         NaN
2011-01-01 03:00:00   -1.394605
2011-01-01 03:45:00         NaN
2011-01-01 04:30:00         NaN
2011-01-01 05:15:00         NaN
2011-01-01 06:00:00   -0.473285
2011-01-01 06:45:00         NaN
2011-01-01 07:30:00         NaN
2011-01-01 08:15:00         NaN
2011-01-01 09:00:00    0.054219
2011-01-01 09:45:00         NaN
2011-01-01 10:30:00         NaN
2011-01-01 11:15:00         NaN
2011-01-01 12:00:00   -1.330632
2011-01-01 12:45:00         NaN
2011-01-01 13:30:00         NaN
2011-01-01 14:15:00         NaN
2011-01-01 15:00:00   -1.051196
2011-01-01 15:45:00         NaN
2011-01-01 16:30:00         NaN
2011-01-01 17:15:00         NaN
2011-01-01 18:00:00   -1.123569
2011-01-01 18:45:00         NaN
2011-01-01 19:30:00         NaN
2011-01-01 20:15:00         NaN
2011-01-01 21:00:00    1.555141
2011-01-01 21:45:00         NaN
                         ...   
2011-01-

In [10]:
# Might any of these methods have pitfalls from a logical point of view?

Using 'bfill' is like 'looking into the future' in time series analysis

In [11]:
# What's the difference between going to a higher frequency and a lower frequency?

The frequencies would determine how far apart the timestamps are

In [12]:
converted = ts.asfreq('90Min', method = 'bfill')

In [13]:
# What's different logically about going to a higher frequency vs a lower frequency? 
# What do you want to do when switching to a lower freqeuncy that is not logical when switching to a higher frequency?

In [14]:
ts.resample('D').sum()

2011-01-01   -4.828111
2011-01-02   -0.431384
2011-01-03    1.887488
Freq: D, dtype: float64

In [15]:
# What if you want to downsample and you don't want to ffill or bfill?

Use 'None' instead of the two mentioned methods

In [16]:
# What is the difference between .resample() and .asfreq()?

- .resample() produces an object that we can perform functions on
- .asfreq() changes the frequency of the DatetiemIndex object that also provides padding functionality

In [17]:
# What are some special things you can do with .resample() you can't do with .asfreq()?

Some functions such as count, mean, var, etc.