In [1]:
import pandas as pd
import numpy as np

In [2]:
# Seed the global NumPy RNG so this draw (and the later np.random.* calls in
# this notebook) produce the same values on every Restart & Run All.
np.random.seed(0)

# 72 hourly timestamps starting 2011-01-01 00:00, i.e. three full days.
# 'h' is the current spelling of the hourly alias; upper-case 'H' is deprecated
# in recent pandas.
rng = pd.date_range('1/1/2011', periods=72, freq='h')
ts = pd.Series(np.random.randn(len(rng)), index=rng)

In [3]:
# Upsample the hourly series to 45-minute steps; each new timestamp takes the
# most recent earlier observation ('pad' is forward fill).
converted = ts.asfreq(freq='45Min', method='pad')

In [4]:
# Baseline: number of rows in the original hourly series.
ts.shape

(72,)

In [5]:
# Row count after the 45-minute conversion, to compare against ts.shape.
converted.shape

(95,)

In [6]:
# Does asfreq change the number of rows?
# Yes: going from hourly to 45-minute steps took the series from 72 to 95 rows.

In [7]:
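# What do the different methods do?
# method : {'backfill', 'bfill', 'pad', 'ffill', None}
# 'backfill' == 'bfill' and 'pad' == 'ffill'.
# These methods fill the timestamps that the conversion introduces: forward fill
# carries the last earlier observation forward, backward fill pulls the next later
# observation back. With None, the new timestamps are left as NaN.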

In [8]:
# Might any of these methods have pitfalls from a logical point of view?

In [9]:
# What's the difference between going to a higher frequency and a lower frequency?

In [10]:
# Re-convert the hourly series, this time to 90-minute steps, taking each new
# timestamp's value from the next later observation (backward fill).
converted = ts.asfreq(freq='90Min', method='bfill')

In [11]:
# Downsample to 2-hour bins by averaging, then show bins 1 through 9.
# '2h' replaces the deprecated upper-case '2H' alias, and .iloc makes it
# explicit that the slice is positional rather than label-based.
ts.resample('2h').mean().iloc[1:10]

2011-01-01 02:00:00    0.737972
2011-01-01 04:00:00    1.728496
2011-01-01 06:00:00   -0.124110
2011-01-01 08:00:00   -0.319404
2011-01-01 10:00:00    0.064784
2011-01-01 12:00:00   -0.742955
2011-01-01 14:00:00   -0.633735
2011-01-01 16:00:00   -0.500602
2011-01-01 18:00:00    0.383804
Freq: 2H, dtype: float64

In [12]:
# Simulate an irregular series: draw 10 distinct row positions at random.
# .iloc makes the positional lookup explicit; plain ts[[...]] with integer keys
# on a DatetimeIndex relies on a positional fallback that recent pandas deprecates.
irreg_ts = ts.iloc[np.random.choice(len(ts), size=10, replace=False)]

In [13]:
# Display the random sample. The rows come back in draw order, not
# chronological order, because the sample was not sorted.
irreg_ts

2011-01-01 22:00:00   -0.386271
2011-01-01 18:00:00   -0.168538
2011-01-02 10:00:00   -0.354844
2011-01-01 09:00:00    0.851263
2011-01-02 00:00:00   -0.310230
2011-01-02 08:00:00   -0.105289
2011-01-03 14:00:00   -0.875544
2011-01-03 02:00:00    0.786812
2011-01-03 01:00:00    0.861976
2011-01-03 20:00:00   -0.528531
dtype: float64

### What's different logically about going to a higher frequency vs a lower frequency? 

Going to a higher frequency creates timestamps that have no observation, so you need to interpolate or fill in data that you may not actually have.

### What do you want to do when switching to a lower frequency that is not logical when switching to a higher frequency?

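Delete values: a lower frequency has fewer periods than observations, so you either drop the extra values or combine them with an aggregate such as `mean` or `sum`.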


In [14]:
# Downsample the hourly series to one row per calendar day, summing each day's values.
ts.resample(rule='D').sum()

2011-01-01    2.475354
2011-01-02   -9.106375
2011-01-03   -2.504108
Freq: D, dtype: float64

In [15]:
# What if you want to downsample and you don't want to ffill or bfill?
# Pass method=None (the default) so no fill is applied, or interpolate / aggregate instead.

In [16]:
# What is the difference between .resample() and .asfreq()?
# What are some special things you can do with .resample() that you can't do with .asfreq()?
# .asfreq() is fairly limited: it only picks or fills values at the new frequency.
# .resample() groups the data into time bins and lets you aggregate each one (mean, variance, sum, etc.).

In [17]:
# Start over with a fresh 72-point hourly series (a new random draw).
# 'h' is the current spelling of the hourly alias; upper-case 'H' is deprecated
# in recent pandas.
rng = pd.date_range('1/1/2011', periods=72, freq='h')
ts = pd.Series(np.random.randn(len(rng)), index=rng)

In [18]:
# Draw 10 distinct row positions at random, then put the sample back into
# chronological order. .iloc makes the positional lookup explicit; plain
# ts[[...]] with integer keys on a DatetimeIndex relies on a positional
# fallback that recent pandas deprecates.
irreg_ts = ts.iloc[np.random.choice(len(ts), size=10, replace=False)].sort_index()

In [19]:
# Put the irregular sample on a regular hourly grid and forward-fill the gaps,
# carrying each observation forward at most 5 hours; longer gaps stay NaN.
# Resampler.ffill replaces the deprecated Resampler.fillna(method='ffill'),
# and 'h' replaces the deprecated upper-case 'H' alias.
irreg_ts.resample('h').ffill(limit=5)

2011-01-01 02:00:00    0.112584
2011-01-01 03:00:00    0.112584
2011-01-01 04:00:00    0.112584
2011-01-01 05:00:00    0.112584
2011-01-01 06:00:00    0.112584
2011-01-01 07:00:00    0.112584
2011-01-01 08:00:00         NaN
2011-01-01 09:00:00         NaN
2011-01-01 10:00:00         NaN
2011-01-01 11:00:00         NaN
2011-01-01 12:00:00         NaN
2011-01-01 13:00:00         NaN
2011-01-01 14:00:00         NaN
2011-01-01 15:00:00         NaN
2011-01-01 16:00:00    1.312852
2011-01-01 17:00:00    1.312852
2011-01-01 18:00:00    1.312852
2011-01-01 19:00:00    0.651947
2011-01-01 20:00:00    0.651947
2011-01-01 21:00:00    0.651947
2011-01-01 22:00:00    0.651947
2011-01-01 23:00:00   -0.070507
2011-01-02 00:00:00   -0.070507
2011-01-02 01:00:00   -0.070507
2011-01-02 02:00:00   -0.070507
2011-01-02 03:00:00   -0.070507
2011-01-02 04:00:00   -0.070507
2011-01-02 05:00:00         NaN
2011-01-02 06:00:00         NaN
2011-01-02 07:00:00         NaN
                         ...   
2011-01-

In [20]:
# With no aggregation or fill method chained on, resample() hands back a
# Resampler object rather than a Series, which is what the output below shows.
# 'h' replaces the deprecated upper-case 'H' alias.
irreg_ts.resample('h')

DatetimeIndexResampler [freq=<Hour>, axis=0, closed=left, label=left, convention=start, base=0]