In [1]:

import pandas as pd

In [2]:

# Sampling window: fixed start date, open-ended up to the moment this cell runs.
# NOTE: `end` is evaluated at execution time, so every output below depends on the run date.
start, end = '2022-1-1', pd.to_datetime('today')

In [3]:

# Build one timestamp per calendar month-end between start and end.
# An explicit MonthEnd offset is used instead of the 'M' string alias: the alias
# is deprecated in pandas >= 2.2 (renamed 'ME'), while the offset object yields
# the same dates on both older and newer pandas releases.
monthly_dates = pd.date_range(start, end, freq=pd.offsets.MonthEnd())
print(monthly_dates)

DatetimeIndex(['2022-01-31', '2022-02-28'], dtype='datetime64[ns]', freq='M')


In [4]:

# Monthly series: values 1..N, one per month-end timestamp
data = range(1, 1 + len(monthly_dates))
monthly = pd.Series(data, index=monthly_dates)
print(monthly)

2022-01-31    1
2022-02-28    2
Freq: M, dtype: int64


## Upsampling with asfreq()

In [5]:

# Upsampling = converting to a higher frequency (more data points).
# asfreq('W') builds a weekly (week-ending-Sunday) index spanning the first to the last
# timestamp of `monthly`; weeks without an exact match in `monthly` come out as NaN.
weekly = monthly.asfreq(freq='W')
print(weekly)

2022-02-06   NaN
2022-02-13   NaN
2022-02-20   NaN
2022-02-27   NaN
Freq: W-SUN, dtype: float64


In [6]:

# Collect the upsampled series in a dataframe so the fill methods can be compared
# column by column.
# The frame is rebuilt from `monthly` rather than from the current value of `weekly`,
# so the cell gives the same result when re-run (a DataFrame has no .to_frame(), and
# `weekly` is rebound to a different series further down the notebook).
weekly = monthly.asfreq('W').to_frame('original')
weekly.head()

Unnamed: 0,original
2022-02-06,
2022-02-13,
2022-02-20,
2022-02-27,


In [7]:
# Add one column per fill strategy for the gaps introduced by upsampling
weekly['ffill'] = monthly.asfreq(freq='W', method='ffill')  # carry the last known value forward
weekly['bfill'] = monthly.asfreq(freq='W', method='bfill')  # pull the next known value backward
weekly['fixed'] = monthly.asfreq(freq='W', fill_value=0)    # replace gaps with a constant (0)
weekly.head()

Unnamed: 0,original,ffill,bfill,fixed
2022-02-06,,1,2,0
2022-02-13,,1,2,0
2022-02-20,,1,2,0
2022-02-27,,1,2,0


## Upsampling through reindexing

In [8]:

# Weekly series over the same window: values 1..N, one per week-end date
weekly_dates = pd.date_range(start=start, end=end, freq='W')
data = range(1, 1 + len(weekly_dates))
weekly = pd.Series(data, index=weekly_dates)
print(weekly)

2022-01-02     1
2022-01-09     2
2022-01-16     3
2022-01-23     4
2022-01-30     5
2022-02-06     6
2022-02-13     7
2022-02-20     8
2022-02-27     9
2022-03-06    10
2022-03-13    11
2022-03-20    12
Freq: W-SUN, dtype: int64


In [9]:

# Align the monthly series to the weekly index.
# Weekly dates with no exact match in `monthly` become NaN.
# weekly_dates starts at the beginning of the window (from 1 Jan), so the early weeks
# are kept here, which makes this approach potentially less error-prone.
monthly_upsampled = monthly.reindex(index=weekly_dates)
monthly_upsampled

2022-01-02   NaN
2022-01-09   NaN
2022-01-16   NaN
2022-01-23   NaN
2022-01-30   NaN
2022-02-06   NaN
2022-02-13   NaN
2022-02-20   NaN
2022-02-27   NaN
2022-03-06   NaN
2022-03-13   NaN
2022-03-20   NaN
Freq: W-SUN, dtype: float64

In [10]:

# Same reindexing, but gaps take the most recent earlier monthly value (forward fill);
# weeks before the first monthly timestamp have nothing to carry forward and stay NaN
monthly_upsampled = monthly.reindex(index=weekly_dates, method='ffill')
monthly_upsampled

2022-01-02    NaN
2022-01-09    NaN
2022-01-16    NaN
2022-01-23    NaN
2022-01-30    NaN
2022-02-06    1.0
2022-02-13    1.0
2022-02-20    1.0
2022-02-27    1.0
2022-03-06    2.0
2022-03-13    2.0
2022-03-20    2.0
Freq: W-SUN, dtype: float64

In [11]:

# Same reindexing, but gaps take the next later monthly value (backward fill);
# weeks after the last monthly timestamp have nothing to pull back and stay NaN
monthly_upsampled = monthly.reindex(index=weekly_dates, method='bfill')
monthly_upsampled

2022-01-02    1.0
2022-01-09    1.0
2022-01-16    1.0
2022-01-23    1.0
2022-01-30    1.0
2022-02-06    2.0
2022-02-13    2.0
2022-02-20    2.0
2022-02-27    2.0
2022-03-06    NaN
2022-03-13    NaN
2022-03-20    NaN
Freq: W-SUN, dtype: float64

In [12]:

# Same reindexing, but every gap is replaced with a constant (here 1)
monthly_upsampled = monthly.reindex(index=weekly_dates, fill_value=1)
monthly_upsampled

2022-01-02    1
2022-01-09    1
2022-01-16    1
2022-01-23    1
2022-01-30    1
2022-02-06    1
2022-02-13    1
2022-02-20    1
2022-02-27    1
2022-03-06    1
2022-03-13    1
2022-03-20    1
Freq: W-SUN, dtype: int64