# Resampling Time Series Data

In [63]:
import pandas as pd

# Load the weekly demand index and use the 'Week' column as the row index.
# date_format describes how the dates are written in the file:
# month/day/two-digit year (see the formatting guide further down).
df = pd.read_csv(
    '../data/demand_index.csv',
    index_col=['Week'],
    date_format='%m/%d/%y',
)

# Preview the first ten weekly observations.
df.head(10)

Unnamed: 0_level_0,demand_index
Week,Unnamed: 1_level_1
2018-12-30,125
2019-01-06,108
2019-01-13,153
2019-01-20,153
2019-01-27,173
2019-02-03,195
2019-02-10,170
2019-02-17,213
2019-02-24,216
2019-03-03,181


In [87]:
# Resampling: aggregate the weekly series into coarser time buckets.
#   Frequency aliases: D = day, W = week, M = month, Q = quarter, A = year
#   Aggregations:      mean(), sum(), min(), max()
# NOTE(review): pandas 2.2+ deprecates the 'M' and 'A' aliases in favour of
# 'ME' and 'YE' -- confirm the target pandas version before switching.

# Average demand index per calendar month and total demand index per calendar
# year; each bucket is labelled with the last day of its period.
monthly_average = df.resample('M').mean()
annual_total = df.resample('A').sum()

# Show the first five rows of each result.
print(monthly_average.head())
print(annual_total.head())

            demand_index
Week                    
2018-12-31        125.00
2019-01-31        146.75
2019-02-28        198.50
2019-03-31        175.00
2019-04-30        168.00
            demand_index
Week                    
2018-12-31           125
2019-12-31          9121
2020-12-31          8790
2021-12-31          8873
2022-12-31          7660


## Date/Time Formatting Guide
Year:

%Y: Full year (2024)
%y: Two-digit year (24)

Month:
%m: Month as zero-padded decimal number (01-12)
%b: Abbreviated month name (Jan-Dec)
%B: Full month name (January-December)

Day and weekday:
%d: Day of the month as zero-padded decimal number (01-31)
%A: Full weekday name (Monday-Sunday)
%a: Abbreviated weekday name (Mon-Sun)

Time:
%H: Hour in 24-hour format (00-23)
%I: Hour in 12-hour format (01-12)
%M: Minute as zero-padded decimal number (00-59)
%S: Second as zero-padded decimal number (00-59)
%p: Meridian indicator (AM or PM)

Examples:
"%Y-%m-%d": Year-month-day format (2024-06-10)
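"%d/%m/%Y": Day-month-year format (10/06/2024)
"%m/%d/%y": Month-day-two-digit-year format (06/10/24), the format passed to read_csv as date_format in this notebook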
"%A, %B %d, %Y": Full weekday, full month name, day, year (Monday, June 10, 2024)
"%H:%M:%S": Hour in 24-hour format, minute, second (11:16:23)

Python strftime/strptime format codes: https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes

pandas to_datetime function with format strings: https://pandas.pydata.org/docs/reference/api/pandas.to_datetime.html

In [69]:
# Selecting / slicing rows by date label on the date index.
range_start = '2019-01-06'
range_end = '2019-01-20'

# A single date string selects the observation for that week.
data_on_date = df.loc[range_start]

# A start:end slice of date strings selects a span of weeks; label-based
# slices with .loc include both endpoints.
data_in_range = df.loc[range_start:range_end]

In [83]:
# Resampling: roll the weekly observations up to month and year buckets.
#   Frequency aliases: D = day, W = week, M = month, Q = quarter, A = year
#   Aggregations:      mean(), sum(), min(), max()
# NOTE(review): pandas 2.2+ deprecates the 'M' and 'A' aliases in favour of
# 'ME' and 'YE' -- confirm the target pandas version before switching.

# Monthly mean and yearly sum of the demand index; each bucket is labelled
# with the last day of its period.
monthly_average = df.resample('M').mean()
annual_total = df.resample('A').sum()

# First five months, then every yearly total.
print(monthly_average.head(5))
print(annual_total)

            demand_index
Week                    
2018-12-31        125.00
2019-01-31        146.75
2019-02-28        198.50
2019-03-31        175.00
2019-04-30        168.00
            demand_index
Week                    
2018-12-31           125
2019-12-31          9121
2020-12-31          8790
2021-12-31          8873
2022-12-31          7660
2023-12-31          6156


In [69]:
# Selecting / slicing rows by date label on the date index.
range_start = '2019-01-06'
range_end = '2019-01-20'

# One date string -> the observation recorded for that week.
data_on_date = df.loc[range_start]

# start:end slice of date strings -> every week in the span; label-based
# slices with .loc include both endpoints.
data_in_range = df.loc[range_start:range_end]