In [1]:
!pip install holidays
!pip install statsmodels

import pandas as pd
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
import holidays
from statsmodels.tsa.seasonal import seasonal_decompose




Collecting holidays
  Downloading holidays-0.77-py3-none-any.whl.metadata (46 kB)
Downloading holidays-0.77-py3-none-any.whl (1.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: holidays
Successfully installed holidays-0.77
Collecting statsmodels
  Downloading statsmodels-0.14.5-cp312-cp312-macosx_11_0_arm64.whl.metadata (9.5 kB)
Collecting patsy>=0.5.6 (from statsmodels)
  Downloading patsy-1.0.1-py2.py3-none-any.whl.metadata (3.3 kB)
Downloading statsmodels-0.14.5-cp312-cp312-macosx_11_0_arm64.whl (9.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.7/9.7 MB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading patsy-1.0.1-py2.py3-none-any.whl (232 kB)
Installing collected packages: patsy, statsmodels
Successfully installed patsy-1.0.1 statsmodels-0.14.5


In [4]:
# Read the consumption series; the `datetime` column is parsed as timestamps
# and becomes the index of the frame.
csv_path = Path("../data/processed/electricity_total_consumption_15min.csv")
df = pd.read_csv(
    csv_path,
    index_col="datetime",
    parse_dates=["datetime"],
)

# All engineered columns go onto a copy so that `df` keeps the data as loaded.
df_feat = df.copy()

# —————————————
# Time-Based Features
# —————————————

# Copy the calendar components straight off the datetime index, one column
# per attribute (column name == index attribute name).
for calendar_attr in ("hour", "dayofweek", "month", "day"):
    df_feat[calendar_attr] = getattr(df_feat.index, calendar_attr)

# pandas numbers weekdays Monday=0 … Sunday=6, so 5 and 6 are the weekend.
df_feat["isweekend"] = df_feat["dayofweek"].ge(5)


# —————————————
# Cyclical Encoding
# —————————————

# Map hour (0–23) and month (1–12, shifted to 0–11) onto an angle of the unit
# circle, then store its sine and cosine so that the last and first value of
# each cycle end up next to each other.
hour_angle = 2 * np.pi * df_feat["hour"] / 24
month_angle = 2 * np.pi * (df_feat["month"] - 1) / 12

df_feat["hour_sin"] = np.sin(hour_angle)
df_feat["hour_cos"] = np.cos(hour_angle)
df_feat["month_sin"] = np.sin(month_angle)
df_feat["month_cos"] = np.cos(month_angle)

# —————————————
# Swiss Holidays
# —————————————

# A `holidays` calendar created without `years` starts out empty and is only
# filled lazily on individual date lookups. Passing it directly to `isin`
# therefore compared against an empty collection and flagged no day at all.
# Pre-populate the calendar with every year present in the data instead.
data_years = df_feat.index.year.unique().tolist()
ch_holidays = holidays.CH(years=data_years)

# Compare midnight-normalised timestamps against the holiday dates, converted
# explicitly to timestamps so both sides of `isin` have the same type.
holiday_dates = pd.to_datetime(list(ch_holidays.keys()))
df_feat['is_holiday'] = df_feat.index.normalize().isin(holiday_dates)



# —————————————
# Moving Average / Rolling Statistics
# —————————————

# Windows are expressed in rows: 96 rows per day corresponds to one reading
# every 15 minutes.
# NOTE(review): this assumes the index has no gaps or duplicates — confirm.
SAMPLES_PER_DAY = 96

consumption = df_feat['total_consumption_kWh']

# min_periods=1 yields a value from the very first row on (averaging over
# whatever history is available) instead of NaN until the window is full.
df_feat['rolling_24h_mean'] = consumption.rolling(SAMPLES_PER_DAY, min_periods=1).mean()
df_feat['rolling_7d_mean']  = consumption.rolling(SAMPLES_PER_DAY * 7, min_periods=1).mean()


# —————————————
# Seasonal Decomposition on Daily Sums
# —————————————

# Collapse the series to one consumption total per calendar day.
daily = df_feat["total_consumption_kWh"].resample("D").sum()

# Additive decomposition with a 365-day period.
# NOTE(review): statsmodels requires at least two complete periods
# (730 daily values here) — confirm the data covers that span.
decomp = seasonal_decompose(
    daily,
    model="additive",
    period=365,
    extrapolate_trend="freq",
)

# Bring the daily components back onto the original index: forward-filling
# gives every timestamp the value of the day it falls on.
for component in ("trend", "seasonal"):
    df_feat[component] = getattr(decomp, component).reindex(df_feat.index, method="ffill")


# —————————————
# Write Feature Table
# —————————————

# Create the target directory (and any missing parents) if necessary, then
# write the feature frame, index included, as CSV.
out_dir = Path("../data/processed")
out_dir.mkdir(parents=True, exist_ok=True)

features_path = out_dir / "electricity_features_15min.csv"
df_feat.to_csv(features_path)


