In [1]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
import math
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
time = pd.read_csv(r"C:\Users\ramya\Downloads\time_series_with_external_factors.csv")

In [3]:
time['date'] = pd.to_datetime(time['date'])
time = time.set_index('date')

In [4]:
time.head()

Unnamed: 0_level_0,electricity_demand,temperature_celsius,rainfall_mm,is_holiday
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-01-01,414.34,26.17,1.85,0
2019-01-02,399.36,25.07,12.7,0
2019-01-03,391.19,26.81,8.28,0
2019-01-04,433.55,28.73,5.96,0
2019-01-05,428.43,25.39,2.98,1


In [5]:
# lag features created
time['lag_1'] = time['electricity_demand'].shift(1)
time['lag_7'] = time['electricity_demand'].shift(7)
time['lag_30'] = time['electricity_demand'].shift(30)

time[['electricity_demand','lag_1','lag_7','lag_30']].head(35)

Unnamed: 0_level_0,electricity_demand,lag_1,lag_7,lag_30
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-01-01,414.34,,,
2019-01-02,399.36,414.34,,
2019-01-03,391.19,399.36,,
2019-01-04,433.55,391.19,,
2019-01-05,428.43,433.55,,
2019-01-06,417.23,428.43,,
2019-01-07,443.44,417.23,,
2019-01-08,399.71,443.44,414.34,
2019-01-09,408.87,399.71,399.36,
2019-01-10,475.9,408.87,391.19,


In [6]:
# Creating rolling Features

In [7]:
time['rolling_7_mean'] = time['electricity_demand'].rolling(window=7).mean()
time['rolling_30_mean'] = time['electricity_demand'].rolling(window=30).mean()

time[['rolling_7_mean','rolling_30_mean']].head(40)

Unnamed: 0_level_0,rolling_7_mean,rolling_30_mean
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-01-01,,
2019-01-02,,
2019-01-03,,
2019-01-04,,
2019-01-05,,
2019-01-06,,
2019-01-07,418.22,
2019-01-08,416.13,
2019-01-09,417.488571,
2019-01-10,429.59,


In [None]:
# Createing Calendar feature

In [8]:
time['day'] = time.index.day
time['month'] = time.index.month
time['day_of_week'] = time.index.dayofweek
time['week_of_year'] = time.index.isocalendar().week

time[['day','month','day_of_week','week_of_year']].head()

Unnamed: 0_level_0,day,month,day_of_week,week_of_year
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-01-01,1,1,1,1
2019-01-02,2,1,2,1
2019-01-03,3,1,3,1
2019-01-04,4,1,4,1
2019-01-05,5,1,5,1


In [9]:
# Createing weekend feature
time['is_weekend'] = np.where(time['day_of_week'] >= 5, 1, 0)
time[['day_of_week','is_weekend']].head(20)

Unnamed: 0_level_0,day_of_week,is_weekend
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-01-01,1,0
2019-01-02,2,0
2019-01-03,3,0
2019-01-04,4,0
2019-01-05,5,1
2019-01-06,6,1
2019-01-07,0,0
2019-01-08,1,0
2019-01-09,2,0
2019-01-10,3,0


In [10]:
# Handle missing values
time.isnull().sum()

electricity_demand      0
temperature_celsius     0
rainfall_mm             0
is_holiday              0
lag_1                   1
lag_7                   7
lag_30                 30
rolling_7_mean          6
rolling_30_mean        29
day                     0
month                   0
day_of_week             0
week_of_year            0
is_weekend              0
dtype: int64

In [11]:
# Drop Missing Rows

time = time.dropna()
time.isna().sum()

electricity_demand     0
temperature_celsius    0
rainfall_mm            0
is_holiday             0
lag_1                  0
lag_7                  0
lag_30                 0
rolling_7_mean         0
rolling_30_mean        0
day                    0
month                  0
day_of_week            0
week_of_year           0
is_weekend             0
dtype: int64

In [12]:
# Feature vs Target Separation

X = time.drop('electricity_demand', axis=1)
Y = time['electricity_demand']

print("Feature shape:", X.shape)
print("Target shape:", Y.shape)

Feature shape: (1796, 13)
Target shape: (1796,)


In [13]:
X # it will only contains feature variables

Unnamed: 0_level_0,temperature_celsius,rainfall_mm,is_holiday,lag_1,lag_7,lag_30,rolling_7_mean,rolling_30_mean,day,month,day_of_week,week_of_year,is_weekend
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2019-01-31,28.88,3.04,0,445.62,430.58,414.34,435.101429,426.843667,31,1,3,5,0
2019-02-01,33.94,2.47,0,429.01,436.01,399.36,437.488571,428.622333,1,2,4,5,0
2019-02-02,30.35,6.83,1,452.72,438.60,391.19,435.014286,429.625333,2,2,5,5,1
2019-02-03,28.41,7.43,1,421.28,411.02,433.55,439.692857,429.966000,3,2,6,5,1
2019-02-04,32.31,5.02,0,443.77,441.64,428.43,444.185714,431.454667,4,2,0,6,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-27,24.47,10.17,0,422.51,393.25,392.15,406.455714,399.983000,27,12,2,52,0
2023-12-28,24.03,3.58,0,405.44,405.71,361.87,407.762857,401.749333,28,12,3,52,0
2023-12-29,23.36,4.53,0,414.86,384.14,403.54,409.514286,401.511333,29,12,4,52,0
2023-12-30,25.17,4.23,1,396.40,418.20,393.41,416.420000,403.949000,30,12,5,52,1


In [14]:
Y # it will only can target variables

date
2019-01-31    429.01
2019-02-01    452.72
2019-02-02    421.28
2019-02-03    443.77
2019-02-04    473.09
               ...  
2023-12-27    405.44
2023-12-28    414.86
2023-12-29    396.40
2023-12-30    466.54
2023-12-31    402.21
Name: electricity_demand, Length: 1796, dtype: float64

In [15]:
X.columns # Final Feature List

Index(['temperature_celsius', 'rainfall_mm', 'is_holiday', 'lag_1', 'lag_7',
       'lag_30', 'rolling_7_mean', 'rolling_30_mean', 'day', 'month',
       'day_of_week', 'week_of_year', 'is_weekend'],
      dtype='object')

In [16]:
time.to_csv("feature_engineered_time_series.csv")