# Improving Accuracy of Energy Timeseries Forecasts EnBW Datalab

In [1]:
# IMPORTING LIBRARIES

import pandas as pd
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
from statsmodels.tsa.seasonal import seasonal_decompose
import os
import torch
from torch.utils.data import Dataset, DataLoader
import seaborn as sns; sns.set()
import lightgbm as lgb
import math
import smtplib
from datetime import datetime
from email.message import EmailMessage
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf
import pvlib
from datetime import date, timedelta
import holidays
from workalendar.europe import Germany
from datetime import date, timedelta

import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import mean_squared_error

In [3]:
# Load Data


data = pd.read_csv("./timeseries_data/trafo_67.csv")
data["timestamp"] = pd.to_datetime(data["timestampUtc"])
data_modified = data.drop(columns=["timestampUtc"]).set_index("timestamp")

In [6]:
data_modified

Unnamed: 0_level_0,value,t_2mc,aswdifd_s,aswdir_s,vmax_10m,tot_prec
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-10-01 00:00:00+00:00,-0.089358,5.134399,0.0,0.0,2.764953,0.0
2021-10-01 00:15:00+00:00,-0.086939,5.134399,0.0,0.0,2.764953,0.0
2021-10-01 00:30:00+00:00,-0.137255,5.134399,0.0,0.0,2.764953,0.0
2021-10-01 00:45:00+00:00,-0.066135,5.134399,0.0,0.0,2.764953,0.0
2021-10-01 01:00:00+00:00,-0.073392,4.748993,0.0,0.0,2.036687,0.0
...,...,...,...,...,...,...
2023-09-30 23:00:00+00:00,-0.428512,10.969238,0.0,0.0,2.460899,0.0
2023-09-30 23:15:00+00:00,-0.454154,10.969238,0.0,0.0,2.460899,0.0
2023-09-30 23:30:00+00:00,-0.485602,10.969238,0.0,0.0,2.460899,0.0
2023-09-30 23:45:00+00:00,-0.492859,10.969238,0.0,0.0,2.460899,0.0


### FEATURE ENGINEERING

In [4]:
# Create a Germany calendar instance
germany_calendar = Germany()

# Define the range of years for which you want to get the holidays
start_year = 2019
end_year = 2023

# Initialize an empty DataFrame to store holidays
holidays_df = pd.DataFrame(columns=['date', 'holiday_name'])

# Get holidays for each year and append to the DataFrame
for year in range(start_year, end_year + 1):
    holidays = germany_calendar.holidays(year)
    year_holidays_df = pd.DataFrame(holidays, columns=['date', 'holiday_name'])
    holidays_df = pd.concat([holidays_df, year_holidays_df], ignore_index=True)

holidays_df = holidays_df.sort_values(by='date')

In [7]:
# Co ordinates of Stuttgart for PVLib features

latitude = 48.7758  # Replace with your latitude
longitude = 9.1829  # Replace with your longitude

In [20]:
data_modified_fe = data_modified.copy()

# Encoding local timestamp
data_modified_fe['Day_value_Stamp'] = data_modified.index.hour * 4 + data_modified.index.minute / 15
# Day of the week
data_modified_fe['day_of_week'] = data_modified.index.dayofweek
# Day of the year
data_modified_fe['Day_value_Stamp'] = data_modified.index.dayofyear
# Weekend flag
data_modified_fe['is_weekend'] = (data_modified_fe['day_of_week'] >= 5).astype(int)
# Holidays
data_dates = pd.to_datetime(data_modified_fe.index.date)
data_modified_fe['Holidays'] = data_dates.isin(holidays_df['date']).astype(int)
# Indicator variable for bridge days
data_modified_fe['is_bridgeday'] = (
    (data_modified_fe['day_of_week'] == 0) &  # Monday
    (data_modified_fe['Holidays'].shift(-1) == 1) &  # Holiday on Tuesday
    (data_modified_fe['Holidays'] != 1)  ).astype(int)
# Seasons 0-winter; 1-spring; 2-summer; 3-fall
data_modified_fe["season"] = np.where(data_modified_fe.index.month.isin([12, 1, 2]), 0, 1)
data_modified_fe["season"] = np.where(data_modified_fe.index.month.isin([3, 4, 5]), 1, data_modified_fe["season"])
data_modified_fe["season"] = np.where(data_modified_fe.index.month.isin([6, 7, 8]), 2, data_modified_fe["season"])
data_modified_fe["season"] = np.where(data_modified_fe.index.month.isin([9, 10, 11]), 3, data_modified_fe["season"])


# Feature based on Solar position: zenith and azimuth angles
data_modified_fe['latitude'] = latitude
data_modified_fe['longitude'] = longitude
solar_position = pvlib.solarposition.get_solarposition(
    data_modified_fe.index , data_modified_fe['latitude'], data_modified_fe['longitude']  )
zenith_angle = solar_position['apparent_zenith']
azimuth_angle = solar_position['azimuth']

data_modified_fe['zenith_angle'] = zenith_angle
data_modified_fe['azimuth_angle'] = azimuth_angle
data_modified_fe.drop(columns=["latitude", "longitude"], inplace=True)



In [21]:
data_modified_fe.head(20000)

Unnamed: 0_level_0,value,t_2mc,aswdifd_s,aswdir_s,vmax_10m,tot_prec,Day_value_Stamp,day_of_week,is_weekend,Holidays,is_bridgeday,season,zenith_angle,azimuth_angle
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2021-10-01 00:00:00+00:00,-0.089358,5.134399,0.00000,0.00000,2.764953,0.0,274,4,0,0,0,3,133.317310,16.227227
2021-10-01 00:15:00+00:00,-0.086939,5.134399,0.00000,0.00000,2.764953,0.0,274,4,0,0,0,3,132.527496,21.227158
2021-10-01 00:30:00+00:00,-0.137255,5.134399,0.00000,0.00000,2.764953,0.0,274,4,0,0,0,3,131.539300,26.091239
2021-10-01 00:45:00+00:00,-0.066135,5.134399,0.00000,0.00000,2.764953,0.0,274,4,0,0,0,3,130.365580,30.800709
2021-10-01 01:00:00+00:00,-0.073392,4.748993,0.00000,0.00000,2.036687,0.0,274,4,0,0,0,3,129.020361,35.343468
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-04-27 06:45:00+00:00,0.577339,6.171021,129.24979,148.55510,3.964745,0.0,117,2,0,0,0,1,65.793569,96.551892
2022-04-27 07:00:00+00:00,0.422518,7.961273,175.28421,188.56332,4.347351,0.0,117,2,0,0,0,1,63.347885,99.534867
2022-04-27 07:15:00+00:00,0.365912,7.961273,175.28421,188.56332,4.347351,0.0,117,2,0,0,0,1,60.923230,102.604197
2022-04-27 07:30:00+00:00,0.345592,7.961273,175.28421,188.56332,4.347351,0.0,117,2,0,0,0,1,58.527490,105.776251
