# Electricity demand forecasting

In [11]:
import pandas as pd
import matplotlib.pyplot as plt
import requests
from meteostat import Hourly

### Load and clean the data from power plant

In [12]:
df = pd.read_csv('data/power_plant.csv', delimiter=';')
df = df[df['Power'] != '-']
df['Power'] = df['Power'].str.replace(',', '.').astype(float)
df[['Date', 'Time']] = df['Date'].str.split(' ', expand=True)
df['Date'] = pd.to_datetime(df['Date'], format='%d.%m.%Y').dt.strftime("%Y-%m-%d")
df = df.reindex(columns=['Date', 'Time', 'Power'])

### Fetch holiday data

In [13]:
# years = pd.to_datetime(df['Date'], format='%Y-%m-%d').dt.strftime("%Y").unique()
# holiday_dates = []
# for year in years:
#     url = f"https://digidates.de/api/v1/germanpublicholidays?year={year}&region=de-be"
#     response = requests.get(url)
#     holidays = response.json()
#     [holiday_dates.append(holiday) for holiday in holidays.keys()]

# holiday_df = pd.DataFrame(data={"Date": holiday_dates})
# holiday_df.to_csv('data/holidays.csv')

holiday_df = pd.read_csv('data/holidays.csv')
df['Holiday'] = df['Date'].isin(holiday_df['Date'])
df.head()

Unnamed: 0,Date,Time,Power,Holiday
384,2015-01-05,00:00,56.0,False
385,2015-01-05,00:15,56.0,False
386,2015-01-05,00:30,56.0,False
387,2015-01-05,00:45,56.0,False
388,2015-01-05,01:00,56.25,False


### Fetch weather data

In [None]:
# start = min(pd.to_datetime(df['Date'], format='%Y-%m-%d'))
# latest_date = pd.to_datetime(df['Date'], format='%Y-%m-%d').max().strftime('%Y-%m-%d')
# latest_time = df.loc[df['Date'] == latest_date, 'Time'].max()
# end = pd.to_datetime(f"{latest_date} {latest_time}")

# weather = Hourly('10384', start, end)
# weather = weather.fetch()
# weather.to_csv('data/weather_data.csv')

weather_df = pd.read_csv('data/weather.csv')

weather_df = weather_df.rename(columns={
    'time': 'Date',
    'temp': 'Temperature',
    'dwpt': 'Dew Point',
    'rhum': 'Relative Humidity',
    'prcp': 'Precipitation',
    'snow': 'Snow depth',
    'wdir': 'Wind Direction',
    'wspd': 'Average Wind Speed',
    'wpgt': 'Peak Wind Speed',
    'pres': 'Average Sea-Level Air Pressure',
    'tsun': 'Sunshine Duration',
    'coco': 'Weather Condition Code'
})

weather_df['Date'] = pd.to_datetime(weather_df['Date'])
weather_df = weather_df.set_index('Date').sort_index()
weather_df = weather_df.resample('15min').interpolate(method='linear')
weather_df = weather_df.reset_index()
weather_df[['Date', 'Time']] = weather_df['Date'].astype(str).str.split(' ', expand=True)
weather_df['Time'] = pd.to_datetime(weather_df['Time'], format='%H:%M:%S').dt.strftime('%H:%M')

df = pd.merge(df, weather_df, on=['Date', 'Time'], how='left')
df['Datetime'] = pd.to_datetime(df['Date'].astype(str) + ' ' + df['Time'].astype(str), format='%Y-%m-%d %H:%M')
df = df.drop(columns=['Date', 'Time'])
df['Hour'] = df['Datetime'].dt.hour
df['DayOfWeek'] = df['Datetime'].dt.dayofweek
df['Month'] = df['Datetime'].dt.month
df['IsWeekend'] = df['DayOfWeek'].isin([5,6]).astype(int)
df.reindex(columns=['Datetime'] + [col for col in df.columns if col != 'Datetime'])


Unnamed: 0,Datetime,Power,Holiday,Temperature,Dew Point,Relative Humidity,Precipitation,Snow depth,Wind Direction,Average Wind Speed,Peak Wind Speed,Average Sea-Level Air Pressure,Sunshine Duration,Weather Condition Code,Hour,DayOfWeek,Month,IsWeekend
0,2015-01-05 00:00:00,56.00,False,2.200,0.400,88.00,0.0,,290.0,19.800,,1029.000,,,0,0,1,0
1,2015-01-05 00:15:00,56.00,False,2.250,0.525,88.50,0.0,,287.5,19.075,,1028.950,,,0,0,1,0
2,2015-01-05 00:30:00,56.00,False,2.300,0.650,89.00,0.0,,285.0,18.350,,1028.900,,,0,0,1,0
3,2015-01-05 00:45:00,56.00,False,2.350,0.775,89.50,0.0,,282.5,17.625,,1028.850,,,0,0,1,0
4,2015-01-05 01:00:00,56.25,False,2.400,0.900,90.00,0.0,,280.0,16.900,,1028.800,,,1,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
349547,2024-12-31 22:45:00,39.00,False,2.225,-0.725,80.75,0.0,0.0,220.0,22.075,36.5,1019.475,0.0,4.0,22,1,12,0
349548,2024-12-31 23:00:00,39.00,False,2.100,-0.800,81.00,0.0,0.0,220.0,22.000,36.0,1019.400,0.0,4.0,23,1,12,0
349549,2024-12-31 23:15:00,39.00,False,,,,,,,,,,,,23,1,12,0
349550,2024-12-31 23:30:00,39.00,False,,,,,,,,,,,,23,1,12,0


### Save dataset

In [15]:
df.to_csv('data/dataset.csv', sep=';', index=False)