# Electricity demand forecasting

In [5]:
import pandas as pd
import matplotlib.pyplot as plt
import requests
from meteostat import Hourly

### Load and clean the data from the power plant

In [6]:
# Read the raw power-plant export: a semicolon-separated file whose 'Date'
# column holds date and time separated by a space, and whose 'Power [MW]'
# column is stored as text with a decimal comma.
df = pd.read_csv('data/power_plant.csv', delimiter=';')

# Drop readings recorded as the placeholder '-'. The explicit .copy() makes the
# filtered frame independent of the original, so the column assignments below
# do not trigger SettingWithCopyWarning.
df = df[df['Power [MW]'] != '-'].copy()

# Decimal comma -> decimal point (literal replacement, not a regex), then cast.
df['Power [MW]'] = df['Power [MW]'].str.replace(',', '.', regex=False).astype(float)

# Split the combined timestamp into separate 'Date' and 'Time' text columns.
df[['Date', 'Time']] = df['Date'].str.split(' ', expand=True)

# Normalise the date from DD.MM.YYYY to ISO 'YYYY-MM-DD' text; the later cells
# match on this string form.
df['Date'] = pd.to_datetime(df['Date'], format='%d.%m.%Y').dt.strftime("%Y-%m-%d")

# Keep only the three columns used downstream, in a fixed order.
df = df.reindex(columns=['Date', 'Time', 'Power [MW]'])

### Fetch holiday data

In [7]:
# years = pd.to_datetime(df['Date'], format='%Y-%m-%d').dt.strftime("%Y").unique()
# holiday_dates = []
# for year in years:
#     url = f"https://digidates.de/api/v1/germanpublicholidays?year={year}&region=de-be"
#     response = requests.get(url)
#     holidays = response.json()
#     [holiday_dates.append(holiday) for holiday in holidays.keys()]

# holiday_df = pd.DataFrame(data={"Date": holiday_dates})
# holiday_df.to_csv('data/holidays.csv')

# Public-holiday dates are read from the CSV cache rather than fetched live.
holiday_df = pd.read_csv('data/holidays.csv')

# Mark every reading whose 'Date' string appears in the holiday list.
# Assumes the cached dates use the same 'YYYY-MM-DD' text form as df['Date'] — TODO confirm.
is_holiday = df['Date'].isin(holiday_df['Date'])
df['Holiday'] = is_holiday

df.head()

Unnamed: 0,Date,Time,Power [MW],Holiday
384,2015-01-05,00:00,56.0,False
385,2015-01-05,00:15,56.0,False
386,2015-01-05,00:30,56.0,False
387,2015-01-05,00:45,56.0,False
388,2015-01-05,01:00,56.25,False


### Fetch weather data

In [8]:
# start = min(pd.to_datetime(df['Date'], format='%Y-%m-%d'))
# latest_date = pd.to_datetime(df['Date'], format='%Y-%m-%d').max().strftime('%Y-%m-%d')
# latest_time = df.loc[df['Date'] == latest_date, 'Time'].max()
# end = pd.to_datetime(f"{latest_date} {latest_time}")

# weather = Hourly('10384', start, end)
# weather = weather.fetch()
# weather.to_csv('data/weather.csv')

# Hourly weather observations, read from the CSV cache.
weather_df = pd.read_csv('data/weather.csv')

# Replace the short source column codes with descriptive names.
# NOTE(review): Meteostat documents wspd/wpgt in km/h, so the '[km/s]' labels
# look wrong. They are left unchanged here because later cells may select
# these columns by name — confirm and rename everywhere in one go.
weather_df = weather_df.rename(columns={
    'time': 'Date',
    'temp': 'Temperature [°C]',
    'dwpt': 'Dew Point [°C]',
    'rhum': 'Relative Humidity [%]',
    'prcp': 'Precipitation [mm]',
    'snow': 'Snow depth [mm]',
    'wdir': 'Wind Direction [°]',
    'wspd': 'Average Wind Speed [km/s]',
    'wpgt': 'Peak Wind Speed [km/s]',
    'pres': 'Average Sea-Level Air Pressure [hPa]',
    'tsun': 'Sunshine Duration [min]',
    'coco': 'Weather Condition Code'
})

# Index by timestamp so the observations can be resampled.
weather_df['Date'] = pd.to_datetime(weather_df['Date'])
hourly_weather = weather_df.set_index('Date').sort_index()

# Upsample to the 15-minute grid used by the power readings; measurements are
# filled in linearly between the hourly observations.
weather_df = hourly_weather.resample('15min').interpolate(method='linear')

# The condition code is categorical: interpolating it linearly invents codes
# that were never observed (going from 3 to 7 passes through 4, 5 and 6).
# Carry the last observed code forward instead. asfreq().ffill() keeps the same
# NaN coverage as before: leading gaps stay NaN, later gaps are filled.
condition_col = 'Weather Condition Code'
if condition_col in weather_df.columns:
    weather_df[condition_col] = (
        hourly_weather[condition_col].resample('15min').asfreq().ffill()
    )
# NOTE(review): 'Wind Direction [°]' is circular (359° is next to 0°) but is
# still interpolated linearly above — consider a circular interpolation.

# Expose the timestamp as the same 'Date' / 'Time' text keys the power data
# uses. 'Time' is derived first because 'Date' is overwritten right after.
weather_df = weather_df.reset_index()
weather_df['Time'] = weather_df['Date'].dt.strftime('%H:%M')
weather_df['Date'] = weather_df['Date'].dt.strftime('%Y-%m-%d')

# Attach the weather columns to every power reading; readings without a
# matching weather row keep NaN in those columns.
# NOTE(review): timestamps are matched as-is — confirm both sources use the
# same timezone (Meteostat hourly data defaults to UTC).
df = pd.merge(df, weather_df, on=['Date', 'Time'], how='left')