In [137]:
from tqdm import tqdm
from requests import request
import pandas as pd

In [138]:
res = request("GET", 'https://www.smard.de/app/chart_data/410/DE/index_hour.json')
timestamps = res.json()["timestamps"]

In [139]:
time_series = []
for timestamp in tqdm(timestamps):
    res = request("GET", "https://www.smard.de/app/chart_data/410/DE/410_DE_hour_" +str(timestamp)+".json")
    body = res.json()["series"]
    time_series += body

100%|██████████| 459/459 [01:02<00:00,  7.38it/s]


In [140]:
energy_consumption = pd.DataFrame(time_series)
energy_consumption.index = [pd.Timestamp(x, unit='ms') for x in energy_consumption.iloc[:, 0]]
energy_consumption = energy_consumption.drop(columns=0)
energy_consumption = energy_consumption.dropna()
energy_consumption.columns = ["Energy Consumption"]
energy_consumption

Unnamed: 0,Energy Consumption
2014-12-31 23:00:00,44600.25
2015-01-01 00:00:00,43454.75
2015-01-01 01:00:00,41963.25
2015-01-01 02:00:00,40617.75
2015-01-01 03:00:00,39936.75
...,...
2023-10-11 11:00:00,65267.25
2023-10-11 12:00:00,64135.75
2023-10-11 13:00:00,63724.50
2023-10-11 14:00:00,62551.00


In [141]:
wind_data = pd.read_csv("produkt_f_stunde_19750701_20221231_03987.txt", sep=";").iloc[-80000:]
solar_data = pd.read_csv("produkt_sd_stunde_20060818_20200416_03897.txt", sep=";").iloc[-80000:]
temperature_data = pd.read_csv("produkt_tu_stunde_18930101_20221231_03987.txt", sep=";").iloc[-80000:]
precipitation_data = pd.read_csv("produkt_rr_stunde_19950901_20221231_03987.txt", sep=";").iloc[-80000:]

In [142]:
wind_data.index = [pd.to_datetime(x, format="%Y%m%d%H") for x in wind_data["MESS_DATUM"]]
wind_data = wind_data.drop(columns="MESS_DATUM")
wind_data = wind_data.rename(columns={"  FF": "Wind Velocity"})
solar_data.index = [pd.to_datetime(x, format="%Y%m%d%H") for x in solar_data["MESS_DATUM"]]
solar_data = solar_data.drop(columns="MESS_DATUM")
solar_data = solar_data.rename(columns={"SD_SO": "Sun Duration"})
temperature_data.index = [pd.to_datetime(x, format="%Y%m%d%H") for x in temperature_data["MESS_DATUM"]]
temperature_data = temperature_data.drop(columns="MESS_DATUM")
temperature_data = temperature_data.rename(columns={"TT_TU": "Air Temperature"})
precipitation_data.index = [pd.to_datetime(x, format="%Y%m%d%H") for x in precipitation_data["MESS_DATUM"]]
precipitation_data = precipitation_data.drop(columns="MESS_DATUM")
precipitation_data = precipitation_data.rename(columns={"  R1": "Precipitation Amount"})

In [143]:
energy_consumption_dataset = pd.concat([energy_consumption, wind_data["Wind Velocity"], solar_data["Sun Duration"], temperature_data["Air Temperature"], precipitation_data["Precipitation Amount"]], axis=1)

In [144]:
energy_consumption_dataset = energy_consumption_dataset.dropna()

In [145]:
energy_consumption_dataset.describe()

Unnamed: 0,Energy Consumption,Wind Velocity,Sun Duration,Air Temperature,Precipitation Amount
count,34539.0,34539.0,34539.0,34539.0,34539.0
mean,60676.725484,4.225418,16.148383,11.069104,-0.371974
std,9109.729115,1.866284,25.374222,8.439315,20.820656
min,34807.5,0.2,-999.0,-13.1,-999.0
25%,53434.875,2.9,0.0,4.5,0.0
50%,62185.0,4.0,0.0,10.1,0.0
75%,68012.0,5.3,34.0,17.4,0.0
max,79938.5,19.0,60.0,37.7,25.3


In [146]:
from sklearn.model_selection import train_test_split

X_test, X_train, y_test, y_train = train_test_split(energy_consumption_dataset.drop(columns="Energy Consumption"), energy_consumption_dataset["Energy Consumption"])

In [147]:
from sklearn.ensemble import *
from sklearn.linear_model import *

clf = LinearRegression()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

In [148]:
import math
from sklearn.metrics import *

math.pow(mean_squared_error(y_test, y_pred), 1/2)

8930.913205184806

In [149]:
test = pd.DataFrame()
test["Y TRUE"] = y_test
test["Y PRED"] = y_pred
test["Difference"] = abs(y_test - y_pred)
test.describe()

Unnamed: 0,Y TRUE,Y PRED,Difference
count,25904.0,25904.0,25904.0
mean,60633.219734,60821.33295,7644.995332
std,9126.332396,1689.424816,4616.934041
min,34807.5,26760.576878,0.220322
25%,53298.3125,59618.656646,4106.95101
50%,62112.75,60943.753843,7400.848834
75%,68036.1875,62049.285343,10446.512464
max,79938.5,70225.501198,42436.673122


In [150]:
energy_consumption_dataset.to_csv("Energy Consumption Dataset.csv")