In [None]:
from tqdm import tqdm
from requests import request
import pandas as pd
from os import listdir
from os.path import isfile, join

In [None]:
# Download the index document and keep its "timestamps" list; each entry is
# later used to build the URL of one data chunk.
res = request(
    "GET",
    "https://www.smard.de/app/chart_data/4359/DE/index_hour.json",
    timeout=30,  # fail instead of hanging the kernel on a stalled connection
)
# Surface HTTP errors directly rather than as a JSON decoding / KeyError below.
res.raise_for_status()
timestamps = res.json()["timestamps"]

In [None]:
# Download one JSON chunk per index timestamp and collect every entry of its
# "series" list into a single flat list.
time_series = []
for timestamp in tqdm(timestamps):
    res = request(
        "GET",
        f"https://www.smard.de/app/chart_data/4359/DE/4359_DE_hour_{timestamp}.json",
        timeout=30,  # fail instead of hanging the kernel on a stalled connection
    )
    # Stop at the first failed chunk instead of tripping over a non-JSON error body.
    res.raise_for_status()
    body = res.json()["series"]
    time_series.extend(body)

In [None]:
# Turn the downloaded records into a frame and promote the first column
# (interpreted as epoch milliseconds) to the index.
residual_load_dataset = pd.DataFrame(time_series)
residual_load_dataset.index = [
    pd.Timestamp(epoch_ms, unit="ms")
    for epoch_ms in residual_load_dataset.iloc[:, 0]
]
# Column 0 now lives in the index: discard it, then drop rows without a value.
residual_load_dataset = residual_load_dataset.drop(columns=0).dropna()
# Exactly one column is expected to remain; it becomes the target column.
residual_load_dataset.columns = ["Energy Consumption"]
residual_load_dataset

In [None]:
# One entry per measurement joined onto the load frame, in output-column order:
# (substring identifying the file, column name inside the file, label prefix).
_DWD_PRODUCTS = (
    ("produkt_f", "  FF", "Wind Velocity for Station "),
    ("produkt_sd", "SD_SO", "Sun Duration for Station "),
    ("produkt_tu", "TT_TU", "Air Temperature for Station "),
    ("produkt_rr", "  R1", "Precipitation Amount for Station "),
)


def _find_station_file(station_files, file_marker):
    """Return the first name in ``station_files`` that contains ``file_marker``."""
    for name in station_files:
        if file_marker in name:
            return name
    # IndexError (not FileNotFoundError) matches what a plain first-element
    # lookup on an empty match list raises, so existing handlers keep working.
    raise IndexError(
        f"no file containing {file_marker!r} among station files {station_files!r}"
    )


def _read_measurement(path, source_column, series_name, max_rows):
    """Load one measurement column of a ';'-separated file as a time-indexed Series."""
    frame = pd.read_csv(path, sep=";")
    if max_rows is not None:
        frame = frame.tail(max_rows)

    # Header names in these files may carry padding spaces, so compare stripped names.
    wanted = source_column.strip()
    column = next((col for col in frame.columns if str(col).strip() == wanted), None)
    if column is None:
        raise KeyError(f"column {wanted!r} not found in {path!r}")

    # Parse the whole MESS_DATUM column in one call instead of once per row.
    stamps = pd.to_datetime(
        frame["MESS_DATUM"].astype(str).to_numpy(), format="%Y%m%d%H"
    )
    return pd.Series(frame[column].to_numpy(), index=stamps, name=series_name)


def extract_dwd_data(residual_load_dataset, station_id, max_rows=80000):
    """Append wind, sun, temperature and precipitation columns of one station.

    Files are looked up in the current working directory: every file whose name
    contains ``station_id`` is a candidate, and for each measurement the first
    candidate containing the corresponding ``produkt_*`` marker is read.

    Parameters
    ----------
    residual_load_dataset : pandas.DataFrame
        Frame to extend; the station columns are aligned on its index.
    station_id : str
        Station identifier as it appears in the file names.
    max_rows : int or None, default 80000
        Only the last ``max_rows`` rows of each file are used; ``None`` keeps
        every row.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the input columns followed by
        ``"Wind Velocity for Station <id>"``, ``"Sun Duration for Station <id>"``,
        ``"Air Temperature for Station <id>"`` and
        ``"Precipitation Amount for Station <id>"``. Rows are the union of all
        indexes, with NaN where a source has no value.

    Raises
    ------
    IndexError
        If no file matches one of the measurement markers.
    KeyError
        If a file lacks ``MESS_DATUM`` or the expected measurement column.
    """
    # NOTE(review): DWD exports are believed to encode missing readings as -999;
    # such values are not masked here -- confirm and handle them if needed.
    station_files = [name for name in listdir() if station_id in name]

    # Resolve every file first so a missing file is reported before any parsing.
    paths = [_find_station_file(station_files, marker) for marker, _, _ in _DWD_PRODUCTS]

    measurements = [
        _read_measurement(path, column, label + station_id, max_rows)
        for path, (_, column, label) in zip(paths, _DWD_PRODUCTS)
    ]

    return pd.concat([residual_load_dataset, *measurements], axis=1)

In [None]:
# Extend the load frame with the weather columns of each station in turn.
# The frame is reassigned on every pass, so this cell should run once per kernel.
for station_id in ("02014", "03987", "03379", "04928"):
    residual_load_dataset = extract_dwd_data(residual_load_dataset, station_id)

In [None]:
# Keep only rows in which every column has a value.
residual_load_dataset = residual_load_dataset.dropna(axis="index", how="any")

In [None]:
# Calendar quarter (1-4) of each row, read straight from the datetime index.
residual_load_dataset["Quarter"] = residual_load_dataset.index.quarter

In [None]:
# Calendar month (1-12) of each row's index timestamp.
residual_load_dataset["Month"] = [stamp.month for stamp in residual_load_dataset.index]

In [None]:
# Rotate the column order so the first column ends up last.
# Running this cell again rotates the columns once more.
cols = list(residual_load_dataset.columns)
cols.append(cols.pop(0))
residual_load_dataset = residual_load_dataset[cols]
residual_load_dataset

In [None]:
from sklearn.model_selection import train_test_split

# train_test_split returns (X_train, X_test, y_train, y_test); unpack in that
# order so the model is fitted on the training portion and scored on the
# hold-out portion rather than the other way round.
# A fixed random_state makes the shuffle reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    residual_load_dataset.drop(columns="Energy Consumption"),
    residual_load_dataset["Energy Consumption"],
    random_state=42,
)

In [None]:
from sklearn.ensemble import *
from sklearn.linear_model import *

# Fit an ordinary linear regression on the training split (fit returns the
# estimator itself), then predict the target for the test features.
clf = LinearRegression().fit(X_train, y_train)
y_pred = clf.predict(X_test)

In [None]:
import math
from sklearn.metrics import *

# Root-mean-squared error: square root of the MSE between y_test and y_pred.
math.pow(mean_squared_error(y_true=y_test, y_pred=y_pred), 0.5)

In [None]:
# Side-by-side view of actual and predicted values plus their absolute gap;
# the frame takes its index from y_test and y_pred is placed positionally.
test = pd.DataFrame({"Y TRUE": y_test, "Y PRED": y_pred})
test["Difference"] = (y_test - y_pred).abs()
test.describe()

In [None]:
# Persist the assembled feature/target frame, index included, in the working directory.
residual_load_dataset.to_csv("Energy Consumption Dataset.csv", index=True)

In [None]:
import numpy as np

# Constant baseline: predict the mean of y_test for every test row.
# This overwrites y_pred, so cells below score the baseline, not the model.
y_mean = y_test.mean()
y_pred = np.full(y_test.size, y_mean, dtype=float)

In [None]:
import math
from sklearn.metrics import *

# Root-mean-squared error of whatever y_pred currently holds against y_test.
math.pow(mean_squared_error(y_true=y_test, y_pred=y_pred), 0.5)