# Features

This notebook creates features for the raw data.

## Weather Features

Add features like temperature, humidity, sun hours, ...

Start with the daily data

In [None]:
import numpy as np
import polars as pl

from src.energy_forecast.config import RAW_DATA_DIR, PROCESSED_DATA_DIR

data_df = pl.read_csv(PROCESSED_DATA_DIR / "dataset_daily.csv").with_columns(pl.col("datetime").str.to_datetime())
data_df

Find time intervals for every city

In [None]:
data_df = data_df.with_columns(
    pl.coalesce(data_df.join(pl.read_csv(RAW_DATA_DIR / "kinergy_meta.csv"), on="id", how="left")["plz"],
                data_df.join(pl.read_csv(RAW_DATA_DIR / "legacy_meta.csv"), on="id", how="left")["plz"],
                data_df.join(pl.read_csv(RAW_DATA_DIR / "dh_meta.csv").rename({"eco_u_id": "id", "postal_code": "plz"}),
                             on="id", how="left")["plz"],
                ).str.strip_chars())

In [None]:
city_df = data_df.group_by(pl.col("plz")).agg(pl.col("datetime").min().alias("min_date"),
                                              pl.col("datetime").max().alias("max_date")).filter(
    ~(pl.col("plz") == "2700"))  # wien
city_df

Add coordinates to every city

In [None]:
import pgeocode

rows = list()
for plz in city_df["plz"].unique():
    data = pgeocode.Nominatim("de").query_postal_code(str(plz))
    rows.append({"plz": plz, "lat": data["latitude"], "lon": data["longitude"], "state": data["state_code"]})

info_df = pl.DataFrame(rows)
city_df = city_df.join(info_df, on="plz", how="left")
city_df

In [None]:
city_df.write_csv(RAW_DATA_DIR / "cities.csv")

In [6]:
from meteostat import Point, Daily

weather_dfs = list()
for row in city_df.iter_rows():
    start = row[1]
    end = row[2]
    loc = Point(row[3], row[4])

    data = Daily(loc, start, end)
    data = data.fetch()
    weather_dfs.append(pl.from_pandas(data.reset_index()).with_columns(pl.lit(row[0]).alias("plz")))
weather_df = pl.concat(weather_dfs)
weather_df



time,tavg,tmin,tmax,prcp,snow,wdir,wspd,wpgt,pres,tsun,plz
datetime[ns],f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str
2018-11-29 00:00:00,2.4,0.3,5.1,5.0,0.0,157.0,23.0,47.5,1014.5,0.0,"""23795"""
2018-11-30 00:00:00,5.4,3.6,8.1,0.0,0.0,165.0,16.9,37.1,1010.1,102.0,"""23795"""
2018-12-01 00:00:00,5.8,1.0,8.4,0.4,0.0,193.0,15.5,34.2,1009.8,6.0,"""23795"""
2018-12-02 00:00:00,8.0,6.1,11.0,5.9,0.0,186.0,16.6,39.6,997.7,0.0,"""23795"""
2018-12-03 00:00:00,10.5,6.6,11.9,6.0,0.0,226.0,18.4,45.7,995.4,6.0,"""23795"""
…,…,…,…,…,…,…,…,…,…,…,…
2022-03-28 00:00:00,6.4,4.4,9.5,0.0,0.0,297.0,16.9,40.7,1022.1,24.0,"""22848"""
2022-03-29 00:00:00,6.3,2.8,10.2,0.2,0.0,294.0,16.9,42.1,1012.1,582.0,"""22848"""
2022-03-30 00:00:00,4.2,2.5,7.6,8.4,0.0,46.0,6.5,25.9,1006.3,42.0,"""22848"""
2022-03-31 00:00:00,2.2,-1.0,5.6,3.6,40.0,70.0,24.1,59.8,1005.9,216.0,"""22848"""


Merge with data

In [8]:
data_df.with_columns(pl.col("datetime").dt.date()).join(weather_df.with_columns(pl.col("time").alias("datetime")),
                                                        on=["plz", 'date'], how="left")

ColumnNotFoundError: date

Resolved plan until failure:

	---> FAILED HERE RESOLVING 'join' <---
DF ["time", "tavg", "tmin", "tmax"]; PROJECT */13 COLUMNS

From the [meteostat](https://dev.meteostat.net/python/daily.html#api) documentation:

Column	Description	Type

station	The Meteostat ID of the weather station (only if query refers to multiple stations)	String

time	The date	Datetime64

tavg	The average air temperature in °C	Float64

tmin	The minimum air temperature in °C	Float64

tmax	The maximum air temperature in °C	Float64

prcp	The daily precipitation total in mm	Float64

snow	The snow depth in mm	Float64

wdir	The average wind direction in degrees (°)	Float64

wspd	The average wind speed in km/h	Float64

wpgt	The peak wind gust in km/h	Float64

pres	The average sea-level air pressure in hPa	Float64

tsun	The daily sunshine total in minutes (m)	Float64


Humidity is missing from Daily-data, we can retrieve hourly data and merge to daily data

In [9]:
from meteostat import Point, Hourly

weather_dfs = list()
for row in city_df.iter_rows():
    start = row[1]
    end = row[2]
    loc = Point(row[3], row[4])

    data = Hourly(loc, start, end)
    data = data.fetch()
    weather_dfs.append(pl.from_pandas(data.reset_index()).group_by_dynamic(
        index_column="time", every="1d"
    ).agg(pl.col("rhum").mean().alias("hum_avg"),
          pl.col("rhum").min().alias("hum_min"),
          pl.col("rhum").max().alias("hum_max")
          ).with_columns(pl.lit(row[0]).alias("plz")))
weather_df_hourly = pl.concat(weather_dfs)
weather_df_hourly



time,hum_avg,hum_min,hum_max,plz
datetime[ns],f64,f64,f64,str
2018-11-29 00:00:00,84.958333,73.0,95.0,"""23795"""
2018-11-30 00:00:00,78.166667,69.0,94.0,"""23795"""
2018-12-01 00:00:00,90.333333,80.0,100.0,"""23795"""
2018-12-02 00:00:00,96.833333,93.0,100.0,"""23795"""
2018-12-03 00:00:00,92.708333,86.0,98.0,"""23795"""
…,…,…,…,…
2022-03-28 00:00:00,84.291667,71.0,95.0,"""22848"""
2022-03-29 00:00:00,67.875,52.0,86.0,"""22848"""
2022-03-30 00:00:00,80.75,62.0,92.0,"""22848"""
2022-03-31 00:00:00,68.041667,35.0,100.0,"""22848"""


Add to other weather data

In [10]:
weather_df = weather_df_hourly.join(weather_df, on=["plz", 'time'], how="left")
weather_df

time,hum_avg,hum_min,hum_max,plz,tavg,tmin,tmax,prcp,snow,wdir,wspd,wpgt,pres,tsun
datetime[ns],f64,f64,f64,str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
2018-11-29 00:00:00,84.958333,73.0,95.0,"""23795""",2.4,0.3,5.1,5.0,0.0,157.0,23.0,47.5,1014.5,0.0
2018-11-30 00:00:00,78.166667,69.0,94.0,"""23795""",5.4,3.6,8.1,0.0,0.0,165.0,16.9,37.1,1010.1,102.0
2018-12-01 00:00:00,90.333333,80.0,100.0,"""23795""",5.8,1.0,8.4,0.4,0.0,193.0,15.5,34.2,1009.8,6.0
2018-12-02 00:00:00,96.833333,93.0,100.0,"""23795""",8.0,6.1,11.0,5.9,0.0,186.0,16.6,39.6,997.7,0.0
2018-12-03 00:00:00,92.708333,86.0,98.0,"""23795""",10.5,6.6,11.9,6.0,0.0,226.0,18.4,45.7,995.4,6.0
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
2022-03-28 00:00:00,84.291667,71.0,95.0,"""22848""",6.4,4.4,9.5,0.0,0.0,297.0,16.9,40.7,1022.1,24.0
2022-03-29 00:00:00,67.875,52.0,86.0,"""22848""",6.3,2.8,10.2,0.2,0.0,294.0,16.9,42.1,1012.1,582.0
2022-03-30 00:00:00,80.75,62.0,92.0,"""22848""",4.2,2.5,7.6,8.4,0.0,46.0,6.5,25.9,1006.3,42.0
2022-03-31 00:00:00,68.041667,35.0,100.0,"""22848""",2.2,-1.0,5.6,3.6,40.0,70.0,24.1,59.8,1005.9,216.0


In [11]:
weather_df.write_csv(RAW_DATA_DIR / "weather_daily.csv")

In [None]:
data_df.with_columns(pl.col("date").dt.date()).join(weather_df.with_columns(pl.col("time").dt.date().alias("date")),
                                                    on=["plz", 'date'], how="left")

Get hourly weather data as well

In [12]:
from meteostat import Point, Hourly

weather_dfs = list()
for row in city_df.iter_rows():
    start = row[1]
    end = row[2]
    loc = Point(row[3], row[4])

    data = Hourly(loc, start, end)
    data = data.fetch()
    weather_dfs.append(pl.from_pandas(data.reset_index()).with_columns(pl.lit(row[0]).alias("plz")))
weather_df_hourly = pl.concat(weather_dfs)
weather_df_hourly



time,temp,dwpt,rhum,prcp,snow,wdir,wspd,wpgt,pres,tsun,coco,plz
datetime[ns],f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,str
2018-11-29 00:00:00,0.9,-3.4,73.0,0.0,0.0,150.0,25.6,46.0,1019.3,0.0,4.0,"""23795"""
2018-11-29 01:00:00,1.0,-3.1,74.0,0.1,0.0,160.0,25.6,48.0,1018.7,0.0,8.0,"""23795"""
2018-11-29 02:00:00,0.8,-3.1,75.0,0.0,0.0,150.0,25.6,45.0,1018.3,0.0,8.0,"""23795"""
2018-11-29 03:00:00,1.2,-2.9,74.0,0.0,0.0,150.0,24.8,41.0,1018.1,0.0,4.0,"""23795"""
2018-11-29 04:00:00,0.8,-2.4,79.0,0.0,0.0,150.0,24.1,44.0,1017.5,0.0,7.0,"""23795"""
…,…,…,…,…,…,…,…,…,…,…,…,…
2022-03-31 20:00:00,2.7,-7.0,49.0,0.0,0.0,70.0,25.6,46.0,1007.5,0.0,4.0,"""22848"""
2022-03-31 21:00:00,2.2,-6.9,51.0,0.0,0.0,70.0,24.1,38.0,1007.8,0.0,4.0,"""22848"""
2022-03-31 22:00:00,2.0,-6.3,54.0,0.0,0.0,60.0,21.6,40.0,1008.1,0.0,4.0,"""22848"""
2022-03-31 23:00:00,2.0,-6.8,52.0,0.0,0.0,70.0,20.5,40.0,1008.2,0.0,4.0,"""22848"""


In [13]:
weather_df_hourly.write_csv(RAW_DATA_DIR / "weather_hourly.csv")

## Time Features

School/University Break, Holidays

In [11]:
import holidays

holiday_dict = dict()
ger_holidays = holidays.country_holidays("DE", years=range(2018, 2024))
holiday_dict.update(ger_holidays)
holiday_dict

{datetime.date(2018, 1, 1): "New Year's Day",
 datetime.date(2018, 3, 30): 'Good Friday',
 datetime.date(2018, 4, 2): 'Easter Monday',
 datetime.date(2018, 5, 1): 'Labor Day',
 datetime.date(2018, 5, 10): 'Ascension Day',
 datetime.date(2018, 5, 21): 'Whit Monday',
 datetime.date(2018, 10, 3): 'German Unity Day',
 datetime.date(2018, 12, 25): 'Christmas Day',
 datetime.date(2018, 12, 26): 'Second Day of Christmas',
 datetime.date(2019, 1, 1): "New Year's Day",
 datetime.date(2019, 4, 19): 'Good Friday',
 datetime.date(2019, 4, 22): 'Easter Monday',
 datetime.date(2019, 5, 1): 'Labor Day',
 datetime.date(2019, 5, 30): 'Ascension Day',
 datetime.date(2019, 6, 10): 'Whit Monday',
 datetime.date(2019, 10, 3): 'German Unity Day',
 datetime.date(2019, 12, 25): 'Christmas Day',
 datetime.date(2019, 12, 26): 'Second Day of Christmas',
 datetime.date(2020, 1, 1): "New Year's Day",
 datetime.date(2020, 4, 10): 'Good Friday',
 datetime.date(2020, 4, 13): 'Easter Monday',
 datetime.date(2020, 5, 1

In [12]:
holidays_state_dict = dict()
for state in city_df["state"].unique():
    state_holidays = holidays.country_holidays("DE", subdiv=state, years=range(2018, 2024))
    holidays_state_dict.update({state: state_holidays})

In [None]:
import numpy as np

holiday_list = list()
for state in city_df["state"].unique():
    for date, holiday in holidays_state_dict[state].items():
        holiday_list.append({"state": state, "start": date, "end": "null", "type": holiday})

pl.DataFrame(holiday_list).cast({"end": pl.Date}, strict=False)

In [None]:
from src.energy_forecast.config import DATA_DIR

df_holidays = pl.read_csv(DATA_DIR / "ferien.csv", separator=";").with_columns(pl.col("start").str.to_date(),
                                                                               pl.col("end").str.to_date())
pl.concat([df_holidays, pl.DataFrame(holiday_list).cast({"end": pl.Date}, strict=False)]).write_csv(
    RAW_DATA_DIR / "holidays.csv")

In [None]:
data_df.with_columns(pl.col("date").dt.date().alias("date"))

In [15]:
data_df = data_df.with_columns(pl.col("date").dt.date().alias("date")
                               ).with_columns(
    pl.when(pl.col("date").is_in(set(holiday_dict.keys()))).then(1).otherwise(0).alias("holiday"))
data_df

id,date,diff,primary_energy,adresse,ort,plz,source,holiday
str,date,f64,str,str,str,i64,str,i32
"""0c9ad311-b86f-4371-a695-512ca4…",2022-10-01,703.0,"""district heating""","""Kielortring 14""","""Norderstedt""",22850,"""dh""",0
"""0c9ad311-b86f-4371-a695-512ca4…",2022-10-02,334.0,"""district heating""","""Kielortring 14""","""Norderstedt""",22850,"""dh""",0
"""0c9ad311-b86f-4371-a695-512ca4…",2022-10-03,891.0,"""district heating""","""Kielortring 14""","""Norderstedt""",22850,"""dh""",1
"""0c9ad311-b86f-4371-a695-512ca4…",2022-10-04,661.0,"""district heating""","""Kielortring 14""","""Norderstedt""",22850,"""dh""",0
"""0c9ad311-b86f-4371-a695-512ca4…",2022-10-05,499.0,"""district heating""","""Kielortring 14""","""Norderstedt""",22850,"""dh""",0
…,…,…,…,…,…,…,…,…
"""4008231VG""",2022-03-11,1435.1,"""gas""","""Tinnumer Weg 1-9, Morsumer Weg…","""Hamburg""",22117,"""legacy""",0
"""4008231VG""",2022-03-12,1083.3,"""gas""","""Tinnumer Weg 1-9, Morsumer Weg…","""Hamburg""",22117,"""legacy""",0
"""4008231VG""",2022-03-13,1038.4,"""gas""","""Tinnumer Weg 1-9, Morsumer Weg…","""Hamburg""",22117,"""legacy""",0
"""4008231VG""",2022-03-14,996.6,"""gas""","""Tinnumer Weg 1-9, Morsumer Weg…","""Hamburg""",22117,"""legacy""",0


## Building Features

### Legacy Data

In [None]:
leg_data = pl.read_csv( / "legacy_daily.csv")
leg_data_meta = leg_data.group_by(
    ["id", "ort", "adresse", "plz", "primary_energy", "qmbehfl", "anzlwhg", "co2koeffizient"]).agg().with_columns(
    pl.lit("Mehrfamilienhaus").alias("typ")).rename({"anzlwhg": "anzahlwhg"})
leg_data_meta = leg_data_meta.rename({"qmbehfl": "heated_area"}).cast(
    {"heated_area": pl.Float64, "plz": pl.String}).with_columns(pl.lit("leg").alias("source"))
leg_data_meta

In [None]:
leg_data_meta.write_csv( / "legacy_meta.csv")

### Kinergy Data

In [31]:
from src.energy_forecast.config import DATA_DIR
import json

with open(DATA_DIR / "kinergy" / "kinergy_eco_u_list.json", "r", encoding="UTF-8") as f:
    eco_u_data = json.loads(f.read())
item_list = [i for k, i in eco_u_data.items()]
kinergy_meta = pl.from_dicts(item_list).select(
    ["hash", "ort", "name", "plz", "anzahlwhg", "typ", "complexity", "complexity_score", "primary_energy",
     "heated_area", "renewable_energy_used", "has_pwh", "pwh_type", "netz_nummer"])

bem_meta = pl.read_csv(DATA_DIR / "kinergy" / "berlin_fernwärmenetz_info.csv").rename(
    {"Netznummer": "netz_nummer"}).select(["netz_nummer", "min_vorlauf_temp", "max_vorlauf_temp"])
kinergy_meta = kinergy_meta.join(bem_meta, on="netz_nummer", how="left")
kinergy_meta = kinergy_meta.rename({"hash": "id", "name": "adresse"}).with_columns(pl.lit("kin").alias("source"))
kinergy_meta

hash,ort,name,plz,anzahlwhg,typ,complexity,complexity_score,primary_energy,heated_area,renewable_energy_used,has_pwh,pwh_type,netz_nummer,min_vorlauf_temp,max_vorlauf_temp
str,str,str,str,i64,str,i64,f64,str,f64,bool,bool,str,i64,str,str
"""1 # JHe51""","""Bamberg""","""Hegelstraße 51""","""96052""",134,"""Studentenwohnheim""",0,25.0,"""district heating""",2736.65,false,true,"""central""",,,
"""2 # JMe4""","""Erlangen""","""Mittlere Schulstraße 4""","""91054""",64,"""Studentenwohnheim""",1,34.0,"""gas""",1201.78,false,true,"""central""",,,
"""3 # JOe11""","""Erlangen""","""Otto-Goetze-Straße 11""","""91054""",168,"""Studentenwohnheim""",1,40.5,"""gas""",3141.78,false,true,"""central""",,,
"""4 # JSe21/23""","""Bayreuth""","""Schellingstraße 21/23""","""95447""",60,"""Studentenwohnheim""",0,28.0,"""gas""",2488.92,false,true,"""central""",,,
"""5 # WFe21-25""","""Würzburg""","""Friedrichstraße 21-25""","""97082""",64,"""Mehrfamilienhaus""",1,35.0,"""district heating""",3600.0,true,true,"""decentral""",,,
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""16 # PRe45""","""Plön""","""Rautenbergstraße 45""","""24306""",0,"""""",0,20.5,"""gas""",,false,true,"""central""",,,
"""17 # SFp36""","""Hamburg""","""Foorthkamp 36""","""22419""",0,"""Schule""",1,35.0,"""gas""",6428.0,false,true,"""central""",,,
"""18 # SGe171""","""Hamburg""","""Gaußstraße 171""","""22765""",0,"""Schule""",1,35.5,"""gas""",6140.0,false,false,,,,
"""19 # SKg63""","""Hamburg""","""Kapellenweg 63""","""21077""",0,"""Schule""",1,42.0,"""gas""",7511.0,false,true,"""central""",,,


In [19]:
kinergy_meta.filter(pl.col("primary_energy") == "district heating")

hash,ort,name,plz,anzahlwhg,typ,complexity,complexity_score,primary_energy,heated_area,renewable_energy_used,has_pwh,pwh_type,netz_nummer,min_vorlauf_temp,max_vorlauf_temp
str,str,str,str,i64,str,i64,f64,str,f64,bool,bool,str,i64,str,str
"""1 # JHe51""","""Bamberg""","""Hegelstraße 51""","""96052""",134,"""Studentenwohnheim""",0,25.0,"""district heating""",2736.65,False,True,"""central""",,,
"""5 # WFe21-25""","""Würzburg""","""Friedrichstraße 21-25""","""97082""",64,"""Mehrfamilienhaus""",1,35.0,"""district heating""",3600.0,True,True,"""decentral""",,,
"""9 # BMr03""","""Berlin""","""Marzahner Chaussee 231 Sportha…","""12681""",0,"""Schule""",0,27.5,"""district heating""",1141.0,False,True,"""central""",2100.0,""" 80 °C""",""" 135 °C"""
"""10 # BSeH1+3+4""","""Berlin""","""Straßmannstraße 14-16 H1+3+4""","""10249""",24,"""Schule""",0,23.0,"""district heating""",9456.0,False,False,,2600.0,""" 80 °C""",""" 135 °C"""
"""11 # BSeH2""","""Berlin""","""Straßmannstraße 14-16 H2""","""10249""",0,"""Schule""",0,27.0,"""district heating""",2360.0,False,True,"""central""",2600.0,""" 80 °C""",""" 135 °C"""
"""12 # BTr9""","""Berlin""","""Trebbiner Str.9""","""10963""",0,"""Museum""",0,26.0,"""district heating""",6000.0,False,False,,2601.0,""" 80 °C""",""" 135 °C"""


### District Heating Data

In [1]:
import polars as pl
from src.energy_forecast.config import RAW_DATA_DIR

dh_meta = pl.read_csv(RAW_DATA_DIR / "district_heating_meta.csv").with_columns(
    pl.lit("Mehrfamilienhaus").alias("typ"),
    pl.lit(75).alias("min_vorlauf_temp"),
    pl.lit(90).alias("max_vorlauf_temp")
).rename({"eco_u_id": "id"}).select(
    ["id", "city", "address", "postal_code", "typ", "min_vorlauf_temp", "max_vorlauf_temp"])
dh_meta = dh_meta.rename({"address": "adresse", "postal_code": "plz", "city": "ort"}).select(
    ["id", "adresse", "ort", "plz", "typ", "min_vorlauf_temp", "max_vorlauf_temp"]).cast(
    {"plz": pl.String, "min_vorlauf_temp": pl.String, "max_vorlauf_temp": pl.String}).with_columns(
    pl.lit("dh").alias("source"),
    pl.lit("district heating").alias("primary_energy")
)
dh_meta

[32m2025-02-19 08:58:35.832[0m | [1mINFO    [0m | [36msrc.energy_forecast.config[0m:[36m<module>[0m:[36m11[0m - [1mPROJ_ROOT path is: /home/marja/PycharmProjects/energy-forecast-wahl[0m


id,adresse,ort,plz,typ,min_vorlauf_temp,max_vorlauf_temp,source,primary_energy
str,str,str,str,str,str,str,str,str
"""8f7b3862-a50d-44eb-8ac9-de0cf4…","""Kielort 20""","""Norderstedt""","""22850""","""Mehrfamilienhaus""","""75""","""90""","""dh""","""district heating"""
"""42d6efdc-d590-40b7-af9a-90121d…","""Moorbekstraße 19""","""Norderstedt""","""22846""","""Mehrfamilienhaus""","""75""","""90""","""dh""","""district heating"""
"""c00c8cba-b6de-4c10-89c0-e92312…","""Moorbekstraße 29""","""Norderstedt""","""22846""","""Mehrfamilienhaus""","""75""","""90""","""dh""","""district heating"""
"""7bdbc8ee-00fb-4795-99cb-c1739f…","""Moorbekstraße 31""","""Norderstedt""","""22846""","""Mehrfamilienhaus""","""75""","""90""","""dh""","""district heating"""
"""d00d6502-a08d-45df-99e3-7d8cd5…","""Moorbekstraße 17""","""Norderstedt""","""22846""","""Mehrfamilienhaus""","""75""","""90""","""dh""","""district heating"""
…,…,…,…,…,…,…,…,…
"""82a01deb-7c9d-4e87-a79a-4693bd…","""Waldstraße 81""","""Norderstedt""","""22846""","""Mehrfamilienhaus""","""75""","""90""","""dh""","""district heating"""
"""cae17ef4-cfad-4446-8b09-3cf946…","""Hasenstieg 13""","""Norderstedt""","""22846""","""Mehrfamilienhaus""","""75""","""90""","""dh""","""district heating"""
"""a9644794-439b-401c-b879-8c0225…","""Kielort 25""","""Norderstedt""","""22850""","""Mehrfamilienhaus""","""75""","""90""","""dh""","""district heating"""
"""561a9d67-5802-4a54-ae7d-0a7822…","""Segeberger Chaussee 104b""","""Norderstedt""","""22850""","""Mehrfamilienhaus""","""75""","""90""","""dh""","""district heating"""


In [4]:
from src.energy_forecast.config import DATA_DIR

df_buildings = pl.read_csv(DATA_DIR / "lod2" / "building_data.csv").drop_nulls().rename({"Address": "adresse"})
dh_meta_lod = dh_meta.select(pl.col("adresse")).join(df_buildings, on="adresse", how="left").drop_nulls().unique(["adresse"]).sort(pl.col("adresse"))
dh_meta_lod

adresse,Building ID,Country,postal_code,Function,Height (m),Storeys Above Ground,ground_surface
str,str,str,str,str,f64,str,f64
"""Am Kielortplatz 1""","""DESHPDHK0005vC5p""","""Deutschland""","""22850""","""31001_1010""",26.345,"""8""",547.960037
"""Friedrichsgaber Weg 424""","""DESHPDHK0000Z7oH""","""Deutschland""","""22846""","""31001_1010""",18.084,"""4""",1144.430904
"""Friedrichsgaber Weg 453""","""DESHPDHK0000Z6T7""","""Deutschland""","""22846""","""31001_1010""",25.786,"""4""",1128.89042
"""Hasenstieg 13""","""DESHPDHK0000Z7pB""","""Deutschland""","""22846""","""31001_3021""",8.586,"""1""",1714.14213
"""Kielort 16""","""DESHPDHK00012IWL""","""Deutschland""","""22850""","""31001_1010""",22.541,"""5""",1847.223805
…,…,…,…,…,…,…,…
"""Röntgengang 16""","""DESHPDHK0007aem1""","""Deutschland""","""24635""","""31001_1010""",11.636,"""3""",303.146873
"""Segeberger Chaussee 132""","""DESHPDHK00014I3x""","""Deutschland""","""22850""","""31001_2120""",4.305,"""1""",421.969325
"""Ulzburger Straße 457""","""DESHPDHK0006FU8b""","""Deutschland""","""22846""","""31001_1010""",10.031,"""3""",320.653675
"""Ulzburger Straße 461""","""DESHPDHK0006FU6t""","""Deutschland""","""22846""","""31001_1010""",13.537,"""4""",653.00923


In [5]:
dh_meta_lod.write_csv(RAW_DATA_DIR / "dh_meta_lod.csv")

### Merge

In [34]:
kinergy_meta.schema

Schema([('id', String),
        ('ort', String),
        ('adresse', String),
        ('plz', String),
        ('anzahlwhg', Int64),
        ('typ', String),
        ('complexity', Int64),
        ('complexity_score', Float64),
        ('primary_energy', String),
        ('heated_area', Float64),
        ('renewable_energy_used', Boolean),
        ('has_pwh', Boolean),
        ('pwh_type', String),
        ('netz_nummer', Int64),
        ('min_vorlauf_temp', String),
        ('max_vorlauf_temp', String),
        ('source', String)])

In [35]:
leg_data_meta.schema

Schema([('id', String),
        ('ort', String),
        ('adresse', String),
        ('plz', String),
        ('primary_energy', String),
        ('heated_area', Float64),
        ('anzlwhg', Int64),
        ('co2koeffizient', Float64),
        ('typ', String),
        ('source', String)])

In [36]:
dh_meta.schema

Schema([('id', String),
        ('adresse', String),
        ('ort', String),
        ('plz', String),
        ('typ', String),
        ('min_vorlauf_temp', String),
        ('max_vorlauf_temp', String),
        ('source', String),
        ('primary_energy', String)])

In [42]:
df_meta = pl.concat([leg_data_meta, kinergy_meta, dh_meta], how="diagonal")
df_meta

id,ort,adresse,plz,primary_energy,heated_area,anzahlwhg,co2koeffizient,typ,source,complexity,complexity_score,renewable_energy_used,has_pwh,pwh_type,netz_nummer,min_vorlauf_temp,max_vorlauf_temp
str,str,str,str,str,f64,i64,f64,str,str,i64,f64,bool,bool,str,i64,str,str
"""400308PVG""","""Hamburg""","""Martinistraße 44""","""20251""","""gas""",0.0,0,2.26,"""Mehrfamilienhaus""","""leg""",,,,,,,,
"""400690GVG""","""Hamburg""","""Op´n Hainholt 4-18""","""22589""","""gas""",18493.0,290,2.26,"""Mehrfamilienhaus""","""leg""",,,,,,,,
"""400356PVG""","""Elmshorn""","""Fehrsstraße 7""","""25336""","""gas""",1215.0,23,2.26,"""Mehrfamilienhaus""","""leg""",,,,,,,,
"""400131GVG""","""Lübeck""","""Brandenbaumer Landstraße 177""","""23566""","""gas""",6387.0,120,2.26,"""Mehrfamilienhaus""","""leg""",,,,,,,,
"""4008231VG""","""Hamburg""","""Tinnumer Weg 1-9, Morsumer Weg…","""22117""","""gas""",19530.0,262,2.26,"""Mehrfamilienhaus""","""leg""",,,,,,,,
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""82a01deb-7c9d-4e87-a79a-4693bd…","""Norderstedt""","""Waldstraße 81""","""22846""","""district heating""",,,,"""Mehrfamilienhaus""","""dh""",,,,,,,"""75""","""90"""
"""cae17ef4-cfad-4446-8b09-3cf946…","""Norderstedt""","""Hasenstieg 13""","""22846""","""district heating""",,,,"""Mehrfamilienhaus""","""dh""",,,,,,,"""75""","""90"""
"""a9644794-439b-401c-b879-8c0225…","""Norderstedt""","""Kielort 25""","""22850""","""district heating""",,,,"""Mehrfamilienhaus""","""dh""",,,,,,,"""75""","""90"""
"""561a9d67-5802-4a54-ae7d-0a7822…","""Norderstedt""","""Segeberger Chaussee 104b""","""22850""","""district heating""",,,,"""Mehrfamilienhaus""","""dh""",,,,,,,"""75""","""90"""


In [43]:
df_meta.describe()

statistic,id,ort,adresse,plz,primary_energy,heated_area,anzahlwhg,co2koeffizient,typ,source,complexity,complexity_score,renewable_energy_used,has_pwh,pwh_type,netz_nummer,min_vorlauf_temp,max_vorlauf_temp
str,str,str,str,str,str,f64,f64,f64,str,str,f64,f64,f64,f64,str,f64,str,str
"""count""","""139""","""139""","""139""","""139""","""139""",48.0,49.0,29.0,"""139""","""139""",20.0,20.0,20.0,20.0,"""16""",4.0,"""94""","""94"""
"""null_count""","""0""","""0""","""0""","""0""","""0""",91.0,90.0,110.0,"""0""","""0""",119.0,119.0,119.0,119.0,"""123""",135.0,"""45""","""45"""
"""mean""",,,,,,6634.015833,85.979592,2.26,,,0.5,29.3,0.15,0.8,,2475.25,,
"""std""",,,,,,9154.866906,143.185557,4.5195e-16,,,0.606977,10.181769,,,,250.167111,,
"""min""","""0c9ad311-b86f-4371-a695-512ca4…","""Bamberg""","""Alter Sportplatz 1-5""","""10249""","""district heating""",0.0,0.0,2.26,"""""","""dh""",0.0,12.5,0.0,0.0,"""central""",2100.0,""" 80 °C""",""" 135 °C"""
"""25%""",,,,,,1812.0,0.0,2.26,,,0.0,23.0,,,,2600.0,,
"""50%""",,,,,,3141.78,31.0,2.26,,,0.0,28.0,,,,2600.0,,
"""75%""",,,,,,6904.0,96.0,2.26,,,1.0,35.5,,,,2600.0,,
"""max""","""fb684f25-a63d-4d3e-9277-6d759b…","""Würzburg""","""Wilhelmstraße 33-41""","""97084""","""gas""",49339.0,697.0,2.26,"""Studentenwohnheim""","""leg""",2.0,51.5,1.0,1.0,"""decentral""",2601.0,"""75""","""90"""
