# Built environment stock for Dutch municipalities

**TO DO** | Add introduction

In [1]:
# internal modules
import csv
import os
import sys

# external modules
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
import time
import xlwings as xw
import yaml
from pathlib import Path

# project modules
import config.config as config
from src.checks import Checker
from src.extract import PblService
from src.transform import Transformer

Before we start we need to specify which datasets we want to create or update. This can be done with the `data.csv` file in the `config` directory. Here you can specify the geo ID, parent dataset and name for each region.

Also, make sure to specify the parent dataset and the year in the cell below.

**TO DO** | Add variable all_municipalities (based on the default etlocal config)

In [2]:
# Specify the year
year = 2019

# Specify the CSV-separator (presumably either "," or ";")
sep=","

# Either specify the municipalities by using the data.csv file in the config directory.
# Make sure to specify the right separator in the pd.read_csv() function.
path = Path("config", "data.csv")
municipalities = pd.read_csv(path, sep=",")['geo_id'].to_list()

# Preview municipality geo IDs
# municipalities

The "referentieverbruiken" study by PBL is used to calculate the housing stock for each municipality.

### Residences (or households)

#### Extract

First, we extract the raw **EP-online** data in csv format (downloaded from the website). Then, we convert this into a dataframe.

In [3]:
# Specify the path to the raw data file
path = Path("data", "raw", "v20231201_v2_csv.csv")

# Extract the data and turn it into a dataframe
df_raw_ep_online = pd.read_csv(path, header=[0], sep=";", low_memory=False)

In [4]:
# Preview EP-online data
df_raw_ep_online.head()

Unnamed: 0,Pand_opnamedatum,Pand_opnametype,Pand_status,Pand_berekeningstype,Pand_energieindex,Pand_energieklasse,Pand_energielabel_is_prive,Pand_is_op_basis_van_referentie_gebouw,Pand_gebouwklasse,Meting_geldig_tot,...,Pand_primaire_fossiele_energie,Pand_eis_primaire_fossiele_energie,Pand_primaire_fossiele_energie_EMG_forfaitair,Pand_aandeel_hernieuwbare_energie,Pand_eis_aandeel_hernieuwbare_energie,Pand_aandeel_hernieuwbare_energie_EMG_forfaitair,Pand_temperatuuroverschrijding,Pand_eis_temperatuuroverschrijding,Pand_warmtebehoefte,Pand_energieindex_met_EMG_forfaitair
0,20231130,Detailopname,Vergunningsaanvraag,NTA 8800:2023 (detailopname woningbouw),,A+++,,0,W,20331130,...,8.4,30.0,,92.3,50.0,,0.0,1.2,53.04,
1,20231130,Detailopname,Vergunningsaanvraag,NTA 8800:2023 (detailopname woningbouw),,A+++,,0,W,20331130,...,3.62,30.0,,95.8,50.0,,0.0,1.2,32.33,
2,20231130,Detailopname,Vergunningsaanvraag,NTA 8800:2023 (detailopname woningbouw),,A+++,,0,W,20331130,...,3.62,30.0,,95.8,50.0,,0.0,1.2,32.33,
3,20231130,Detailopname,Vergunningsaanvraag,NTA 8800:2023 (detailopname woningbouw),,A+++,,0,W,20331130,...,3.62,30.0,,95.8,50.0,,0.0,1.2,32.33,
4,20231130,Detailopname,Vergunningsaanvraag,NTA 8800:2023 (detailopname woningbouw),,A+++,,0,W,20331130,...,9.0,30.0,,91.6,50.0,,0.0,1.2,51.1,


Because the EP-online data doesn't include all desired data we need to eventually enrich this data with the municipal data from the **PBL referentieverbruiken**. Hence, we need to extract a csv for each municipality and combine it into one big dataframe.

In [5]:
# Call the PBL referentieverbruiken service and store the raw csv data in the "data / raw" directory
# It's not necessary to run this step if the raw csv data is yet present in the directory
# for municipality in municipalities:
#     PblService(municipality).call('csv')

In [6]:
# TODO: Refactor so that we can use the all_input.csv instead of looping over all municipality csv files

# For all municipalities, load the CSV into a dataframe and eventually concat them all.
dfs = []
for municipality in municipalities:
    try: 
        # Specify path to the CSV file
        path = Path("data", "raw", f"{municipality}.csv")

        # Write the dataframe to this path
        df_municipality = pd.read_csv(path, header=[0], sep=";")

        # Add the municipal geo ID to the dataframe as a new column
        df_municipality['gemeente'] = municipality

        # Append dataframe to the list
        dfs.append(df_municipality)
    except FileNotFoundError:
        print(f"Data for {municipality} is not available")
        pass

# Concatenate list of dataframes to one big dataframe
df_raw_pbl = pd.concat(dfs)

Data for GM1979 is not available


In [7]:
# Preview PBL data
df_raw_pbl.head()

Unnamed: 0.1,Unnamed: 0,Woning/vbo_id,Adres/Postcode_huisnummer,Aantal bewoners/Aantal bewoners,Regio/gemeente,Regio/wijk,Regio/buurtcode,Woningkenmerken/Kenmerken,Woningkenmerken/woningtype,Woningkenmerken/bouwperiode,...,Metervraag olie/ruimteverwarming piek,Metervraag totaal/aardgas,Metervraag totaal/elektriciteit,Metervraag totaal/warmtenet,Metervraag totaal/waterstof,Metervraag totaal/biomassa,Metervraag totaal/olie,Metervraag totaal/totaal,Regionale klimaatcorrectie/regionale klimaatcorrectie,gemeente
0,0,'0003010000125985','9901KB_16',1.0,3,300,'BU00030000','meergezins: laag en midden_2000 - 2005_wooncorp',5,7,...,0.0,23.3,0.1,0.0,0.0,0.0,0.0,23.4,1.1,GM0003
1,1,'0003010000125986','9901KB_20',1.0,3,300,'BU00030000','meergezins: laag en midden_2000 - 2005_wooncorp',5,7,...,0.0,25.6,0.1,0.0,0.0,0.0,0.0,25.8,1.1,GM0003
2,2,'0003010000125991','9901AD_15',2.0,3,300,'BU00030000','rijwoning tussen_voor 1930_koop',4,0,...,0.0,37.1,0.2,0.0,0.0,0.0,0.0,37.2,1.1,GM0003
3,3,'0003010000125992','9901AD_6',3.0,3,300,'BU00030000','2 onder 1 kap_1946 - 1964_koop',2,2,...,0.0,59.7,0.2,0.0,0.0,0.0,0.0,59.9,1.1,GM0003
4,4,'0003010000125994','9901AD_10_a',2.0,3,300,'BU00030000','Vrijstaand_1930 - 1945_parthuur',1,1,...,0.0,52.5,0.2,0.0,0.0,0.0,0.0,52.8,1.1,GM0003


#### Transform
The raw data should be filtered, cleaned, combined and enriched before we can use it.

##### Cleaning and preprocessing

Let's start off with the **EP-online** data!

In [8]:
df_raw_ep_online.head(3)

Unnamed: 0,Pand_opnamedatum,Pand_opnametype,Pand_status,Pand_berekeningstype,Pand_energieindex,Pand_energieklasse,Pand_energielabel_is_prive,Pand_is_op_basis_van_referentie_gebouw,Pand_gebouwklasse,Meting_geldig_tot,...,Pand_primaire_fossiele_energie,Pand_eis_primaire_fossiele_energie,Pand_primaire_fossiele_energie_EMG_forfaitair,Pand_aandeel_hernieuwbare_energie,Pand_eis_aandeel_hernieuwbare_energie,Pand_aandeel_hernieuwbare_energie_EMG_forfaitair,Pand_temperatuuroverschrijding,Pand_eis_temperatuuroverschrijding,Pand_warmtebehoefte,Pand_energieindex_met_EMG_forfaitair
0,20231130,Detailopname,Vergunningsaanvraag,NTA 8800:2023 (detailopname woningbouw),,A+++,,0,W,20331130,...,8.4,30.0,,92.3,50.0,,0.0,1.2,53.04,
1,20231130,Detailopname,Vergunningsaanvraag,NTA 8800:2023 (detailopname woningbouw),,A+++,,0,W,20331130,...,3.62,30.0,,95.8,50.0,,0.0,1.2,32.33,
2,20231130,Detailopname,Vergunningsaanvraag,NTA 8800:2023 (detailopname woningbouw),,A+++,,0,W,20331130,...,3.62,30.0,,95.8,50.0,,0.0,1.2,32.33,


The dataset still has a lot of columns we're not interested in. Let's specify the ones we want to keep and filter for those:

* Pand_energieindex
* Pand_energieklasse
* Pand_gebouwklasse
* Pand_bagverblijfsobjectid
* Pand_gebouwtype
* Pand_gebruiksoppervlakte_thermische_zone
* Pand_warmtebehoefte

In [9]:
# Specify the columns to keep
columns_to_keep = [
    'Pand_energieindex',
    'Pand_energieklasse',
    'Pand_gebouwklasse',
    'Pand_bagverblijfsobjectid',
    'Pand_gebouwtype',
    'Pand_gebruiksoppervlakte_thermische_zone',
    'Pand_warmtebehoefte'
]

df_filtered_ep_online = df_raw_ep_online[columns_to_keep]

# Preview data
df_filtered_ep_online.head(3)

Unnamed: 0,Pand_energieindex,Pand_energieklasse,Pand_gebouwklasse,Pand_bagverblijfsobjectid,Pand_gebouwtype,Pand_gebruiksoppervlakte_thermische_zone,Pand_warmtebehoefte
0,,A+++,W,,Rijwoning hoek,114.67,53.04
1,,A+++,W,,Rijwoning tussen,109.4,32.33
2,,A+++,W,,Rijwoning tussen,109.4,32.33


Then, we're only interested in existing buildings so we should drop the buildings that do not have a BAG VBO ID value.

In [10]:
# Drop buildings for which no BAG VBO ID is specified
df_cleaned_ep_online = df_filtered_ep_online.dropna(subset=['Pand_bagverblijfsobjectid'])

# Preview data
df_cleaned_ep_online

Unnamed: 0,Pand_energieindex,Pand_energieklasse,Pand_gebouwklasse,Pand_bagverblijfsobjectid,Pand_gebouwtype,Pand_gebruiksoppervlakte_thermische_zone,Pand_warmtebehoefte
224762,,C,U,3.630100e+14,,1735.09,112.11
224763,,A++,W,3.630100e+14,Appartement,145.08,44.16
224764,,A++,W,3.630100e+14,Appartement,145.08,46.42
224765,,A++,W,3.630100e+14,Appartement,145.08,47.03
224766,,A+++,W,3.630100e+14,Appartement,292.63,68.95
...,...,...,...,...,...,...,...
5441952,,E,W,1.651010e+15,Vrijstaande woning,,
5441953,,A,W,1.651010e+15,Vrijstaande woning,,
5441954,0.93,A,W,1.651010e+15,Vrijstaande woning,,
5441955,,B,W,1.651010e+15,Vrijstaande woning,,


Finally, we want to set the index to the BAG VBO ID values. Also, we want to convert the scientific notation to an integer representation.

In [11]:
# Convert the BAG VBO ID values into integer representations
df_cleaned_ep_online['Pand_bagverblijfsobjectid'] = df_cleaned_ep_online['Pand_bagverblijfsobjectid'].round().astype(int)

# Preview data
df_cleaned_ep_online.head(3)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned_ep_online['Pand_bagverblijfsobjectid'] = df_cleaned_ep_online['Pand_bagverblijfsobjectid'].round().astype(int)


Unnamed: 0,Pand_energieindex,Pand_energieklasse,Pand_gebouwklasse,Pand_bagverblijfsobjectid,Pand_gebouwtype,Pand_gebruiksoppervlakte_thermische_zone,Pand_warmtebehoefte
224762,,C,U,363010000977860,,1735.09,112.11
224763,,A++,W,363010000948617,Appartement,145.08,44.16
224764,,A++,W,363010000618803,Appartement,145.08,46.42


There seems to be a vbo with an insanely high heat demand. Let's drop this outlier.

In [12]:
# TODO: send an e-mail about this outlier to EP-online
df_cleaned_ep_online[df_cleaned_ep_online['Pand_bagverblijfsobjectid']==1655010000535141]

Unnamed: 0,Pand_energieindex,Pand_energieklasse,Pand_gebouwklasse,Pand_bagverblijfsobjectid,Pand_gebouwtype,Pand_gebruiksoppervlakte_thermische_zone,Pand_warmtebehoefte
3079309,,G,W,1655010000535141,Rijwoning hoek,155.92,3749115.0


In [13]:
# Drop this outlier
df_cleaned_ep_online = df_cleaned_ep_online[df_cleaned_ep_online['Pand_bagverblijfsobjectid']!=1655010000535141]

# Preview dataframe
df_cleaned_ep_online

Unnamed: 0,Pand_energieindex,Pand_energieklasse,Pand_gebouwklasse,Pand_bagverblijfsobjectid,Pand_gebouwtype,Pand_gebruiksoppervlakte_thermische_zone,Pand_warmtebehoefte
224762,,C,U,363010000977860,,1735.09,112.11
224763,,A++,W,363010000948617,Appartement,145.08,44.16
224764,,A++,W,363010000618803,Appartement,145.08,46.42
224765,,A++,W,363010012087723,Appartement,145.08,47.03
224766,,A+++,W,363010012087724,Appartement,292.63,68.95
...,...,...,...,...,...,...,...
5441952,,E,W,1651010000022494,Vrijstaande woning,,
5441953,,A,W,1651010000022503,Vrijstaande woning,,
5441954,0.93,A,W,1651010000022505,Vrijstaande woning,,
5441955,,B,W,1651010000022498,Vrijstaande woning,,


In [14]:
# Create a copy and set the index to the BAG VBO ID values
df_housing_stock_ep_online = df_cleaned_ep_online.copy()
df_housing_stock_ep_online.set_index('Pand_bagverblijfsobjectid', inplace=True)

# Preview data
df_housing_stock_ep_online

Unnamed: 0_level_0,Pand_energieindex,Pand_energieklasse,Pand_gebouwklasse,Pand_gebouwtype,Pand_gebruiksoppervlakte_thermische_zone,Pand_warmtebehoefte
Pand_bagverblijfsobjectid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
363010000977860,,C,U,,1735.09,112.11
363010000948617,,A++,W,Appartement,145.08,44.16
363010000618803,,A++,W,Appartement,145.08,46.42
363010012087723,,A++,W,Appartement,145.08,47.03
363010012087724,,A+++,W,Appartement,292.63,68.95
...,...,...,...,...,...,...
1651010000022494,,E,W,Vrijstaande woning,,
1651010000022503,,A,W,Vrijstaande woning,,
1651010000022505,0.93,A,W,Vrijstaande woning,,
1651010000022498,,B,W,Vrijstaande woning,,


Now, let's continue with the **PBL** data!

In [15]:
df_raw_pbl.head(3)

Unnamed: 0.1,Unnamed: 0,Woning/vbo_id,Adres/Postcode_huisnummer,Aantal bewoners/Aantal bewoners,Regio/gemeente,Regio/wijk,Regio/buurtcode,Woningkenmerken/Kenmerken,Woningkenmerken/woningtype,Woningkenmerken/bouwperiode,...,Metervraag olie/ruimteverwarming piek,Metervraag totaal/aardgas,Metervraag totaal/elektriciteit,Metervraag totaal/warmtenet,Metervraag totaal/waterstof,Metervraag totaal/biomassa,Metervraag totaal/olie,Metervraag totaal/totaal,Regionale klimaatcorrectie/regionale klimaatcorrectie,gemeente
0,0,'0003010000125985','9901KB_16',1.0,3,300,'BU00030000','meergezins: laag en midden_2000 - 2005_wooncorp',5,7,...,0.0,23.3,0.1,0.0,0.0,0.0,0.0,23.4,1.1,GM0003
1,1,'0003010000125986','9901KB_20',1.0,3,300,'BU00030000','meergezins: laag en midden_2000 - 2005_wooncorp',5,7,...,0.0,25.6,0.1,0.0,0.0,0.0,0.0,25.8,1.1,GM0003
2,2,'0003010000125991','9901AD_15',2.0,3,300,'BU00030000','rijwoning tussen_voor 1930_koop',4,0,...,0.0,37.1,0.2,0.0,0.0,0.0,0.0,37.2,1.1,GM0003


First we need to drop the buildings that were built after 2019 (the year of the dataset).

In [16]:
# Keep the buildings that were built in 2019 or before.
df_cleaned_pbl = df_raw_pbl[df_raw_pbl['Woningkenmerken/bouwjaar'] <= 2019]

# Preview data
df_cleaned_pbl.head(3)

Unnamed: 0.1,Unnamed: 0,Woning/vbo_id,Adres/Postcode_huisnummer,Aantal bewoners/Aantal bewoners,Regio/gemeente,Regio/wijk,Regio/buurtcode,Woningkenmerken/Kenmerken,Woningkenmerken/woningtype,Woningkenmerken/bouwperiode,...,Metervraag olie/ruimteverwarming piek,Metervraag totaal/aardgas,Metervraag totaal/elektriciteit,Metervraag totaal/warmtenet,Metervraag totaal/waterstof,Metervraag totaal/biomassa,Metervraag totaal/olie,Metervraag totaal/totaal,Regionale klimaatcorrectie/regionale klimaatcorrectie,gemeente
0,0,'0003010000125985','9901KB_16',1.0,3,300,'BU00030000','meergezins: laag en midden_2000 - 2005_wooncorp',5,7,...,0.0,23.3,0.1,0.0,0.0,0.0,0.0,23.4,1.1,GM0003
1,1,'0003010000125986','9901KB_20',1.0,3,300,'BU00030000','meergezins: laag en midden_2000 - 2005_wooncorp',5,7,...,0.0,25.6,0.1,0.0,0.0,0.0,0.0,25.8,1.1,GM0003
2,2,'0003010000125991','9901AD_15',2.0,3,300,'BU00030000','rijwoning tussen_voor 1930_koop',4,0,...,0.0,37.1,0.2,0.0,0.0,0.0,0.0,37.2,1.1,GM0003


Here, we should make sure the BAG VBO IDs have the same format as in the EP-online data. Thus, we should remove the accents and convert the strings into integers.

In [17]:
# Define method to remove the quotation marks and the first zero of the BAG VBO ID 
def clean_pbl_bag_id(bag_id):
    return bag_id[1:len(bag_id)-1]

# Apply method to the BAG VBO ID values and turn into integers
df_cleaned_pbl['Woning/vbo_id'] = df_cleaned_pbl['Woning/vbo_id'].apply(clean_pbl_bag_id).astype(int)

# Preview data
df_cleaned_pbl.head(3)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned_pbl['Woning/vbo_id'] = df_cleaned_pbl['Woning/vbo_id'].apply(clean_pbl_bag_id).astype(int)


Unnamed: 0.1,Unnamed: 0,Woning/vbo_id,Adres/Postcode_huisnummer,Aantal bewoners/Aantal bewoners,Regio/gemeente,Regio/wijk,Regio/buurtcode,Woningkenmerken/Kenmerken,Woningkenmerken/woningtype,Woningkenmerken/bouwperiode,...,Metervraag olie/ruimteverwarming piek,Metervraag totaal/aardgas,Metervraag totaal/elektriciteit,Metervraag totaal/warmtenet,Metervraag totaal/waterstof,Metervraag totaal/biomassa,Metervraag totaal/olie,Metervraag totaal/totaal,Regionale klimaatcorrectie/regionale klimaatcorrectie,gemeente
0,0,3010000125985,'9901KB_16',1.0,3,300,'BU00030000','meergezins: laag en midden_2000 - 2005_wooncorp',5,7,...,0.0,23.3,0.1,0.0,0.0,0.0,0.0,23.4,1.1,GM0003
1,1,3010000125986,'9901KB_20',1.0,3,300,'BU00030000','meergezins: laag en midden_2000 - 2005_wooncorp',5,7,...,0.0,25.6,0.1,0.0,0.0,0.0,0.0,25.8,1.1,GM0003
2,2,3010000125991,'9901AD_15',2.0,3,300,'BU00030000','rijwoning tussen_voor 1930_koop',4,0,...,0.0,37.1,0.2,0.0,0.0,0.0,0.0,37.2,1.1,GM0003


In [18]:
# Set a multi-level index based on the municipal code and the BAG VBO ID
index_columns = ['gemeente', 'Woning/vbo_id']
df_cleaned_pbl.set_index(index_columns, inplace=True)

# Drop the first unnamed column
df_cleaned_pbl = df_cleaned_pbl.iloc[:, 1:]

# Preview data
df_cleaned_pbl.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Adres/Postcode_huisnummer,Aantal bewoners/Aantal bewoners,Regio/gemeente,Regio/wijk,Regio/buurtcode,Woningkenmerken/Kenmerken,Woningkenmerken/woningtype,Woningkenmerken/bouwperiode,Woningkenmerken/bouwjaar,Woningkenmerken/schillabel,...,Metervraag olie/ruimteverwarming basis,Metervraag olie/ruimteverwarming piek,Metervraag totaal/aardgas,Metervraag totaal/elektriciteit,Metervraag totaal/warmtenet,Metervraag totaal/waterstof,Metervraag totaal/biomassa,Metervraag totaal/olie,Metervraag totaal/totaal,Regionale klimaatcorrectie/regionale klimaatcorrectie
gemeente,Woning/vbo_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
GM0003,3010000125985,'9901KB_16',1.0,3,300,'BU00030000','meergezins: laag en midden_2000 - 2005_wooncorp',5,7,2002,B,...,0.0,0.0,23.3,0.1,0.0,0.0,0.0,0.0,23.4,1.1
GM0003,3010000125986,'9901KB_20',1.0,3,300,'BU00030000','meergezins: laag en midden_2000 - 2005_wooncorp',5,7,2002,C,...,0.0,0.0,25.6,0.1,0.0,0.0,0.0,0.0,25.8,1.1
GM0003,3010000125991,'9901AD_15',2.0,3,300,'BU00030000','rijwoning tussen_voor 1930_koop',4,0,1925,G,...,0.0,0.0,37.1,0.2,0.0,0.0,0.0,0.0,37.2,1.1


This dataset also has a lot of columns we're not interested in. Let's specify the ones we want to keep and filter for those:

* Woningkenmerken/eigendom
* Woningkenmerken/oppervlakte
* Woningkenmerken/schillabel
* Regionale klimaatcorrectie/regionale klimaatcorrectie
* Functionele vraag/Lokale praktijkfactor
* Functionele vraag/ruimteverwarming
* Woningkenmerken/bouwjaar
* Woningkenmerken/woningtype

In [19]:
# Specify the columns to keep
columns_to_keep = [
    'Woningkenmerken/eigendom',
    'Woningkenmerken/oppervlakte',
    'Woningkenmerken/schillabel',
    'Regionale klimaatcorrectie/regionale klimaatcorrectie',
    'Functionele vraag/Lokale praktijkfactor',
    'Functionele vraag/ruimteverwarming',
    'Woningkenmerken/bouwjaar',
    'Woningkenmerken/woningtype',
]

df_housing_stock_pbl = df_cleaned_pbl[columns_to_keep]

# Preview data
df_housing_stock_pbl.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Woningkenmerken/eigendom,Woningkenmerken/oppervlakte,Woningkenmerken/schillabel,Regionale klimaatcorrectie/regionale klimaatcorrectie,Functionele vraag/Lokale praktijkfactor,Functionele vraag/ruimteverwarming,Woningkenmerken/bouwjaar,Woningkenmerken/woningtype
gemeente,Woning/vbo_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
GM0003,3010000125985,2,69,B,1.1,1.1,19.0,2002,5
GM0003,3010000125986,2,69,C,1.1,1.1,21.4,2002,5
GM0003,3010000125991,0,66,G,1.1,1.0,30.2,1925,4


##### Combining the EP-online and PBL data

We have two dataframes that we want to merge by vbo id value:
* df_housing_stock_pbl
* df_housing_stock_ep_online

In [20]:
# Merge PBL and EP-online dataframes
df_housing_stock_merged = pd.merge(
    df_housing_stock_pbl.reset_index(), 
    df_housing_stock_ep_online.reset_index(), 
    left_on=df_housing_stock_pbl.reset_index()['Woning/vbo_id'], 
    right_on=df_housing_stock_ep_online.reset_index()['Pand_bagverblijfsobjectid'],
    how='left'
)

# Preview merged data
df_housing_stock_merged

Unnamed: 0,key_0,gemeente,Woning/vbo_id,Woningkenmerken/eigendom,Woningkenmerken/oppervlakte,Woningkenmerken/schillabel,Regionale klimaatcorrectie/regionale klimaatcorrectie,Functionele vraag/Lokale praktijkfactor,Functionele vraag/ruimteverwarming,Woningkenmerken/bouwjaar,Woningkenmerken/woningtype,Pand_bagverblijfsobjectid,Pand_energieindex,Pand_energieklasse,Pand_gebouwklasse,Pand_gebouwtype,Pand_gebruiksoppervlakte_thermische_zone,Pand_warmtebehoefte
0,3010000125985,GM0003,3010000125985,2,69,B,1.1,1.1,19.0,2002,5,3.010000e+12,1.38,B,W,Flatwoning (overig),,
1,3010000125986,GM0003,3010000125986,2,69,C,1.1,1.1,21.4,2002,5,3.010000e+12,1.44,C,W,Flatwoning (overig),,
2,3010000125991,GM0003,3010000125991,0,66,G,1.1,1.0,30.2,1925,4,3.010000e+12,,G,W,Vrijstaande woning,,
3,3010000125992,GM0003,3010000125992,0,153,x,1.1,0.9,53.0,1950,2,,,,,,,
4,3010000125994,GM0003,3010000125994,1,45,x,1.1,0.9,46.7,1930,1,3.010000e+12,2.30,E,W,Maisonnette,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7866942,193010000105649,GM0193,193010000105649,0,59,x,1.0,1.0,29.2,1901,4,,,,,,,
7866943,193010000105687,GM0193,193010000105687,0,233,B,1.0,1.1,22.8,1995,1,1.930100e+14,,B,W,Vrijstaande woning,,
7866944,193010000105923,GM0193,193010000105923,0,136,x,1.0,1.0,14.5,1987,1,,,,,,,
7866945,193010000105959,GM0193,193010000105959,1,42,A,1.0,1.0,21.2,2007,5,,,,,,,


The data by PBL and EP-online use the latest municipal codes whereas the ETM uses the old codes. Some municipalities have merged, etc. Therefore we should correct this in the data.

In [21]:
# old municipality code (to be used in the data): new municipality code (from which the average net heat demand should be used for the old municipality code)
correction = {
    
    # Fusie van Appingedam, Delfzijl en Loppersum tot Eemsdelta (2021)
    'GM0003': 'GM1979',
    'GM0010': 'GM1979',
    'GM0024': 'GM1979',
    
    # Wat te doen met de opgeheven gemeente Haaren (2021)?
    
    
    # Fusie van Beemster met Purmerend (2022)
    # 'GM0370': 'GM0439',
    
    # Fusie van Weesp met Amsterdam (2022)
    # 'GM0457': 'GM0363',
    
    # Fusie van Boxmeer, Cuijk, Grave, Mill en Sint Hubert en Sint Anthonis tot Land van Cuijk (2022)
    # 'GM0756': 'GM1982',
    # 'GM1684': 'GM1982',
    # 'GM0786': 'GM1982',
    # 'GM0815': 'GM1982',
    # 'GM1702': 'GM1982',
    
    # Fusie van Heerhugowaard en Langedijk tot Dijk en Waard (2022)
    # 'GM0398': 'GM1980',
    # 'GM0416': 'GM1980',
    
    # Fusie van Landerd en Uden tot Maashorst (2022)
    # 'GM1685': 'GM1991',
    # 'GM0856': 'GM1991',
    
    # Fusie van Brielle, Hellevoetsluis en Westvoorne tot Voorne aan Zee (2023)
    # 'GM0350': 'GM1992',
    # 'GM0614': 'GM1992',
    # 'GM0501': 'GM1992'
}

df_housing_stock_merged['gemeente'] = df_housing_stock_merged['gemeente'].replace(correction)

In [22]:
# Set a multi-level index based on the municipal code and the BAG VBO ID
index_columns = ['gemeente', 'Woning/vbo_id']
df_housing_stock_merged.set_index(index_columns, inplace=True)

# Drop the duplicate columns with BAG VBO IDs
df_housing_stock_merged = df_housing_stock_merged.iloc[:, 1:]
df_housing_stock_merged.drop('Pand_bagverblijfsobjectid', axis=1, inplace=True)

# Preview data
df_housing_stock_merged

Unnamed: 0_level_0,Unnamed: 1_level_0,Woningkenmerken/eigendom,Woningkenmerken/oppervlakte,Woningkenmerken/schillabel,Regionale klimaatcorrectie/regionale klimaatcorrectie,Functionele vraag/Lokale praktijkfactor,Functionele vraag/ruimteverwarming,Woningkenmerken/bouwjaar,Woningkenmerken/woningtype,Pand_energieindex,Pand_energieklasse,Pand_gebouwklasse,Pand_gebouwtype,Pand_gebruiksoppervlakte_thermische_zone,Pand_warmtebehoefte
gemeente,Woning/vbo_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
GM1979,3010000125985,2,69,B,1.1,1.1,19.0,2002,5,1.38,B,W,Flatwoning (overig),,
GM1979,3010000125986,2,69,C,1.1,1.1,21.4,2002,5,1.44,C,W,Flatwoning (overig),,
GM1979,3010000125991,0,66,G,1.1,1.0,30.2,1925,4,,G,W,Vrijstaande woning,,
GM1979,3010000125992,0,153,x,1.1,0.9,53.0,1950,2,,,,,,
GM1979,3010000125994,1,45,x,1.1,0.9,46.7,1930,1,2.30,E,W,Maisonnette,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GM0193,193010000105649,0,59,x,1.0,1.0,29.2,1901,4,,,,,,
GM0193,193010000105687,0,233,B,1.0,1.1,22.8,1995,1,,B,W,Vrijstaande woning,,
GM0193,193010000105923,0,136,x,1.0,1.0,14.5,1987,1,,,,,,
GM0193,193010000105959,1,42,A,1.0,1.0,21.2,2007,5,,,,,,


##### Enriching the data with ETM classifications for housing types, construction periods and energy labels

Below you can find the classification of the housing types that PBL uses:

Woningtype (W)

**1** | vrijstaand <br>
**2** | 2-onder-1 kap <br>
**3** | rijwoning hoekwoning <br>
**4** | rijwoning tussenwoning <br>
**5** | appartementen t/m 4 (meergezinswoningen t/m 4 verdiepingen) <br>
**6** | appartementen 5>= (meergezinswoningen 5 of meer verdiepingen)

In the ETM we bundle "2-onder-1 kap" and "rijwoning hoekwoning" to the same category ("hoekhuis"). Same goes "appartementen t/m 4" and "appartementen 5>="; we consider both "appartementen". Let's create a method to classify the housing types.

In [23]:
# Define method for the classification of housing types
def classify_housing_type(housing_type):
    if housing_type == 1:
        return "Vrijstaand huis"
    elif housing_type == 2 or housing_type == 3:
        return "Hoekhuis"
    elif housing_type == 4:
        return "Rijtjeshuis"
    elif housing_type == 5 or housing_type == 6:
        return "Appartement"

In [24]:
# Apply the classification function to the "woningtype" column
df_housing_stock_merged['Woningtype ETM'] = df_housing_stock_merged['Woningkenmerken/woningtype'].apply(classify_housing_type)

# Preview data
df_housing_stock_merged

Unnamed: 0_level_0,Unnamed: 1_level_0,Woningkenmerken/eigendom,Woningkenmerken/oppervlakte,Woningkenmerken/schillabel,Regionale klimaatcorrectie/regionale klimaatcorrectie,Functionele vraag/Lokale praktijkfactor,Functionele vraag/ruimteverwarming,Woningkenmerken/bouwjaar,Woningkenmerken/woningtype,Pand_energieindex,Pand_energieklasse,Pand_gebouwklasse,Pand_gebouwtype,Pand_gebruiksoppervlakte_thermische_zone,Pand_warmtebehoefte,Woningtype ETM
gemeente,Woning/vbo_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
GM1979,3010000125985,2,69,B,1.1,1.1,19.0,2002,5,1.38,B,W,Flatwoning (overig),,,Appartement
GM1979,3010000125986,2,69,C,1.1,1.1,21.4,2002,5,1.44,C,W,Flatwoning (overig),,,Appartement
GM1979,3010000125991,0,66,G,1.1,1.0,30.2,1925,4,,G,W,Vrijstaande woning,,,Rijtjeshuis
GM1979,3010000125992,0,153,x,1.1,0.9,53.0,1950,2,,,,,,,Hoekhuis
GM1979,3010000125994,1,45,x,1.1,0.9,46.7,1930,1,2.30,E,W,Maisonnette,,,Vrijstaand huis
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GM0193,193010000105649,0,59,x,1.0,1.0,29.2,1901,4,,,,,,,Rijtjeshuis
GM0193,193010000105687,0,233,B,1.0,1.1,22.8,1995,1,,B,W,Vrijstaande woning,,,Vrijstaand huis
GM0193,193010000105923,0,136,x,1.0,1.0,14.5,1987,1,,,,,,,Vrijstaand huis
GM0193,193010000105959,1,42,A,1.0,1.0,21.2,2007,5,,,,,,,Appartement


Same goes for the construction year ranges:

Bouwperiode (B)

**0** | Tot en met 1929 <br>
**1** | 1930 t/m 1945 <br>
**2** | 1946 t/m 1964 <br>
**3** | 1965 t/m 1974 <br>
**4** | 1975 t/m 1991 <br>
**5** | 1992 t/m 1995 <br>
**6** | 1996 t/m 1999 <br>
**7** | 2000 t/m 2005 <br>
**8** | 2006 t/m 2010 <br>
**9** | 2011 t/m 2014 <br>
**10** | 2015 t/m 2020 <br>
**11** | 2021 en later

In the ETM we use a different classification for building years. Hence, we also want to classify the buildings according to the ETM categories in the column "bouwjaarklasse ETM".

In [25]:
# Define method for the classification of building years
def classify_year(year):
    if year < 1945:
        return "< 1945"
    elif 1945 <= year < 1965:
        return "1945 - 1964"
    elif 1965 <= year < 1985:
        return "1965 - 1984"
    elif 1985 <= year < 2005:
        return "1985 - 2004"
    else:
        return ">= 2005"

In [26]:
# Apply the classification function to the "bouwperiode" column
df_housing_stock_merged['Bouwjaarklasse ETM'] = df_housing_stock_merged['Woningkenmerken/bouwjaar'].apply(classify_year)

# Preview df
df_housing_stock_merged

Unnamed: 0_level_0,Unnamed: 1_level_0,Woningkenmerken/eigendom,Woningkenmerken/oppervlakte,Woningkenmerken/schillabel,Regionale klimaatcorrectie/regionale klimaatcorrectie,Functionele vraag/Lokale praktijkfactor,Functionele vraag/ruimteverwarming,Woningkenmerken/bouwjaar,Woningkenmerken/woningtype,Pand_energieindex,Pand_energieklasse,Pand_gebouwklasse,Pand_gebouwtype,Pand_gebruiksoppervlakte_thermische_zone,Pand_warmtebehoefte,Woningtype ETM,Bouwjaarklasse ETM
gemeente,Woning/vbo_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
GM1979,3010000125985,2,69,B,1.1,1.1,19.0,2002,5,1.38,B,W,Flatwoning (overig),,,Appartement,1985 - 2004
GM1979,3010000125986,2,69,C,1.1,1.1,21.4,2002,5,1.44,C,W,Flatwoning (overig),,,Appartement,1985 - 2004
GM1979,3010000125991,0,66,G,1.1,1.0,30.2,1925,4,,G,W,Vrijstaande woning,,,Rijtjeshuis,< 1945
GM1979,3010000125992,0,153,x,1.1,0.9,53.0,1950,2,,,,,,,Hoekhuis,1945 - 1964
GM1979,3010000125994,1,45,x,1.1,0.9,46.7,1930,1,2.30,E,W,Maisonnette,,,Vrijstaand huis,< 1945
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GM0193,193010000105649,0,59,x,1.0,1.0,29.2,1901,4,,,,,,,Rijtjeshuis,< 1945
GM0193,193010000105687,0,233,B,1.0,1.1,22.8,1995,1,,B,W,Vrijstaande woning,,,Vrijstaand huis,1985 - 2004
GM0193,193010000105923,0,136,x,1.0,1.0,14.5,1987,1,,,,,,,Vrijstaand huis,1985 - 2004
GM0193,193010000105959,1,42,A,1.0,1.0,21.2,2007,5,,,,,,,Appartement,>= 2005


Then, we need to map the energy labels to net (or typical) useful heat demands in kWh/m2.

In [27]:
# Define method for the classification of energylabels
def classify_label(label):
    if label == "A" or label == "A+" or label == "A++" or label == "A2+" or label == "A+++" or label == "A3+" or label == "A++++" or label == "A4+" or label == "A+++++" or label == "A5+":
        return 118
    elif label == "B":
        return 175
    elif label == "C":
        return 220
    elif label == "D":
        return 270
    elif label == "E":
        return 313
    elif label == "F":
        return 358
    elif label == "G":
        return 403

In [28]:
# Apply the classification function to the "Pand_energieklasse" column
df_housing_stock_merged['Netto warmtevraag (kWh/m2)'] = df_housing_stock_merged['Pand_warmtebehoefte'].combine_first(df_housing_stock_merged['Pand_energieklasse'].apply(classify_label))

# Preview df
df_housing_stock_merged

Unnamed: 0_level_0,Unnamed: 1_level_0,Woningkenmerken/eigendom,Woningkenmerken/oppervlakte,Woningkenmerken/schillabel,Regionale klimaatcorrectie/regionale klimaatcorrectie,Functionele vraag/Lokale praktijkfactor,Functionele vraag/ruimteverwarming,Woningkenmerken/bouwjaar,Woningkenmerken/woningtype,Pand_energieindex,Pand_energieklasse,Pand_gebouwklasse,Pand_gebouwtype,Pand_gebruiksoppervlakte_thermische_zone,Pand_warmtebehoefte,Woningtype ETM,Bouwjaarklasse ETM,Netto warmtevraag (kWh/m2)
gemeente,Woning/vbo_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
GM1979,3010000125985,2,69,B,1.1,1.1,19.0,2002,5,1.38,B,W,Flatwoning (overig),,,Appartement,1985 - 2004,175.0
GM1979,3010000125986,2,69,C,1.1,1.1,21.4,2002,5,1.44,C,W,Flatwoning (overig),,,Appartement,1985 - 2004,220.0
GM1979,3010000125991,0,66,G,1.1,1.0,30.2,1925,4,,G,W,Vrijstaande woning,,,Rijtjeshuis,< 1945,403.0
GM1979,3010000125992,0,153,x,1.1,0.9,53.0,1950,2,,,,,,,Hoekhuis,1945 - 1964,
GM1979,3010000125994,1,45,x,1.1,0.9,46.7,1930,1,2.30,E,W,Maisonnette,,,Vrijstaand huis,< 1945,313.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GM0193,193010000105649,0,59,x,1.0,1.0,29.2,1901,4,,,,,,,Rijtjeshuis,< 1945,
GM0193,193010000105687,0,233,B,1.0,1.1,22.8,1995,1,,B,W,Vrijstaande woning,,,Vrijstaand huis,1985 - 2004,175.0
GM0193,193010000105923,0,136,x,1.0,1.0,14.5,1987,1,,,,,,,Vrijstaand huis,1985 - 2004,
GM0193,193010000105959,1,42,A,1.0,1.0,21.2,2007,5,,,,,,,Appartement,>= 2005,


By combining the total surface and the net heat demand we can also calculate the total useful heat demand for each building:

In [29]:
# Calculate total functional heat demand (for space heating) and store the values in a new column
df_housing_stock_merged['Functionele vraag ruimteverwarming EP-online (kWh)'] = df_housing_stock_merged['Woningkenmerken/oppervlakte'] * df_housing_stock_merged['Netto warmtevraag (kWh/m2)']

# Preview data
df_housing_stock_merged

Unnamed: 0_level_0,Unnamed: 1_level_0,Woningkenmerken/eigendom,Woningkenmerken/oppervlakte,Woningkenmerken/schillabel,Regionale klimaatcorrectie/regionale klimaatcorrectie,Functionele vraag/Lokale praktijkfactor,Functionele vraag/ruimteverwarming,Woningkenmerken/bouwjaar,Woningkenmerken/woningtype,Pand_energieindex,Pand_energieklasse,Pand_gebouwklasse,Pand_gebouwtype,Pand_gebruiksoppervlakte_thermische_zone,Pand_warmtebehoefte,Woningtype ETM,Bouwjaarklasse ETM,Netto warmtevraag (kWh/m2),Functionele vraag ruimteverwarming EP-online (kWh)
gemeente,Woning/vbo_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
GM1979,3010000125985,2,69,B,1.1,1.1,19.0,2002,5,1.38,B,W,Flatwoning (overig),,,Appartement,1985 - 2004,175.0,12075.0
GM1979,3010000125986,2,69,C,1.1,1.1,21.4,2002,5,1.44,C,W,Flatwoning (overig),,,Appartement,1985 - 2004,220.0,15180.0
GM1979,3010000125991,0,66,G,1.1,1.0,30.2,1925,4,,G,W,Vrijstaande woning,,,Rijtjeshuis,< 1945,403.0,26598.0
GM1979,3010000125992,0,153,x,1.1,0.9,53.0,1950,2,,,,,,,Hoekhuis,1945 - 1964,,
GM1979,3010000125994,1,45,x,1.1,0.9,46.7,1930,1,2.30,E,W,Maisonnette,,,Vrijstaand huis,< 1945,313.0,14085.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GM0193,193010000105649,0,59,x,1.0,1.0,29.2,1901,4,,,,,,,Rijtjeshuis,< 1945,,
GM0193,193010000105687,0,233,B,1.0,1.1,22.8,1995,1,,B,W,Vrijstaande woning,,,Vrijstaand huis,1985 - 2004,175.0,40775.0
GM0193,193010000105923,0,136,x,1.0,1.0,14.5,1987,1,,,,,,,Vrijstaand huis,1985 - 2004,,
GM0193,193010000105959,1,42,A,1.0,1.0,21.2,2007,5,,,,,,,Appartement,>= 2005,,


In [30]:
# Add new column for the municipal geo ID
df_housing_stock_merged['gemeentecode'] = df_housing_stock_merged.index.get_level_values('gemeente')

# Preview data
df_housing_stock_merged.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Woningkenmerken/eigendom,Woningkenmerken/oppervlakte,Woningkenmerken/schillabel,Regionale klimaatcorrectie/regionale klimaatcorrectie,Functionele vraag/Lokale praktijkfactor,Functionele vraag/ruimteverwarming,Woningkenmerken/bouwjaar,Woningkenmerken/woningtype,Pand_energieindex,Pand_energieklasse,Pand_gebouwklasse,Pand_gebouwtype,Pand_gebruiksoppervlakte_thermische_zone,Pand_warmtebehoefte,Woningtype ETM,Bouwjaarklasse ETM,Netto warmtevraag (kWh/m2),Functionele vraag ruimteverwarming EP-online (kWh),gemeentecode
gemeente,Woning/vbo_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
GM1979,3010000125985,2,69,B,1.1,1.1,19.0,2002,5,1.38,B,W,Flatwoning (overig),,,Appartement,1985 - 2004,175.0,12075.0,GM1979
GM1979,3010000125986,2,69,C,1.1,1.1,21.4,2002,5,1.44,C,W,Flatwoning (overig),,,Appartement,1985 - 2004,220.0,15180.0,GM1979
GM1979,3010000125991,0,66,G,1.1,1.0,30.2,1925,4,,G,W,Vrijstaande woning,,,Rijtjeshuis,< 1945,403.0,26598.0,GM1979


Lastly, we'd like to map the owner to string instead of numbers (0, 1 and 2):
* 0: Koop
* 1: Particuliere huur
* 2: Sociale huur

In [31]:
# Define method for the classification of owners
def classify_owner(num):
    if num == 0:
        return "Koop"
    elif num == 1:
        return "Particuliere huur"
    elif num == 2:
        return "Sociale huur"

In [32]:
# Apply the classification function to the "Woningkenmerken/eigendom" column
df_housing_stock_merged['Woningkenmerken/eigendom'] = df_housing_stock_merged['Woningkenmerken/eigendom'].apply(classify_owner)

# Preview df
df_housing_stock_merged

Unnamed: 0_level_0,Unnamed: 1_level_0,Woningkenmerken/eigendom,Woningkenmerken/oppervlakte,Woningkenmerken/schillabel,Regionale klimaatcorrectie/regionale klimaatcorrectie,Functionele vraag/Lokale praktijkfactor,Functionele vraag/ruimteverwarming,Woningkenmerken/bouwjaar,Woningkenmerken/woningtype,Pand_energieindex,Pand_energieklasse,Pand_gebouwklasse,Pand_gebouwtype,Pand_gebruiksoppervlakte_thermische_zone,Pand_warmtebehoefte,Woningtype ETM,Bouwjaarklasse ETM,Netto warmtevraag (kWh/m2),Functionele vraag ruimteverwarming EP-online (kWh),gemeentecode
gemeente,Woning/vbo_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
GM1979,3010000125985,Sociale huur,69,B,1.1,1.1,19.0,2002,5,1.38,B,W,Flatwoning (overig),,,Appartement,1985 - 2004,175.0,12075.0,GM1979
GM1979,3010000125986,Sociale huur,69,C,1.1,1.1,21.4,2002,5,1.44,C,W,Flatwoning (overig),,,Appartement,1985 - 2004,220.0,15180.0,GM1979
GM1979,3010000125991,Koop,66,G,1.1,1.0,30.2,1925,4,,G,W,Vrijstaande woning,,,Rijtjeshuis,< 1945,403.0,26598.0,GM1979
GM1979,3010000125992,Koop,153,x,1.1,0.9,53.0,1950,2,,,,,,,Hoekhuis,1945 - 1964,,,GM1979
GM1979,3010000125994,Particuliere huur,45,x,1.1,0.9,46.7,1930,1,2.30,E,W,Maisonnette,,,Vrijstaand huis,< 1945,313.0,14085.0,GM1979
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GM0193,193010000105649,Koop,59,x,1.0,1.0,29.2,1901,4,,,,,,,Rijtjeshuis,< 1945,,,GM0193
GM0193,193010000105687,Koop,233,B,1.0,1.1,22.8,1995,1,,B,W,Vrijstaande woning,,,Vrijstaand huis,1985 - 2004,175.0,40775.0,GM0193
GM0193,193010000105923,Koop,136,x,1.0,1.0,14.5,1987,1,,,,,,,Vrijstaand huis,1985 - 2004,,,GM0193
GM0193,193010000105959,Particuliere huur,42,A,1.0,1.0,21.2,2007,5,,,,,,,Appartement,>= 2005,,,GM0193


##### Grouping data

Let's now group the data into the different ETM categories:

In [33]:
df_housing_stock_merged.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Woningkenmerken/eigendom,Woningkenmerken/oppervlakte,Woningkenmerken/schillabel,Regionale klimaatcorrectie/regionale klimaatcorrectie,Functionele vraag/Lokale praktijkfactor,Functionele vraag/ruimteverwarming,Woningkenmerken/bouwjaar,Woningkenmerken/woningtype,Pand_energieindex,Pand_energieklasse,Pand_gebouwklasse,Pand_gebouwtype,Pand_gebruiksoppervlakte_thermische_zone,Pand_warmtebehoefte,Woningtype ETM,Bouwjaarklasse ETM,Netto warmtevraag (kWh/m2),Functionele vraag ruimteverwarming EP-online (kWh),gemeentecode
gemeente,Woning/vbo_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
GM1979,3010000125985,Sociale huur,69,B,1.1,1.1,19.0,2002,5,1.38,B,W,Flatwoning (overig),,,Appartement,1985 - 2004,175.0,12075.0,GM1979
GM1979,3010000125986,Sociale huur,69,C,1.1,1.1,21.4,2002,5,1.44,C,W,Flatwoning (overig),,,Appartement,1985 - 2004,220.0,15180.0,GM1979
GM1979,3010000125991,Koop,66,G,1.1,1.0,30.2,1925,4,,G,W,Vrijstaande woning,,,Rijtjeshuis,< 1945,403.0,26598.0,GM1979


In [34]:
groups = [
    'gemeentecode',
    'Woningtype ETM',
    'Bouwjaarklasse ETM',
    # 'Woningkenmerken/eigendom'
]

filter = [
    'gemeentecode',
    'Woningkenmerken/oppervlakte',
    'Woningtype ETM',
    'Bouwjaarklasse ETM',
    'Functionele vraag/ruimteverwarming',
    # 'Woningkenmerken/eigendom'
]

df_housing_stock_by_groups = df_housing_stock_merged.loc[:, filter].groupby(by=groups).sum()

# Preview
df_housing_stock_by_groups

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Woningkenmerken/oppervlakte,Functionele vraag/ruimteverwarming
gemeentecode,Woningtype ETM,Bouwjaarklasse ETM,Unnamed: 3_level_1,Unnamed: 4_level_1
GM0014,Appartement,1945 - 1964,982205,450203.6
GM0014,Appartement,1965 - 1984,1299040,573995.1
GM0014,Appartement,1985 - 2004,692134,256755.4
GM0014,Appartement,< 1945,1779584,756600.5
GM0014,Appartement,>= 2005,694370,299185.0
...,...,...,...,...
GM1979,Vrijstaand huis,1945 - 1964,132733,28288.9
GM1979,Vrijstaand huis,1965 - 1984,165379,33571.0
GM1979,Vrijstaand huis,1985 - 2004,216545,50017.0
GM1979,Vrijstaand huis,< 1945,684595,140466.8


We're interested in additional information per category, which needs some preparation:
* the share of the useful heat demand
* the average net heat demand (kWh/m2)
* the number of buildings per group

##### Preparing the number of households calculation

In [35]:
# Determine number of households per group and store the values in a separate grouped dataframe
df_housing_stock_count = df_housing_stock_merged.loc[:, filter].groupby(by=groups).count()
df_housing_stock_count = pd.DataFrame(df_housing_stock_count['Woningkenmerken/oppervlakte'])
df_housing_stock_count = df_housing_stock_count.rename(columns={'Woningkenmerken/oppervlakte': 'Aantal woningen (#)'})

# Preview
df_housing_stock_count

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Aantal woningen (#)
gemeentecode,Woningtype ETM,Bouwjaarklasse ETM,Unnamed: 3_level_1
GM0014,Appartement,1945 - 1964,13050
GM0014,Appartement,1965 - 1984,16824
GM0014,Appartement,1985 - 2004,7854
GM0014,Appartement,< 1945,22095
GM0014,Appartement,>= 2005,9595
...,...,...,...
GM1979,Vrijstaand huis,1945 - 1964,748
GM1979,Vrijstaand huis,1965 - 1984,923
GM1979,Vrijstaand huis,1985 - 2004,1270
GM1979,Vrijstaand huis,< 1945,3589


##### Preparing the average net heat demand calculation

The calculation of the average net heat demand in kWh/m2 should be based on the EP-online data instead of the PBL data. Hence, we create a copy of the housing stock dataframe and drop all the rows that have NaN values for either the 'Functionele vraag ruimteverwarming EP-online (kWh)' or 'Woningkenmerken/oppervlakte' column.

In [36]:
# Create a copy of the housing stock dataframe
df_housing_stock_filtered = df_housing_stock_merged.copy()

# Drop rows where 'Functionele vraag ruimteverwarming EP-online (kWh)' or 'Woningkenmerken/oppervlakte' has NaN values
df_housing_stock_filtered = df_housing_stock_filtered.dropna(subset=['Functionele vraag ruimteverwarming EP-online (kWh)', 'Woningkenmerken/oppervlakte'])

# Preview dataframe
df_housing_stock_filtered.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Woningkenmerken/eigendom,Woningkenmerken/oppervlakte,Woningkenmerken/schillabel,Regionale klimaatcorrectie/regionale klimaatcorrectie,Functionele vraag/Lokale praktijkfactor,Functionele vraag/ruimteverwarming,Woningkenmerken/bouwjaar,Woningkenmerken/woningtype,Pand_energieindex,Pand_energieklasse,Pand_gebouwklasse,Pand_gebouwtype,Pand_gebruiksoppervlakte_thermische_zone,Pand_warmtebehoefte,Woningtype ETM,Bouwjaarklasse ETM,Netto warmtevraag (kWh/m2),Functionele vraag ruimteverwarming EP-online (kWh),gemeentecode
gemeente,Woning/vbo_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
GM1979,3010000125985,Sociale huur,69,B,1.1,1.1,19.0,2002,5,1.38,B,W,Flatwoning (overig),,,Appartement,1985 - 2004,175.0,12075.0,GM1979
GM1979,3010000125986,Sociale huur,69,C,1.1,1.1,21.4,2002,5,1.44,C,W,Flatwoning (overig),,,Appartement,1985 - 2004,220.0,15180.0,GM1979
GM1979,3010000125991,Koop,66,G,1.1,1.0,30.2,1925,4,,G,W,Vrijstaande woning,,,Rijtjeshuis,< 1945,403.0,26598.0,GM1979


Then, we group the dataframe following the ETM categories.

In [37]:
groups = [
    'gemeentecode',
    'Woningtype ETM',
    'Bouwjaarklasse ETM',
    # 'Woningkenmerken/eigendom'
]

filter = [
    'gemeentecode',
    'Woningtype ETM',
    'Bouwjaarklasse ETM',
    'Woningkenmerken/oppervlakte',
    'Functionele vraag ruimteverwarming EP-online (kWh)',
    # 'Woningkenmerken/eigendom'
]

df_housing_stock_filtered_by_groups = df_housing_stock_filtered.loc[:, filter].groupby(by=groups).sum()

# Preview
df_housing_stock_filtered_by_groups

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Woningkenmerken/oppervlakte,Functionele vraag ruimteverwarming EP-online (kWh)
gemeentecode,Woningtype ETM,Bouwjaarklasse ETM,Unnamed: 3_level_1,Unnamed: 4_level_1
GM0014,Appartement,1945 - 1964,778894,1.698123e+08
GM0014,Appartement,1965 - 1984,1068190,2.035975e+08
GM0014,Appartement,1985 - 2004,517907,8.049684e+07
GM0014,Appartement,< 1945,1128635,2.522622e+08
GM0014,Appartement,>= 2005,536813,6.518018e+07
...,...,...,...,...
GM1979,Vrijstaand huis,1945 - 1964,45103,1.318272e+07
GM1979,Vrijstaand huis,1965 - 1984,60789,1.315512e+07
GM1979,Vrijstaand huis,1985 - 2004,91858,1.302886e+07
GM1979,Vrijstaand huis,< 1945,222592,6.844244e+07


##### Enriching the data by adding the useful heat demand shares, the average net heat demand and the number of households

In [38]:
# Add columns to grouped dataframe
df_housing_stock_by_groups['Functionele vraag ruimteverwarming (% van totaal)'] = float('nan')
df_housing_stock_by_groups['Gemiddelde netto warmtevraag (kWh/m2)'] = float('nan')
df_housing_stock_by_groups['Aantal woningen (#)'] = float('nan')

# Preview
df_housing_stock_by_groups

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Woningkenmerken/oppervlakte,Functionele vraag/ruimteverwarming,Functionele vraag ruimteverwarming (% van totaal),Gemiddelde netto warmtevraag (kWh/m2),Aantal woningen (#)
gemeentecode,Woningtype ETM,Bouwjaarklasse ETM,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
GM0014,Appartement,1945 - 1964,982205,450203.6,,,
GM0014,Appartement,1965 - 1984,1299040,573995.1,,,
GM0014,Appartement,1985 - 2004,692134,256755.4,,,
GM0014,Appartement,< 1945,1779584,756600.5,,,
GM0014,Appartement,>= 2005,694370,299185.0,,,
...,...,...,...,...,...,...,...
GM1979,Vrijstaand huis,1945 - 1964,132733,28288.9,,,
GM1979,Vrijstaand huis,1965 - 1984,165379,33571.0,,,
GM1979,Vrijstaand huis,1985 - 2004,216545,50017.0,,,
GM1979,Vrijstaand huis,< 1945,684595,140466.8,,,


Now let's fill these empty columns with the relevant data:

In [39]:
GJ_TO_KWH = 1 / 0.0036

PERC = 100.

for municipality in municipalities:
    for housing_type in ['Appartement', 'Hoekhuis', 'Rijtjeshuis', 'Vrijstaand huis']:
        for construction_period in ['< 1945', '1945 - 1964', '1965 - 1984', '1985 - 2004', '>= 2005']:
            
# If you would like to include the type of owner in your data, make sure to uncomment the lines of code below.

#             for owner in ['Koop', 'Particuliere huur', 'Sociale huur']:
#                 # Calculate share of heating demand per building year range for each housing type
#                     try:
#                         df_housing_stock_by_groups.loc[(municipality, housing_type, construction_period, owner), 'Functionele vraag ruimteverwarming (% van totaal)'] = (
#                             PERC * df_housing_stock_by_groups.loc[(municipality, housing_type, construction_period, owner), 'Functionele vraag/ruimteverwarming'] / 
#                             df_housing_stock_by_groups.loc[(municipality, slice(None), slice(None), owner), 'Functionele vraag/ruimteverwarming'].sum()
#                         )
#                     except:
#                         pass

#                     # Calculate net heating demand by dividing the total EP-online functional heating demand by the total surface for the given housing type / construction period combination 
#                     try:
#                         df_housing_stock_by_groups.loc[(municipality, housing_type, construction_period, owner), 'Gemiddelde netto warmtevraag (kWh/m2)'] = (
#                             df_housing_stock_filtered_by_groups.loc[(municipality, housing_type, construction_period, owner), 'Functionele vraag ruimteverwarming EP-online (kWh)'] / 
#                             df_housing_stock_filtered_by_groups.loc[(municipality, housing_type, construction_period, owner), 'Woningkenmerken/oppervlakte']
#                         )
#                     except:
#                         pass

#                     # Count the number of residences per category
#                     try:
#                         df_housing_stock_by_groups.loc[(municipality, housing_type, construction_period, owner), 'Aantal woningen (#)'] = int(
#                             df_housing_stock_count.loc[(municipality, housing_type, construction_period, owner), 'Aantal woningen (#)']
#                         )
#                     except:
#                         pass

# If you would like to include the type of owner in your data, make sure to comment out the lines of code below.
            
            # Calculate share of heating demand per building year range for each housing type
            try:
                df_housing_stock_by_groups.loc[(municipality, housing_type, construction_period), 'Functionele vraag ruimteverwarming (% van totaal)'] = (
                    PERC * df_housing_stock_by_groups.loc[(municipality, housing_type, construction_period), 'Functionele vraag/ruimteverwarming'] / 
                    df_housing_stock_by_groups.loc[(municipality, slice(None), slice(None)), 'Functionele vraag/ruimteverwarming'].sum()
                )
            except:
                pass

            # Calculate net heating demand by dividing the total EP-online functional heating demand by the total surface for the given housing type / construction period combination 
            try:
                df_housing_stock_by_groups.loc[(municipality, housing_type, construction_period), 'Gemiddelde netto warmtevraag (kWh/m2)'] = (
                    df_housing_stock_filtered_by_groups.loc[(municipality, housing_type, construction_period), 'Functionele vraag ruimteverwarming EP-online (kWh)'] / 
                    df_housing_stock_filtered_by_groups.loc[(municipality, housing_type, construction_period), 'Woningkenmerken/oppervlakte']
                )
            except:
                pass
            
            # Count the number of residences per category
            try:
                df_housing_stock_by_groups.loc[(municipality, housing_type, construction_period), 'Aantal woningen (#)'] = int(
                    df_housing_stock_count.loc[(municipality, housing_type, construction_period), 'Aantal woningen (#)']
                )
            except:
                pass
        
# Preview
df_housing_stock_by_groups

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Woningkenmerken/oppervlakte,Functionele vraag/ruimteverwarming,Functionele vraag ruimteverwarming (% van totaal),Gemiddelde netto warmtevraag (kWh/m2),Aantal woningen (#)
gemeentecode,Woningtype ETM,Bouwjaarklasse ETM,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
GM0014,Appartement,1945 - 1964,982205,450203.6,11.549155,218.017270,13050.0
GM0014,Appartement,1965 - 1984,1299040,573995.1,14.724800,190.600488,16824.0
GM0014,Appartement,1985 - 2004,692134,256755.4,6.586593,155.427211,7854.0
GM0014,Appartement,< 1945,1779584,756600.5,19.409210,223.510900,22095.0
GM0014,Appartement,>= 2005,694370,299185.0,7.675047,121.420648,9595.0
...,...,...,...,...,...,...,...
GM1979,Vrijstaand huis,1945 - 1964,132733,28288.9,3.345294,292.280279,748.0
GM1979,Vrijstaand huis,1965 - 1984,165379,33571.0,3.969927,216.406188,923.0
GM1979,Vrijstaand huis,1985 - 2004,216545,50017.0,5.914743,141.836994,1270.0
GM1979,Vrijstaand huis,< 1945,684595,140466.8,16.610852,307.479355,3589.0


##### Filtering for a subset of municipalities (e.g. all municipalities in Drenthe)

In [40]:
# Filter for the Drenthe municipalities
df_housing_stock_by_groups_filtered = df_housing_stock_by_groups[df_housing_stock_by_groups.index.get_level_values('gemeentecode').isin(municipalities)]

# Drop the 'Functionele vraag ruimteverwarming (%) van totaal' column
del df_housing_stock_by_groups_filtered['Functionele vraag ruimteverwarming (% van totaal)']

# Preview data
df_housing_stock_by_groups_filtered

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Woningkenmerken/oppervlakte,Functionele vraag/ruimteverwarming,Gemiddelde netto warmtevraag (kWh/m2),Aantal woningen (#)
gemeentecode,Woningtype ETM,Bouwjaarklasse ETM,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
GM0014,Appartement,1945 - 1964,982205,450203.6,218.017270,13050.0
GM0014,Appartement,1965 - 1984,1299040,573995.1,190.600488,16824.0
GM0014,Appartement,1985 - 2004,692134,256755.4,155.427211,7854.0
GM0014,Appartement,< 1945,1779584,756600.5,223.510900,22095.0
GM0014,Appartement,>= 2005,694370,299185.0,121.420648,9595.0
...,...,...,...,...,...,...
GM1979,Vrijstaand huis,1945 - 1964,132733,28288.9,292.280279,748.0
GM1979,Vrijstaand huis,1965 - 1984,165379,33571.0,216.406188,923.0
GM1979,Vrijstaand huis,1985 - 2004,216545,50017.0,141.836994,1270.0
GM1979,Vrijstaand huis,< 1945,684595,140466.8,307.479355,3589.0


##### Summing municipal data to national values

Calculate the same thing for the Netherlands (**nl2019**) as a whole (instead of per municipality). Start off with summing the values for all municipalities.

In [41]:
# Sum the data for all municipalities
df_housing_stock_by_groups_nl2019 = df_housing_stock_by_groups.groupby(level=['Woningtype ETM', 'Bouwjaarklasse ETM']).sum()
df_housing_stock_filtered_by_groups_nl2019 = df_housing_stock_filtered_by_groups.groupby(level=['Woningtype ETM', 'Bouwjaarklasse ETM']).sum()

# Preview data
df_housing_stock_by_groups_nl2019

Unnamed: 0_level_0,Unnamed: 1_level_0,Woningkenmerken/oppervlakte,Functionele vraag/ruimteverwarming,Functionele vraag ruimteverwarming (% van totaal),Gemiddelde netto warmtevraag (kWh/m2),Aantal woningen (#)
Woningtype ETM,Bouwjaarklasse ETM,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Appartement,1945 - 1964,27884599,13874542.5,902.345769,77578.057617,389313.0
Appartement,1965 - 1984,56554817,27891987.0,2363.11866,65593.755896,764334.0
Appartement,1985 - 2004,49765972,22519169.2,2092.699035,52755.395048,619698.0
Appartement,< 1945,44395579,18272911.0,756.033092,85444.354942,540581.0
Appartement,>= 2005,38760800,16911616.6,1718.833085,40087.295964,460930.0
Hoekhuis,1945 - 1964,34018773,11201293.0,1792.831069,84957.646322,298376.0
Hoekhuis,1965 - 1984,70972849,21246499.2,3428.620872,71409.428423,564777.0
Hoekhuis,1985 - 2004,51009559,14192741.2,2082.826,57029.096744,379212.0
Hoekhuis,< 1945,42270072,11309120.6,1524.131438,103582.28741,303016.0
Hoekhuis,>= 2005,24397522,6179049.4,922.469337,41102.901142,165161.0


Of course taking a simple sum doesn't hold for a couple of columns such as 'Functionele vraag ruimteverwarming (% van totaal)' and 'Gemiddelde netto warmtevraag (kWh/m2)'. Thus, we should calculate these values again and overwrite the old incorrect values.

In [42]:
PERC = 100.

for housing_type in ['Appartement', 'Hoekhuis', 'Rijtjeshuis', 'Vrijstaand huis']:
    for construction_period in ['< 1945', '1945 - 1964', '1965 - 1984', '1985 - 2004', '>= 2005']:

        # Calculate share of heating demand per building year range for each housing type
        try:
            df_housing_stock_by_groups_nl2019.loc[(housing_type, construction_period), 'Functionele vraag ruimteverwarming (% van totaal)'] = (
                PERC * df_housing_stock_by_groups_nl2019.loc[(housing_type, construction_period), 'Functionele vraag/ruimteverwarming'] / 
                df_housing_stock_by_groups_nl2019.loc[(slice(None), slice(None)), 'Functionele vraag/ruimteverwarming'].sum()
            )
        except:
            pass
        
        # Calculate net heating demand by dividing the total EP-online functional heating demand by the total surface for the given housing type / construction period combination 
        try:
            df_housing_stock_by_groups_nl2019.loc[(housing_type, construction_period), 'Gemiddelde netto warmtevraag (kWh/m2)'] = (
                df_housing_stock_filtered_by_groups_nl2019.loc[(housing_type, construction_period), 'Functionele vraag ruimteverwarming EP-online (kWh)'] / 
                df_housing_stock_filtered_by_groups_nl2019.loc[(housing_type, construction_period), 'Woningkenmerken/oppervlakte']
            )
        except:
            pass
        
# Preview national data
df_housing_stock_by_groups_nl2019

Unnamed: 0_level_0,Unnamed: 1_level_0,Woningkenmerken/oppervlakte,Functionele vraag/ruimteverwarming,Functionele vraag ruimteverwarming (% van totaal),Gemiddelde netto warmtevraag (kWh/m2),Aantal woningen (#)
Woningtype ETM,Bouwjaarklasse ETM,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Appartement,1945 - 1964,27884599,13874542.5,4.786749,224.839492,389313.0
Appartement,1965 - 1984,56554817,27891987.0,9.622799,192.575748,764334.0
Appartement,1985 - 2004,49765972,22519169.2,7.769165,153.628123,619698.0
Appartement,< 1945,44395579,18272911.0,6.304196,308.070927,540581.0
Appartement,>= 2005,38760800,16911616.6,5.834546,115.84463,460930.0
Hoekhuis,1945 - 1964,34018773,11201293.0,3.864472,242.426645,298376.0
Hoekhuis,1965 - 1984,70972849,21246499.2,7.330091,201.902254,564777.0
Hoekhuis,1985 - 2004,51009559,14192741.2,4.896528,160.097666,379212.0
Hoekhuis,< 1945,42270072,11309120.6,3.901672,295.358273,303016.0
Hoekhuis,>= 2005,24397522,6179049.4,2.131786,116.359066,165161.0


In [43]:
# Add the average surface per category
df_housing_stock_by_groups_nl2019['Gemiddelde oppervlakte (m2)'] = df_housing_stock_by_groups_nl2019['Woningkenmerken/oppervlakte'] / df_housing_stock_by_groups_nl2019['Aantal woningen (#)']

# Preview data
df_housing_stock_by_groups_nl2019

Unnamed: 0_level_0,Unnamed: 1_level_0,Woningkenmerken/oppervlakte,Functionele vraag/ruimteverwarming,Functionele vraag ruimteverwarming (% van totaal),Gemiddelde netto warmtevraag (kWh/m2),Aantal woningen (#),Gemiddelde oppervlakte (m2)
Woningtype ETM,Bouwjaarklasse ETM,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Appartement,1945 - 1964,27884599,13874542.5,4.786749,224.839492,389313.0,71.625142
Appartement,1965 - 1984,56554817,27891987.0,9.622799,192.575748,764334.0,73.992282
Appartement,1985 - 2004,49765972,22519169.2,7.769165,153.628123,619698.0,80.306814
Appartement,< 1945,44395579,18272911.0,6.304196,308.070927,540581.0,82.125674
Appartement,>= 2005,38760800,16911616.6,5.834546,115.84463,460930.0,84.092595
Hoekhuis,1945 - 1964,34018773,11201293.0,3.864472,242.426645,298376.0,114.013101
Hoekhuis,1965 - 1984,70972849,21246499.2,7.330091,201.902254,564777.0,125.665261
Hoekhuis,1985 - 2004,51009559,14192741.2,4.896528,160.097666,379212.0,134.514622
Hoekhuis,< 1945,42270072,11309120.6,3.901672,295.358273,303016.0,139.497822
Hoekhuis,>= 2005,24397522,6179049.4,2.131786,116.359066,165161.0,147.719631


#### Load
Load the data into the intermediate data folder. This is the data we will be using for the next steps in the pipeline.

In [44]:
# Specify path for the to be created CSV file
path = Path("data", "intermediate", "housing_stock_by_groups.csv")

# Write the dataframe to this path
df_housing_stock_by_groups.to_csv(path)

In [45]:
# Specify path for the to be created CSV file
path = Path("data", "intermediate", "housing_stock_by_groups_nl2019.csv")

# Write the dataframe to this path
df_housing_stock_by_groups_nl2019.to_csv(path)

#### Analysis and visualisation (WIP)

This is still work in progress. If you want to visualize the housing stock for a given municipality, you may uncomment the code in the cells below. Also make sure that in the previous cells, the lines of code related to the owners are uncommented.

In [46]:
# import matplotlib.pyplot as plt
# import squarify
# import seaborn as sb

In [47]:
# # Add a column for 'Aantal woningen (#)'. When applying a sum to the groups this aumotically creates a count this way.
# df_housing_stock_merged['Aantal woningen (#)'] = 1

In [48]:
# # Select a municipality
# municipality = 'GM0106' # Assen

# # Filter the data for this municipality
# df_municipality = df_housing_stock_merged.loc[municipality]

# # Preview filtered data
# df_municipality.head(3)

In [49]:
# # Group the data by owner

# groups = [
#     'Woningkenmerken/eigendom',
# ]

# filter = [
#     'Woningkenmerken/eigendom',
#     'Aantal woningen (#)',
#     'Woningkenmerken/oppervlakte',
#     'Functionele vraag ruimteverwarming EP-online (kWh)'
# ]

# df_chart = df_municipality.loc[:, filter].groupby(by=groups).sum()

# # df_chart

In [50]:
# # Add a column for the net heat demand in kWh/m2
# try:
#     df_chart['Gemiddelde netto warmtevraag (kWh/m2)'] = (
#         df_chart['Functionele vraag ruimteverwarming EP-online (kWh)'] / 
#         df_chart['Woningkenmerken/oppervlakte']
#     )
# except:
#     pass

# df_chart

In [51]:
# # Select the values that determine the size of the blocks
# size_values = df_chart['Aantal woningen (#)']
# color_values = df_chart['Gemiddelde netto warmtevraag (kWh/m2)']

# # Normalize color values to be between 0 and 1
# normalized_colors = [(val - min(color_values)) / (max(color_values) - min(color_values)) for val in color_values]

# # Select the labels for inside the blocks
# column = 'Gemiddelde netto warmtevraag (kWh/m2)'
# labels = [f'{owner}:\n {round(df_chart.loc[owner, column])} kWh/m2' for owner in df_chart.index]

# # Treemap
# squarify.plot(
#     sizes=size_values,
#     label=labels,
#     pad = 0.2,
#     text_kwargs = {'fontsize': 10, 'color': 'white'},
#     color = plt.cm.tab10(normalized_colors)
# )

# # Remove the axis:
# plt.axis("off")

### Services (or buildings)

#### Extract
First, we extract the raw **verrijkte BAG** data by TNO in csv format. Then, we convert this into a dataframe.

In [52]:
# Specify the path to the raw data file
path = Path("data", "raw", "TNO-2023-P10648_vbobestand.csv")

# Extract the data and turn it into a dataframe
df_raw_bag_tno = pd.read_csv(path, header=[0], sep=",", low_memory=False, encoding='latin1')

In [53]:
# Preview data
df_raw_bag_tno

Unnamed: 0,vboid,vbo_ligt_binnen_x_panden,vboid_x,vboid_binnen_ander_pand,vbo_opp_m2,vbo_opp_cor_m2,vbo_opp_m2_x,pandid,bouwjaar,aantal_vbo_in_dit_pand,...,label,isso_nen,ei_origineel,label_origineel,gf_nta_epa,pand_label_keus,warmtenet,warmtenet_pbl1,warmtenet_pbl,in_ubouwpand
0,1.930100e+14,1.0,v0193010000030663_1,0,548.0,548.0,548.0,193100000029998,1977,1,...,,,,,,F,,0,,1
1,1.600100e+14,1.0,v0160010000051304_1,0,42.0,42.0,42.0,160100001392420,1989,1,...,,,,,,E,,0,,1
2,3.920100e+14,1.0,v0392010000003208_1,0,2982.0,2982.0,2982.0,392100000061537,1993,2,...,,,,,,D,,0,,1
3,6.320100e+14,1.0,v0632010000007072_1,0,1198.0,1198.0,1198.0,632100000015217,1950,1,...,1.0,NTA,,A,Kantoorfunctie,A,nee,0,,1
4,3.630100e+14,1.0,v0363010000810818_1,0,1.0,1.0,1.0,363100012075402,1971,1,...,,,,,,G,,0,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2614033,2.940100e+14,1.0,v0294010000414631_1,0,80.0,80.0,80.0,294100000413386,1997,1,...,,,,,,C,,0,,1
2614034,3.630100e+14,1.0,v0363010001168516_1,0,77.0,77.0,77.0,363100012238489,2010,120,...,,,,,,A,,1,A'dam Noord en West,1
2614035,6.370100e+14,1.0,v0637010000265529_1,0,80.0,80.0,80.0,637100000157907,1988,76,...,,,,,,C,,0,,1
2614036,6.540100e+14,1.0,v0654010000044162_1,0,73.0,73.0,73.0,654100000096180,2016,8,...,,,,,,A2+,,0,,1


#### Transform
The raw data should be filtered, cleaned and enriched before we can use it.

##### Cleaning and preprocessing the data

First, we need to drop the buildings with the BAG use functions "woon" and "industrie".

In [54]:
# Drop rows for which the value is equal to 1 in the columns 'f1woon' and 'f9industrie'
df_cleaned_bag_tno = df_raw_bag_tno[(df_raw_bag_tno['f1woon'] != 1) & (df_raw_bag_tno['f9industrie'] != 1)]

# Preview data
df_cleaned_bag_tno

Unnamed: 0,vboid,vbo_ligt_binnen_x_panden,vboid_x,vboid_binnen_ander_pand,vbo_opp_m2,vbo_opp_cor_m2,vbo_opp_m2_x,pandid,bouwjaar,aantal_vbo_in_dit_pand,...,label,isso_nen,ei_origineel,label_origineel,gf_nta_epa,pand_label_keus,warmtenet,warmtenet_pbl1,warmtenet_pbl,in_ubouwpand
0,1.930100e+14,1.0,v0193010000030663_1,0,548.0,548.0,548.0,193100000029998,1977,1,...,,,,,,F,,0,,1
1,1.600100e+14,1.0,v0160010000051304_1,0,42.0,42.0,42.0,160100001392420,1989,1,...,,,,,,E,,0,,1
4,3.630100e+14,1.0,v0363010000810818_1,0,1.0,1.0,1.0,363100012075402,1971,1,...,,,,,,G,,0,,1
5,5.460100e+14,3.0,v0546010000074033_3,1,350.0,350.0,117.0,546100000036201,1915,1,...,,,,,,G,,0,,1
6,6.270100e+14,2.0,v0627010000029453_2,1,6116.0,6116.0,3058.0,627100000005741,2009,1,...,1.0,ISSO,0.77,A,Winkelfunctie,A2+,nee,0,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2614029,4.701000e+13,1.0,v0047010000311630_1,0,36.0,36.0,36.0,47100000264426,1875,2,...,,,,,,G,,0,,1
2614031,6.260100e+14,1.0,v0626010000009793_1,0,17.0,17.0,17.0,626100000007527,1971,11,...,,,,,,G,,0,,1
2614032,6.320100e+14,1.0,v0632010000003623_1,0,15.0,15.0,15.0,632100000020160,1957,7,...,,,,,,G,,0,,1
2614033,2.940100e+14,1.0,v0294010000414631_1,0,80.0,80.0,80.0,294100000413386,1997,1,...,,,,,,C,,0,,1


Next, we need to drop the buildings that were built after 2019.

In [55]:
# Keep the rows for which the building year is less or equal to 2019
df_cleaned_bag_tno = df_cleaned_bag_tno[df_cleaned_bag_tno['bouwjaar'] <= 2019]

# Check if this went well
df_cleaned_bag_tno['bouwjaar'].max()

2019

The dataset still has a lot of columns we're not interested in. Let's specify the ones we want to keep and filter for those:
* vboid
* vbo_opp_m2
* bouwjaar
* gemeentenaam
* gemeente_id
* pand_label_keus

In [56]:
# Specify the columns to keep
columns_to_keep = [
    'vboid',
    'vbo_opp_m2',
    'bouwjaar',
    'gemeentenaam',
    'gemeente_id',
    'pand_label_keus'
]

df_filtered_bag_tno = df_cleaned_bag_tno[columns_to_keep]

# Preview data
df_filtered_bag_tno.head(3)

Unnamed: 0,vboid,vbo_opp_m2,bouwjaar,gemeentenaam,gemeente_id,pand_label_keus
0,193010000000000.0,548.0,1977,Zwolle,GM0193,F
1,160010000000000.0,42.0,1989,Hardenberg,GM0160,E
4,363010000000000.0,1.0,1971,Amsterdam,GM0363,G


##### Enriching the data with ETM classification for energy labels

First, create a copy of the dataframe. Then, set the index to the BAG VBO ID.

In [57]:
# Create a copy of the data
df_building_stock = df_filtered_bag_tno.copy()

# Set a multi-level index based on the municipal code and the BAG VBO ID
index_columns = ['vboid']
df_building_stock.set_index(index_columns, inplace=True)

# Preview data
df_building_stock.head(3)

Unnamed: 0_level_0,vbo_opp_m2,bouwjaar,gemeentenaam,gemeente_id,pand_label_keus
vboid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
193010000000000.0,548.0,1977,Zwolle,GM0193,F
160010000000000.0,42.0,1989,Hardenberg,GM0160,E
363010000000000.0,1.0,1971,Amsterdam,GM0363,G


Calculate the net heat demand in kWh/m2 as well as the useful heat demand in kWh for each building:

In [58]:
# Apply the classification function to the "Pand_energieklasse" column
df_building_stock['Netto warmtevraag (kWh/m2)'] = df_building_stock['pand_label_keus'].apply(classify_label)

# Calculate useful demand based on the EPI 
df_building_stock['Functionele vraag ruimteverwarming (kWh)'] = df_building_stock['vbo_opp_m2'] * df_building_stock['Netto warmtevraag (kWh/m2)']

# Preview data
df_building_stock

Unnamed: 0_level_0,vbo_opp_m2,bouwjaar,gemeentenaam,gemeente_id,pand_label_keus,Netto warmtevraag (kWh/m2),Functionele vraag ruimteverwarming (kWh)
vboid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1.930100e+14,548.0,1977,Zwolle,GM0193,F,358,196184.0
1.600100e+14,42.0,1989,Hardenberg,GM0160,E,313,13146.0
3.630100e+14,1.0,1971,Amsterdam,GM0363,G,403,403.0
5.460100e+14,350.0,1915,Leiden,GM0546,G,403,141050.0
6.270100e+14,6116.0,2009,Waddinxveen,GM0627,A2+,118,721688.0
...,...,...,...,...,...,...,...
4.701000e+13,36.0,1875,Veendam,GM0047,G,403,14508.0
6.260100e+14,17.0,1971,Voorschoten,GM0626,G,403,6851.0
6.320100e+14,15.0,1957,Woerden,GM0632,G,403,6045.0
2.940100e+14,80.0,1997,Winterswijk,GM0294,C,220,17600.0


##### Grouping data

In [59]:
groups = [
    'gemeente_id'
]

filter = [
    'gemeente_id',
    'vbo_opp_m2',
    'Functionele vraag ruimteverwarming (kWh)'
]

df_building_stock_by_groups = df_building_stock.loc[:, filter].groupby(by=groups).sum()

# Preview
df_building_stock_by_groups

Unnamed: 0_level_0,vbo_opp_m2,Functionele vraag ruimteverwarming (kWh)
gemeente_id,Unnamed: 1_level_1,Unnamed: 2_level_1
GM0014,5407630.0,1.336494e+09
GM0034,2460212.0,4.004712e+08
GM0037,391381.0,9.976618e+07
GM0047,327584.0,8.814509e+07
GM0050,348892.0,7.000003e+07
...,...,...
GM1978,321825.0,8.262022e+07
GM1979,452727.0,1.225014e+08
GM1980,858228.0,1.752156e+08
GM1982,1043770.0,2.640164e+08


##### Enriching the data by adding the average net heat demand and the number of buildings

In [60]:
# Add column for the average net heat demand to grouped dataframe and calculate the values
df_building_stock_by_groups['Gemiddelde netto warmtevraag (kWh/m2)'] = df_building_stock_by_groups['Functionele vraag ruimteverwarming (kWh)'] / df_building_stock_by_groups['vbo_opp_m2']

# Add column for the number of buildings and determine the values
df_building_stock_by_groups['Aantal gebouwen (#)'] = df_building_stock.loc[:, ['gemeente_id', 'vbo_opp_m2']].groupby(by='gemeente_id').count()

# Preview
df_building_stock_by_groups

Unnamed: 0_level_0,vbo_opp_m2,Functionele vraag ruimteverwarming (kWh),Gemiddelde netto warmtevraag (kWh/m2),Aantal gebouwen (#)
gemeente_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
GM0014,5407630.0,1.336494e+09,247.149671,13065
GM0034,2460212.0,4.004712e+08,162.779124,4305
GM0037,391381.0,9.976618e+07,254.908079,1995
GM0047,327584.0,8.814509e+07,269.076295,1773
GM0050,348892.0,7.000003e+07,200.635228,2088
...,...,...,...,...
GM1978,321825.0,8.262022e+07,256.724074,1688
GM1979,452727.0,1.225014e+08,270.585527,2650
GM1980,858228.0,1.752156e+08,204.159766,3397
GM1982,1043770.0,2.640164e+08,252.945019,3396


##### Summing municipal data to national values

Also here, make sure to calculate the same thing for the Netherlands (**nl2019**) as a whole (instead of per municipality)

In [61]:
# Sum the columns to get the values for nl2019
df_building_stock_by_groups_nl2019 = pd.DataFrame({'nl2019': df_building_stock_by_groups.sum()})

# Calculate the average net heat demand again and overwrite the (incorrect) value
df_building_stock_by_groups_nl2019.loc['Gemiddelde netto warmtevraag (kWh/m2)', 'nl2019'] = df_building_stock_by_groups_nl2019.loc['Functionele vraag ruimteverwarming (kWh)', 'nl2019'] / df_building_stock_by_groups_nl2019.loc['vbo_opp_m2', 'nl2019']

# Add the average building surface in m2
df_building_stock_by_groups_nl2019.loc['Gemiddelde oppervlakte (m2)', 'nl2019'] = df_building_stock_by_groups_nl2019.loc['vbo_opp_m2', 'nl2019'] / df_building_stock_by_groups_nl2019.loc['Aantal gebouwen (#)', 'nl2019']

# Preview data
df_building_stock_by_groups_nl2019

Unnamed: 0,nl2019
vbo_opp_m2,267934400.0
Functionele vraag ruimteverwarming (kWh),65261050000.0
Gemiddelde netto warmtevraag (kWh/m2),243.571
Aantal gebouwen (#),912497.0
Gemiddelde oppervlakte (m2),293.6277


#### Load
Load the data into the intermediate data folder. This is the data we will be using for the next steps in the pipeline.

In [62]:
# Specify path for the to be created CSV file
path = Path("data", "intermediate", "building_stock_by_groups.csv")

# Write the dataframe to this path
df_building_stock_by_groups.to_csv(path)

In [63]:
# Specify path for the to be created CSV file
path = Path("data", "intermediate", "building_stock_by_groups_nl2019.csv")

# Write the dataframe to this path
df_building_stock_by_groups_nl2019.to_csv(path)

### ETLocal

#### Setup ETLocal templates

In [64]:
# TODO: Describe that by running this cell an empty template is created. This overwrites the filled template.

# Load ETLocal template with target keys from config
path = Path("config", "etlocal_interface_elements.csv")
empty_template = pd.read_csv(path, header=[0], sep=sep)#, index_col=[2])

# Add columns geo_id, value and commit to the template    
for column in ['geo_id', 'value', 'commit']:
    empty_template[column] = float('nan')
    
# Fill the geo_id column with all relevant municipal geo IDs
templates = []
for municipality in municipalities:
    template_to_add = empty_template.copy()
    template_to_add['geo_id'] = municipality
    templates.append(template_to_add)
    
# Concatenate list of templates to one big template with all municipalities
template = pd.concat(templates)

# Transform the templates into a multi-index dataframe
index = pd.MultiIndex.from_frame(template[['geo_id', 'group', 'subgroup', 'key']])
template = template.drop(columns=['geo_id', 'group', 'subgroup', 'key'])
template.index = index

# Preview merged template
template

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,unit,value,commit
geo_id,group,subgroup,key,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
GM0003,area,area_emission_factors,file_carriers_imported_heat_co2_conversion_per_mj,kg/MJ,,
GM0003,area,area_emission_factors,file_carriers_propane_co2_conversion_per_mj,kg/MJ,,
GM0003,households,households_applications,households_final_demand_electricity_households_final_demand_for_cooking_electricity_parent_share,%,,
GM0003,households,households_applications,households_final_demand_electricity_households_final_demand_for_cooling_electricity_parent_share,%,,
GM0003,households,households_applications,households_final_demand_electricity_households_final_demand_for_hot_water_electricity_parent_share,%,,
...,...,...,...,...,...,...
GM0193,energy,energy_heat_production,input_energy_heat_burner_mt_crude_oil_production,TJ,,
GM0193,energy,energy_heat_production,energy_import_heat_demand,TJ,,
GM0193,energy,energy_energy_demand,input_energy_heat_distribution_ht_loss_share,%,,
GM0193,energy,energy_energy_demand,input_energy_heat_distribution_mt_loss_share,%,,


#### Fill ETLocal templates

Preview the ETLocal keys that are relevant for the **households** energy demand category

In [65]:
# Filter the ETLocal keys that are relevant for the households and buildings building stock and insulation level categories
filter_housing_stock_and_insulation = (slice(None), 'households', ['households_housing_stock', 'households_insulation_level'])

# Preview the filtered template
template.loc[filter_housing_stock_and_insulation, :]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,unit,value,commit
geo_id,group,subgroup,key,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
GM0003,households,households_housing_stock,number_of_inhabitants,#,,
GM0003,households,households_housing_stock,residences_roof_surface_available_for_pv,km2,,
GM0003,households,households_housing_stock,households_number_of_apartments_2005,#,,
GM0003,households,households_housing_stock,households_number_of_apartments_1985_2004,#,,
GM0003,households,households_housing_stock,households_number_of_apartments_1965_1984,#,,
...,...,...,...,...,...,...
GM0193,households,households_insulation_level,input_households_share_of_useful_demand_for_space_heating_detached_2005,%,,
GM0193,households,households_insulation_level,input_households_share_of_useful_demand_for_space_heating_detached_1985_2004,%,,
GM0193,households,households_insulation_level,input_households_share_of_useful_demand_for_space_heating_detached_1965_1984,%,,
GM0193,households,households_insulation_level,input_households_share_of_useful_demand_for_space_heating_detached_1945_1964,%,,


In [66]:
# List all ETLocal keys that are relevant for the households energy demand category
keys_housing_stock_and_insulation = list(template.loc[filter_housing_stock_and_insulation, :].index.get_level_values(3).unique())

# Preview list
keys_housing_stock_and_insulation

['number_of_inhabitants',
 'residences_roof_surface_available_for_pv',
 'households_number_of_apartments_2005',
 'households_number_of_apartments_1985_2004',
 'households_number_of_apartments_1965_1984',
 'households_number_of_apartments_1945_1964',
 'households_number_of_apartments_1945',
 'households_number_of_terraced_2005',
 'households_number_of_terraced_1985_2004',
 'households_number_of_terraced_1965_1984',
 'households_number_of_terraced_1945_1964',
 'households_number_of_terraced_1945',
 'households_number_of_semi_detached_2005',
 'households_number_of_semi_detached_1985_2004',
 'households_number_of_semi_detached_1965_1984',
 'households_number_of_semi_detached_1945_1964',
 'households_number_of_semi_detached_1945',
 'households_number_of_detached_2005',
 'households_number_of_detached_1985_2004',
 'households_number_of_detached_1965_1984',
 'households_number_of_detached_1945_1964',
 'households_number_of_detached_1945',
 'heat_share_of_apartments_with_block_heating',
 'inpu

Same goes for the buildings. Preview the ETLocal keys that are relevant for the **buildings** energy demand category

In [67]:
# Filter the ETLocal keys that are relevant for the households and buildings building stock and insulation level categories
filter_building_stock_and_insulation = (slice(None), 'buildings', 'buildings_building_stock')

# Preview the filtered template
template.loc[filter_building_stock_and_insulation, :]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,unit,value,commit
geo_id,group,subgroup,key,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
GM0003,buildings,buildings_building_stock,number_of_buildings,#,,
GM0003,buildings,buildings_building_stock,input_buildings_insulation_level,kWh/m2,,
GM0010,buildings,buildings_building_stock,number_of_buildings,#,,
GM0010,buildings,buildings_building_stock,input_buildings_insulation_level,kWh/m2,,
GM0024,buildings,buildings_building_stock,number_of_buildings,#,,
...,...,...,...,...,...,...
GM1896,buildings,buildings_building_stock,input_buildings_insulation_level,kWh/m2,,
GM0642,buildings,buildings_building_stock,number_of_buildings,#,,
GM0642,buildings,buildings_building_stock,input_buildings_insulation_level,kWh/m2,,
GM0193,buildings,buildings_building_stock,number_of_buildings,#,,


In [68]:
# List all ETLocal keys that are relevant for the households energy demand category
keys_building_stock_and_insulation = list(template.loc[filter_building_stock_and_insulation, :].index.get_level_values(3).unique())

# Preview list
keys_building_stock_and_insulation

['number_of_buildings', 'input_buildings_insulation_level']

In order to fill the ETLocal template, we need to map the PBL input data to the ETLocal (dataset manager) interface elements.

In [69]:
# TODO: this could also be a CSV transformed into a data frame--is this more readable for the notebook user?
mapping_households = {
    
    # Number of apartments
    'households_number_of_apartments_1945': {'Woningtype ETM': 'Appartement', 'Bouwjaarklasse ETM': '< 1945', 'Categorie': 'Aantal woningen (#)'},
    'households_number_of_apartments_1945_1964': {'Woningtype ETM': 'Appartement', 'Bouwjaarklasse ETM': '1945 - 1964', 'Categorie': 'Aantal woningen (#)'},
    'households_number_of_apartments_1965_1984': {'Woningtype ETM': 'Appartement', 'Bouwjaarklasse ETM': '1965 - 1984', 'Categorie': 'Aantal woningen (#)'},
    'households_number_of_apartments_1985_2004': {'Woningtype ETM': 'Appartement', 'Bouwjaarklasse ETM': '1985 - 2004', 'Categorie': 'Aantal woningen (#)'},
    'households_number_of_apartments_2005': {'Woningtype ETM': 'Appartement', 'Bouwjaarklasse ETM': '>= 2005', 'Categorie': 'Aantal woningen (#)'},
    
    # Number of terraced
    'households_number_of_terraced_1945': {'Woningtype ETM': 'Rijtjeshuis', 'Bouwjaarklasse ETM': '< 1945', 'Categorie': 'Aantal woningen (#)'},
    'households_number_of_terraced_1945_1964': {'Woningtype ETM': 'Rijtjeshuis', 'Bouwjaarklasse ETM': '1945 - 1964', 'Categorie': 'Aantal woningen (#)'},
    'households_number_of_terraced_1965_1984': {'Woningtype ETM': 'Rijtjeshuis', 'Bouwjaarklasse ETM': '1965 - 1984', 'Categorie': 'Aantal woningen (#)'},
    'households_number_of_terraced_1985_2004': {'Woningtype ETM': 'Rijtjeshuis', 'Bouwjaarklasse ETM': '1985 - 2004', 'Categorie': 'Aantal woningen (#)'},
    'households_number_of_terraced_2005': {'Woningtype ETM': 'Rijtjeshuis', 'Bouwjaarklasse ETM': '>= 2005', 'Categorie': 'Aantal woningen (#)'},
    
    # Number of semi-detached
    'households_number_of_semi_detached_1945': {'Woningtype ETM': 'Hoekhuis', 'Bouwjaarklasse ETM': '< 1945', 'Categorie': 'Aantal woningen (#)'},
    'households_number_of_semi_detached_1945_1964': {'Woningtype ETM': 'Hoekhuis', 'Bouwjaarklasse ETM': '1945 - 1964', 'Categorie': 'Aantal woningen (#)'},
    'households_number_of_semi_detached_1965_1984': {'Woningtype ETM': 'Hoekhuis', 'Bouwjaarklasse ETM': '1965 - 1984', 'Categorie': 'Aantal woningen (#)'},
    'households_number_of_semi_detached_1985_2004': {'Woningtype ETM': 'Hoekhuis', 'Bouwjaarklasse ETM': '1985 - 2004', 'Categorie': 'Aantal woningen (#)'},
    'households_number_of_semi_detached_2005': {'Woningtype ETM': 'Hoekhuis', 'Bouwjaarklasse ETM': '>= 2005', 'Categorie': 'Aantal woningen (#)'},
    
    # Number of detached
    'households_number_of_detached_1945': {'Woningtype ETM': 'Vrijstaand huis', 'Bouwjaarklasse ETM': '< 1945', 'Categorie': 'Aantal woningen (#)'},
    'households_number_of_detached_1945_1964': {'Woningtype ETM': 'Vrijstaand huis', 'Bouwjaarklasse ETM': '1945 - 1964', 'Categorie': 'Aantal woningen (#)'},
    'households_number_of_detached_1965_1984': {'Woningtype ETM': 'Vrijstaand huis', 'Bouwjaarklasse ETM': '1965 - 1984', 'Categorie': 'Aantal woningen (#)'},
    'households_number_of_detached_1985_2004': {'Woningtype ETM': 'Vrijstaand huis', 'Bouwjaarklasse ETM': '1985 - 2004', 'Categorie': 'Aantal woningen (#)'},
    'households_number_of_detached_2005': {'Woningtype ETM': 'Vrijstaand huis', 'Bouwjaarklasse ETM': '>= 2005', 'Categorie': 'Aantal woningen (#)'},
    
    # Insulation level of apartments
    'input_households_insulation_level_apartments_1945': {'Woningtype ETM': 'Appartement', 'Bouwjaarklasse ETM': '< 1945', 'Categorie': 'Gemiddelde netto warmtevraag (kWh/m2)'},
    'input_households_insulation_level_apartments_1945_1964': {'Woningtype ETM': 'Appartement', 'Bouwjaarklasse ETM': '1945 - 1964', 'Categorie': 'Gemiddelde netto warmtevraag (kWh/m2)'},
    'input_households_insulation_level_apartments_1965_1984': {'Woningtype ETM': 'Appartement', 'Bouwjaarklasse ETM': '1965 - 1984', 'Categorie': 'Gemiddelde netto warmtevraag (kWh/m2)'},
    'input_households_insulation_level_apartments_1985_2004': {'Woningtype ETM': 'Appartement', 'Bouwjaarklasse ETM': '1985 - 2004', 'Categorie': 'Gemiddelde netto warmtevraag (kWh/m2)'},
    'input_households_insulation_level_apartments_2005': {'Woningtype ETM': 'Appartement', 'Bouwjaarklasse ETM': '>= 2005', 'Categorie': 'Gemiddelde netto warmtevraag (kWh/m2)'},
    
    # Insulation level of terraced
    'input_households_insulation_level_terraced_1945': {'Woningtype ETM': 'Rijtjeshuis', 'Bouwjaarklasse ETM': '< 1945', 'Categorie': 'Gemiddelde netto warmtevraag (kWh/m2)'},
    'input_households_insulation_level_terraced_1945_1964': {'Woningtype ETM': 'Rijtjeshuis', 'Bouwjaarklasse ETM': '1945 - 1964', 'Categorie': 'Gemiddelde netto warmtevraag (kWh/m2)'},
    'input_households_insulation_level_terraced_1965_1984': {'Woningtype ETM': 'Rijtjeshuis', 'Bouwjaarklasse ETM': '1965 - 1984', 'Categorie': 'Gemiddelde netto warmtevraag (kWh/m2)'},
    'input_households_insulation_level_terraced_1985_2004': {'Woningtype ETM': 'Rijtjeshuis', 'Bouwjaarklasse ETM': '1985 - 2004', 'Categorie': 'Gemiddelde netto warmtevraag (kWh/m2)'},
    'input_households_insulation_level_terraced_2005': {'Woningtype ETM': 'Rijtjeshuis', 'Bouwjaarklasse ETM': '>= 2005', 'Categorie': 'Gemiddelde netto warmtevraag (kWh/m2)'},
    
    # Insulation level of semi-detached
    'input_households_insulation_level_semi_detached_1945': {'Woningtype ETM': 'Hoekhuis', 'Bouwjaarklasse ETM': '< 1945', 'Categorie': 'Gemiddelde netto warmtevraag (kWh/m2)'},
    'input_households_insulation_level_semi_detached_1945_1964': {'Woningtype ETM': 'Hoekhuis', 'Bouwjaarklasse ETM': '1945 - 1964', 'Categorie': 'Gemiddelde netto warmtevraag (kWh/m2)'},
    'input_households_insulation_level_semi_detached_1965_1984': {'Woningtype ETM': 'Hoekhuis', 'Bouwjaarklasse ETM': '1965 - 1984', 'Categorie': 'Gemiddelde netto warmtevraag (kWh/m2)'},
    'input_households_insulation_level_semi_detached_1985_2004': {'Woningtype ETM': 'Hoekhuis', 'Bouwjaarklasse ETM': '1985 - 2004', 'Categorie': 'Gemiddelde netto warmtevraag (kWh/m2)'},
    'input_households_insulation_level_semi_detached_2005': {'Woningtype ETM': 'Hoekhuis', 'Bouwjaarklasse ETM': '>= 2005', 'Categorie': 'Gemiddelde netto warmtevraag (kWh/m2)'},
    
    # Insulation level of detached
    'input_households_insulation_level_detached_1945': {'Woningtype ETM': 'Vrijstaand huis', 'Bouwjaarklasse ETM': '< 1945', 'Categorie': 'Gemiddelde netto warmtevraag (kWh/m2)'},
    'input_households_insulation_level_detached_1945_1964': {'Woningtype ETM': 'Vrijstaand huis', 'Bouwjaarklasse ETM': '1945 - 1964', 'Categorie': 'Gemiddelde netto warmtevraag (kWh/m2)'},
    'input_households_insulation_level_detached_1965_1984': {'Woningtype ETM': 'Vrijstaand huis', 'Bouwjaarklasse ETM': '1965 - 1984', 'Categorie': 'Gemiddelde netto warmtevraag (kWh/m2)'},
    'input_households_insulation_level_detached_1985_2004': {'Woningtype ETM': 'Vrijstaand huis', 'Bouwjaarklasse ETM': '1985 - 2004', 'Categorie': 'Gemiddelde netto warmtevraag (kWh/m2)'},
    'input_households_insulation_level_detached_2005': {'Woningtype ETM': 'Vrijstaand huis', 'Bouwjaarklasse ETM': '>= 2005', 'Categorie': 'Gemiddelde netto warmtevraag (kWh/m2)'},
    
    # Share of useful heat demand of apartments
    'input_households_share_of_useful_demand_for_space_heating_apartments_1945': {'Woningtype ETM': 'Appartement', 'Bouwjaarklasse ETM': '< 1945', 'Categorie': 'Functionele vraag ruimteverwarming (% van totaal)'},
    'input_households_share_of_useful_demand_for_space_heating_apartments_1945_1964': {'Woningtype ETM': 'Appartement', 'Bouwjaarklasse ETM': '1945 - 1964', 'Categorie': 'Functionele vraag ruimteverwarming (% van totaal)'},
    'input_households_share_of_useful_demand_for_space_heating_apartments_1965_1984': {'Woningtype ETM': 'Appartement', 'Bouwjaarklasse ETM': '1965 - 1984', 'Categorie': 'Functionele vraag ruimteverwarming (% van totaal)'},
    'input_households_share_of_useful_demand_for_space_heating_apartments_1985_2004': {'Woningtype ETM': 'Appartement', 'Bouwjaarklasse ETM': '1985 - 2004', 'Categorie': 'Functionele vraag ruimteverwarming (% van totaal)'},
    'input_households_share_of_useful_demand_for_space_heating_apartments_2005': {'Woningtype ETM': 'Appartement', 'Bouwjaarklasse ETM': '>= 2005', 'Categorie': 'Functionele vraag ruimteverwarming (% van totaal)'},
    
    # Share of useful heat demand of terraced
    'input_households_share_of_useful_demand_for_space_heating_terraced_1945': {'Woningtype ETM': 'Rijtjeshuis', 'Bouwjaarklasse ETM': '< 1945', 'Categorie': 'Functionele vraag ruimteverwarming (% van totaal)'},
    'input_households_share_of_useful_demand_for_space_heating_terraced_1945_1964': {'Woningtype ETM': 'Rijtjeshuis', 'Bouwjaarklasse ETM': '1945 - 1964', 'Categorie': 'Functionele vraag ruimteverwarming (% van totaal)'},
    'input_households_share_of_useful_demand_for_space_heating_terraced_1965_1984': {'Woningtype ETM': 'Rijtjeshuis', 'Bouwjaarklasse ETM': '1965 - 1984', 'Categorie': 'Functionele vraag ruimteverwarming (% van totaal)'},
    'input_households_share_of_useful_demand_for_space_heating_terraced_1985_2004': {'Woningtype ETM': 'Rijtjeshuis', 'Bouwjaarklasse ETM': '1985 - 2004', 'Categorie': 'Functionele vraag ruimteverwarming (% van totaal)'},
    'input_households_share_of_useful_demand_for_space_heating_terraced_2005': {'Woningtype ETM': 'Rijtjeshuis', 'Bouwjaarklasse ETM': '>= 2005', 'Categorie': 'Functionele vraag ruimteverwarming (% van totaal)'},
    
    # Share of useful heat demand of semi-detached
    'input_households_share_of_useful_demand_for_space_heating_semi_detached_1945': {'Woningtype ETM': 'Hoekhuis', 'Bouwjaarklasse ETM': '< 1945', 'Categorie': 'Functionele vraag ruimteverwarming (% van totaal)'},
    'input_households_share_of_useful_demand_for_space_heating_semi_detached_1945_1964': {'Woningtype ETM': 'Hoekhuis', 'Bouwjaarklasse ETM': '1945 - 1964', 'Categorie': 'Functionele vraag ruimteverwarming (% van totaal)'},
    'input_households_share_of_useful_demand_for_space_heating_semi_detached_1965_1984': {'Woningtype ETM': 'Hoekhuis', 'Bouwjaarklasse ETM': '1965 - 1984', 'Categorie': 'Functionele vraag ruimteverwarming (% van totaal)'},
    'input_households_share_of_useful_demand_for_space_heating_semi_detached_1985_2004': {'Woningtype ETM': 'Hoekhuis', 'Bouwjaarklasse ETM': '1985 - 2004', 'Categorie': 'Functionele vraag ruimteverwarming (% van totaal)'},
    'input_households_share_of_useful_demand_for_space_heating_semi_detached_2005': {'Woningtype ETM': 'Hoekhuis', 'Bouwjaarklasse ETM': '>= 2005', 'Categorie': 'Functionele vraag ruimteverwarming (% van totaal)'},
    
    # Share of useful heat demand of detached
    'input_households_share_of_useful_demand_for_space_heating_detached_1945': {'Woningtype ETM': 'Vrijstaand huis', 'Bouwjaarklasse ETM': '< 1945', 'Categorie': 'Functionele vraag ruimteverwarming (% van totaal)'},
    'input_households_share_of_useful_demand_for_space_heating_detached_1945_1964': {'Woningtype ETM': 'Vrijstaand huis', 'Bouwjaarklasse ETM': '1945 - 1964', 'Categorie': 'Functionele vraag ruimteverwarming (% van totaal)'},
    'input_households_share_of_useful_demand_for_space_heating_detached_1965_1984': {'Woningtype ETM': 'Vrijstaand huis', 'Bouwjaarklasse ETM': '1965 - 1984', 'Categorie': 'Functionele vraag ruimteverwarming (% van totaal)'},
    'input_households_share_of_useful_demand_for_space_heating_detached_1985_2004': {'Woningtype ETM': 'Vrijstaand huis', 'Bouwjaarklasse ETM': '1985 - 2004', 'Categorie': 'Functionele vraag ruimteverwarming (% van totaal)'},
    'input_households_share_of_useful_demand_for_space_heating_detached_2005': {'Woningtype ETM': 'Vrijstaand huis', 'Bouwjaarklasse ETM': '>= 2005', 'Categorie': 'Functionele vraag ruimteverwarming (% van totaal)'},
    
}

In [70]:
# TODO: this could also be a CSV transformed into a data frame--is this more readable for the notebook user?
mapping_buildings = {
    
    # Number of buildings (EDU)
    'number_of_buildings': 'Aantal gebouwen (#)',
    
    # Insulation level buildings
    'input_buildings_insulation_level': 'Gemiddelde netto warmtevraag (kWh/m2)'   

}

Add **housing** stock and insulation level values to the (ETLocal) dataset manager template

In [71]:
# Add values to (ETLocal) dataset manager template
for municipality in municipalities:
    for etlocal_key, source_values in mapping_households.items():
        # If the housing stock category exists for the municipality, use the EP online data
        try: 
            # Update the value
            template.loc[(municipality, slice(None), slice(None), etlocal_key), 'value'] = df_housing_stock_by_groups.loc[(municipality, source_values['Woningtype ETM'], source_values['Bouwjaarklasse ETM']), source_values['Categorie']]

            # Add a commit message
            template.loc[(municipality, slice(None), slice(None), etlocal_key), 'commit'] = f"Based on both the EP-online dataset and the PBL referentieverbruiken"
        
        # Else, create a fallback value of zero
        except:
            # Update the value
            template.loc[(municipality, slice(None), slice(None), etlocal_key), 'value'] = 0.

            # Add a commit message
            template.loc[(municipality, slice(None), slice(None), etlocal_key), 'commit'] = f"Based on both the EP-online dataset and the PBL referentieverbruiken"
            

In [72]:
# Preview the filtered template
template.loc[filter_housing_stock_and_insulation, :]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,unit,value,commit
geo_id,group,subgroup,key,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
GM0003,households,households_housing_stock,number_of_inhabitants,#,,
GM0003,households,households_housing_stock,residences_roof_surface_available_for_pv,km2,,
GM0003,households,households_housing_stock,households_number_of_apartments_2005,#,0.000000,Based on both the EP-online dataset and the PB...
GM0003,households,households_housing_stock,households_number_of_apartments_1985_2004,#,0.000000,Based on both the EP-online dataset and the PB...
GM0003,households,households_housing_stock,households_number_of_apartments_1965_1984,#,0.000000,Based on both the EP-online dataset and the PB...
...,...,...,...,...,...,...
GM0193,households,households_insulation_level,input_households_share_of_useful_demand_for_space_heating_detached_2005,%,1.164518,Based on both the EP-online dataset and the PB...
GM0193,households,households_insulation_level,input_households_share_of_useful_demand_for_space_heating_detached_1985_2004,%,1.508611,Based on both the EP-online dataset and the PB...
GM0193,households,households_insulation_level,input_households_share_of_useful_demand_for_space_heating_detached_1965_1984,%,1.072042,Based on both the EP-online dataset and the PB...
GM0193,households,households_insulation_level,input_households_share_of_useful_demand_for_space_heating_detached_1945_1964,%,0.598084,Based on both the EP-online dataset and the PB...


Add **building** stock and insulation level values to the (ETLocal) dataset manager template

In [73]:
# Add values to (ETLocal) dataset manager template
for municipality in municipalities:
    
    # If the municipality is in the correction dict, do not add the number of buildings to the migration file. Only fill the average net heat demand of the municipality it has been merged to.
    if municipality in correction.keys():
        
        etlocal_key = 'input_buildings_insulation_level'
        source_key = 'Gemiddelde netto warmtevraag (kWh/m2)'
        
        # If the housing stock category exists for the municipality, use the BAG data
        try: 
            # Update the value
            template.loc[(municipality, slice(None), slice(None), etlocal_key), 'value'] = df_building_stock_by_groups.loc[(correction[municipality]), source_key]

            # Add a commit message
            template.loc[(municipality, slice(None), slice(None), etlocal_key), 'commit'] = f"Based on the 'verrijkte BAG' by TNO (source: https://energy.nl/publications/verrijkte-bag-energetische-vraagstukken/)."
        except:
            pass


    # Else, just fill the data based on the data source (BAG)
    else:
        
        for etlocal_key, source_key in mapping_buildings.items():   
            
            # If the housing stock category exists for the municipality, use the BAG data
            try: 
                # Update the value
                template.loc[(municipality, slice(None), slice(None), etlocal_key), 'value'] = df_building_stock_by_groups.loc[(municipality), source_key]

                # Add a commit message
                template.loc[(municipality, slice(None), slice(None), etlocal_key), 'commit'] = f"Based on the verrijkte BAG by TNO (source: https://energy.nl/publications/verrijkte-bag-energetische-vraagstukken/)"

            # Else, create a fallback value of zero
            except:
                # Update the value
                template.loc[(municipality, slice(None), slice(None), etlocal_key), 'value'] = 0.

                # Add a commit message
                template.loc[(municipality, slice(None), slice(None), etlocal_key), 'commit'] = f"No data available"

In [74]:
# Preview the filtered template
template.loc[filter_building_stock_and_insulation, :]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,unit,value,commit
geo_id,group,subgroup,key,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
GM0003,buildings,buildings_building_stock,number_of_buildings,#,,
GM0003,buildings,buildings_building_stock,input_buildings_insulation_level,kWh/m2,270.585527,Based on the 'verrijkte BAG' by TNO (source: h...
GM0010,buildings,buildings_building_stock,number_of_buildings,#,,
GM0010,buildings,buildings_building_stock,input_buildings_insulation_level,kWh/m2,270.585527,Based on the 'verrijkte BAG' by TNO (source: h...
GM0024,buildings,buildings_building_stock,number_of_buildings,#,,
...,...,...,...,...,...,...
GM1896,buildings,buildings_building_stock,input_buildings_insulation_level,kWh/m2,251.685579,Based on the 'verrijkte BAG' by TNO (source: h...
GM0642,buildings,buildings_building_stock,number_of_buildings,#,2579.000000,Based on the 'verrijkte BAG' by TNO (source: h...
GM0642,buildings,buildings_building_stock,input_buildings_insulation_level,kWh/m2,234.620593,Based on the 'verrijkte BAG' by TNO (source: h...
GM0193,buildings,buildings_building_stock,number_of_buildings,#,6368.000000,Based on the 'verrijkte BAG' by TNO (source: h...


#### Initialize migration files

In [75]:
# First, initialize data.csv file based on the config file
path = Path("config", "data.csv")
df_data_csv = pd.read_csv(path, sep=sep, index_col=[0])

# Drop unnamed columns
df_data_csv = df_data_csv.drop(columns=df_data_csv.columns[df_data_csv.columns.str.startswith('Unnamed')])

In [76]:
# Preview dataframe representing data.csv
df_data_csv

Unnamed: 0_level_0,country,name
geo_id,Unnamed: 1_level_1,Unnamed: 2_level_1
GM0003,nl2019,Appingedam
GM0010,nl2019,Delfzijl
GM0024,nl2019,Loppersum
GM1680,nl2019,Aa en Hunze
GM0358,nl2019,Aalsmeer
...,...,...
GM0879,nl2019,Zundert
GM0301,nl2019,Zutphen
GM1896,nl2019,Zwartewaterland
GM0642,nl2019,Zwijndrecht


In [77]:
# Then, initialize commits.yml file by specifying the file path where you want to create the YAML file
path = Path("data", "reporting", "commits.yml")

# Write the document separator to the YAML file
with open(path, 'w') as file:
    file.write('---\n')

#### Fill migration files
Fill the relevant processed data from the ETLocal template in the `data.csv` and `commits.yml` files that are necessary for a ETLocal migration.

In [78]:
# Preview ETLocal template
template

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,unit,value,commit
geo_id,group,subgroup,key,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
GM0003,area,area_emission_factors,file_carriers_imported_heat_co2_conversion_per_mj,kg/MJ,,
GM0003,area,area_emission_factors,file_carriers_propane_co2_conversion_per_mj,kg/MJ,,
GM0003,households,households_applications,households_final_demand_electricity_households_final_demand_for_cooking_electricity_parent_share,%,,
GM0003,households,households_applications,households_final_demand_electricity_households_final_demand_for_cooling_electricity_parent_share,%,,
GM0003,households,households_applications,households_final_demand_electricity_households_final_demand_for_hot_water_electricity_parent_share,%,,
...,...,...,...,...,...,...
GM0193,energy,energy_heat_production,input_energy_heat_burner_mt_crude_oil_production,TJ,,
GM0193,energy,energy_heat_production,energy_import_heat_demand,TJ,,
GM0193,energy,energy_energy_demand,input_energy_heat_distribution_ht_loss_share,%,,
GM0193,energy,energy_energy_demand,input_energy_heat_distribution_mt_loss_share,%,,


In [79]:
# List all keys for which values have been added to the template
keys = list(template.dropna().index.get_level_values(3).unique())

# Preview list of keys
keys

['households_number_of_apartments_2005',
 'households_number_of_apartments_1985_2004',
 'households_number_of_apartments_1965_1984',
 'households_number_of_apartments_1945_1964',
 'households_number_of_apartments_1945',
 'households_number_of_terraced_2005',
 'households_number_of_terraced_1985_2004',
 'households_number_of_terraced_1965_1984',
 'households_number_of_terraced_1945_1964',
 'households_number_of_terraced_1945',
 'households_number_of_semi_detached_2005',
 'households_number_of_semi_detached_1985_2004',
 'households_number_of_semi_detached_1965_1984',
 'households_number_of_semi_detached_1945_1964',
 'households_number_of_semi_detached_1945',
 'households_number_of_detached_2005',
 'households_number_of_detached_1985_2004',
 'households_number_of_detached_1965_1984',
 'households_number_of_detached_1945_1964',
 'households_number_of_detached_1945',
 'input_households_insulation_level_apartments_2005',
 'input_households_insulation_level_apartments_1985_2004',
 'input_hous

In [80]:
# Add keys to data.csv migration file
for key in keys:
    df_data_csv.loc[:, key] = float('nan')
    
# Preview data.csv
df_data_csv

Unnamed: 0_level_0,country,name,households_number_of_apartments_2005,households_number_of_apartments_1985_2004,households_number_of_apartments_1965_1984,households_number_of_apartments_1945_1964,households_number_of_apartments_1945,households_number_of_terraced_2005,households_number_of_terraced_1985_2004,households_number_of_terraced_1965_1984,...,input_households_share_of_useful_demand_for_space_heating_semi_detached_1965_1984,input_households_share_of_useful_demand_for_space_heating_semi_detached_1945_1964,input_households_share_of_useful_demand_for_space_heating_semi_detached_1945,input_households_share_of_useful_demand_for_space_heating_detached_2005,input_households_share_of_useful_demand_for_space_heating_detached_1985_2004,input_households_share_of_useful_demand_for_space_heating_detached_1965_1984,input_households_share_of_useful_demand_for_space_heating_detached_1945_1964,input_households_share_of_useful_demand_for_space_heating_detached_1945,input_buildings_insulation_level,number_of_buildings
geo_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
GM0003,nl2019,Appingedam,,,,,,,,,...,,,,,,,,,,
GM0010,nl2019,Delfzijl,,,,,,,,,...,,,,,,,,,,
GM0024,nl2019,Loppersum,,,,,,,,,...,,,,,,,,,,
GM1680,nl2019,Aa en Hunze,,,,,,,,,...,,,,,,,,,,
GM0358,nl2019,Aalsmeer,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GM0879,nl2019,Zundert,,,,,,,,,...,,,,,,,,,,
GM0301,nl2019,Zutphen,,,,,,,,,...,,,,,,,,,,
GM1896,nl2019,Zwartewaterland,,,,,,,,,...,,,,,,,,,,
GM0642,nl2019,Zwijndrecht,,,,,,,,,...,,,,,,,,,,


In [81]:
# Initialize an empty array for the commits
commits = []

# For all relevant keys and for each municipality fill data.csv and commits.yml
for key in keys:
    for municipality in municipalities:
        # Add data value to data.csv
        df_data_csv.loc[municipality, key] = template.loc[(municipality, slice(None), slice(None), key), 'value'].values[0]
        
    # Add commit message to commits.yml
    commits.append({'fields': [key], 'message': template.loc[(slice(None), slice(None), slice(None), key), 'commit'].values[0]})
    
# Preview data for all municipalities in the format required for the data.csv file
df_data_csv

Unnamed: 0_level_0,country,name,households_number_of_apartments_2005,households_number_of_apartments_1985_2004,households_number_of_apartments_1965_1984,households_number_of_apartments_1945_1964,households_number_of_apartments_1945,households_number_of_terraced_2005,households_number_of_terraced_1985_2004,households_number_of_terraced_1965_1984,...,input_households_share_of_useful_demand_for_space_heating_semi_detached_1965_1984,input_households_share_of_useful_demand_for_space_heating_semi_detached_1945_1964,input_households_share_of_useful_demand_for_space_heating_semi_detached_1945,input_households_share_of_useful_demand_for_space_heating_detached_2005,input_households_share_of_useful_demand_for_space_heating_detached_1985_2004,input_households_share_of_useful_demand_for_space_heating_detached_1965_1984,input_households_share_of_useful_demand_for_space_heating_detached_1945_1964,input_households_share_of_useful_demand_for_space_heating_detached_1945,input_buildings_insulation_level,number_of_buildings
geo_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
GM0003,nl2019,Appingedam,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,270.585527,
GM0010,nl2019,Delfzijl,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,270.585527,
GM0024,nl2019,Loppersum,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,270.585527,
GM1680,nl2019,Aa en Hunze,163.0,310.0,275.0,124.0,43.0,30.0,224.0,753.0,...,12.208785,6.144921,2.129478,2.802251,9.679759,10.677521,7.648173,18.041910,249.800236,2299.0
GM0358,nl2019,Aalsmeer,1093.0,932.0,451.0,211.0,81.0,1392.0,1065.0,1765.0,...,7.354717,4.322559,3.799890,2.819781,2.695044,2.664897,2.283962,3.370300,226.760376,1146.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GM0879,nl2019,Zundert,294.0,574.0,161.0,35.0,54.0,287.0,402.0,809.0,...,10.715435,7.877962,3.509186,3.205434,11.885279,11.241960,7.143077,6.290880,264.660701,827.0
GM0301,nl2019,Zutphen,1229.0,1190.0,2570.0,875.0,991.0,620.0,2071.0,2741.0,...,6.543316,3.567067,5.969027,0.601814,2.219638,0.937629,1.014341,1.708486,260.458118,2565.0
GM1896,nl2019,Zwartewaterland,334.0,410.0,130.0,64.0,71.0,308.0,618.0,1364.0,...,11.557494,4.617109,4.631708,1.885249,6.145501,3.780211,1.890966,3.016952,251.685579,1006.0
GM0642,nl2019,Zwijndrecht,804.0,1382.0,3730.0,2041.0,72.0,184.0,1618.0,3340.0,...,7.422702,5.475984,2.780868,0.206119,0.760678,1.093487,0.476573,0.942665,234.620593,2579.0


In [82]:
# Drop the name column
del df_data_csv['name']

# Preview data
df_data_csv

Unnamed: 0_level_0,country,households_number_of_apartments_2005,households_number_of_apartments_1985_2004,households_number_of_apartments_1965_1984,households_number_of_apartments_1945_1964,households_number_of_apartments_1945,households_number_of_terraced_2005,households_number_of_terraced_1985_2004,households_number_of_terraced_1965_1984,households_number_of_terraced_1945_1964,...,input_households_share_of_useful_demand_for_space_heating_semi_detached_1965_1984,input_households_share_of_useful_demand_for_space_heating_semi_detached_1945_1964,input_households_share_of_useful_demand_for_space_heating_semi_detached_1945,input_households_share_of_useful_demand_for_space_heating_detached_2005,input_households_share_of_useful_demand_for_space_heating_detached_1985_2004,input_households_share_of_useful_demand_for_space_heating_detached_1965_1984,input_households_share_of_useful_demand_for_space_heating_detached_1945_1964,input_households_share_of_useful_demand_for_space_heating_detached_1945,input_buildings_insulation_level,number_of_buildings
geo_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
GM0003,nl2019,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,270.585527,
GM0010,nl2019,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,270.585527,
GM0024,nl2019,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,270.585527,
GM1680,nl2019,163.0,310.0,275.0,124.0,43.0,30.0,224.0,753.0,131.0,...,12.208785,6.144921,2.129478,2.802251,9.679759,10.677521,7.648173,18.041910,249.800236,2299.0
GM0358,nl2019,1093.0,932.0,451.0,211.0,81.0,1392.0,1065.0,1765.0,543.0,...,7.354717,4.322559,3.799890,2.819781,2.695044,2.664897,2.283962,3.370300,226.760376,1146.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
GM0879,nl2019,294.0,574.0,161.0,35.0,54.0,287.0,402.0,809.0,136.0,...,10.715435,7.877962,3.509186,3.205434,11.885279,11.241960,7.143077,6.290880,264.660701,827.0
GM0301,nl2019,1229.0,1190.0,2570.0,875.0,991.0,620.0,2071.0,2741.0,980.0,...,6.543316,3.567067,5.969027,0.601814,2.219638,0.937629,1.014341,1.708486,260.458118,2565.0
GM1896,nl2019,334.0,410.0,130.0,64.0,71.0,308.0,618.0,1364.0,410.0,...,11.557494,4.617109,4.631708,1.885249,6.145501,3.780211,1.890966,3.016952,251.685579,1006.0
GM0642,nl2019,804.0,1382.0,3730.0,2041.0,72.0,184.0,1618.0,3340.0,1955.0,...,7.422702,5.475984,2.780868,0.206119,0.760678,1.093487,0.476573,0.942665,234.620593,2579.0


In [83]:
# Write data.csv to processed data directory
df_data_csv.to_csv(f"data/reporting/data.csv")

In [84]:
# TODO: Beautify the yaml data and make it more readable
# commits_yaml = yaml.dump(commits, sort_keys=False, indent=4, default_flow_style=False, default_style='|')

# Write the updated data back to the YAML file
path = Path("data", "reporting", "commits.yml")

with open(path, 'w') as file:
    yaml.safe_dump(commits, file)