In [1]:
import pandas as pd
import numpy as np

# Building stock data on house-level (BAG) 



This notebook uses the following datasets to merge ep-online(BAG) data with klimaatmonitordata:
- BAG data per house, including buildingtypes, retrieved from: 
- Energy data per municipality, retrieved from: klimaatmonitor

@author: Mark Hupkens
last edit: 13-05-2019

In [2]:
'''Import Data'''

# BAG data
df = pd.read_csv("D:/markhupkens/Dropbox/EnTransitionNL/0. Data/masterdf.csv", error_bad_lines=False, sep =';') # handled data from https://www.rvo.nl/sites/default/files/2019/01/Voorlopige_labels_okt2018.zip

# Energy Data
df_energy_buurt = pd.read_excel("D:/markhupkens/Dropbox/EnTransitionNL/0. Data/Klimaatmonitor (energieverbruik , stadsverwarming, pv - Buurten,wijken en gemeenten 2017).xls",sheet_name='Buurt')
df_energy_gemeente = pd.read_excel("D:/markhupkens/Dropbox/EnTransitionNL/0. Data/Klimaatmonitor (energieverbruik , stadsverwarming, pv - Buurten,wijken en gemeenten 2017).xls",sheet_name='Gemeente')
df_energy_wijk = pd.read_excel("D:/markhupkens/Dropbox/EnTransitionNL/0. Data/Klimaatmonitor (energieverbruik , stadsverwarming, pv - Buurten,wijken en gemeenten 2017).xls",sheet_name='Wijk')

# Replace string values with nan value 0.424
df_energy_buurt.replace(to_replace='?',value=0.4242,inplace=True)
df_energy_buurt.replace(to_replace='-',value=0.4242,inplace=True)

df_energy_gemeente.replace(to_replace='?',value=0.4242,inplace=True)
df_energy_gemeente.replace(to_replace='-',value=0.4242,inplace=True)

df_energy_wijk.replace(to_replace='?',value=0.4242,inplace=True)
df_energy_wijk.replace(to_replace='-',value=0.4242,inplace=True)

In [3]:
print('number of neighbourhoods in bag', len(df['Neighbourhood Name'].unique()))
print('number of neighbourhoods in klimaatmonitor', len(df_energy_buurt.Buurt.unique()))

number of neighbourhoods in bag 12040
number of neighbourhoods in klimaatmonitor 12252


## Data on buurt-level

Firstly, binary columns have to be created to allow for counting in a groupby dataframe. 13 new columns are added to show housing type (c1-c6) and label (A-G).

In [4]:
''' Create housing matrix'''

# Add housing matrix to enable building type count at the end of the script
df.loc[df['Housing Type']=='C1', 'Houses Detached BAG2018d'] = 1 
df.loc[df['Housing Type']=='C2', 'Houses 2u1Roof BAG2018d'] = 1 
df.loc[df['Housing Type']=='C3', 'Houses Corner BAG2018d'] = 1
df.loc[df['Housing Type']=='C4', 'Houses Row BAG2018'] = 1
df.loc[df['Housing Type']=='C5', 'Houses SingleFloorAppartments c5 BAG2018d'] = 1
df.loc[df['Housing Type']=='C6', 'Houses MultiFloorAppartments c6 BAG2018d'] = 1

# Add preliminary evaluation
df.loc[df['Preliminary Evaluation']=='A', 'Label A BAG2018d'] = 1 
df.loc[df['Preliminary Evaluation']=='B', 'Label B BAG2018d'] = 1 
df.loc[df['Preliminary Evaluation']=='C', 'Label C BAG2018d'] = 1
df.loc[df['Preliminary Evaluation']=='D', 'Label D BAG2018d'] = 1
df.loc[df['Preliminary Evaluation']=='E', 'Label E BAG2018d'] = 1
df.loc[df['Preliminary Evaluation']=='F', 'Label F BAG2018d'] = 1
df.loc[df['Preliminary Evaluation']=='G', 'Label G BAG2018d'] = 1

df.head()

Unnamed: 0,Zip Code(6),House No,Appartment No,Construction Year,Housing Type,Preliminary Evaluation,Address,Neighbourhood Code,District Code,Municipality Code,...,Houses Row BAG2018,Houses SingleFloorAppartments c5 BAG2018d,Houses MultiFloorAppartments c6 BAG2018d,Label A BAG2018d,Label B BAG2018d,Label C BAG2018d,Label D BAG2018d,Label E BAG2018d,Label F BAG2018d,Label G BAG2018d
0,3752NX,25,,1983,C3,C,3752NX_25,3130006.0,31300.0,313.0,...,,,,,,1.0,,,,
1,3752NX,26,,1983,C3,C,3752NX_26,3130006.0,31300.0,313.0,...,,,,,,1.0,,,,
2,3752NX,27,,1983,C4,C,3752NX_27,3130006.0,31300.0,313.0,...,1.0,,,,,1.0,,,,
3,3752NZ,28,,1983,C4,C,3752NZ_28,3130006.0,31300.0,313.0,...,1.0,,,,,1.0,,,,
4,3752NZ,29,,1983,C4,C,3752NZ_29,3130006.0,31300.0,313.0,...,1.0,,,,,1.0,,,,


### 1. Group Data

In [5]:
'''Group adressdata in neighborhoods within municipalities'''

# group data
df_bag = df.groupby(['Municipality Name','Neighbourhood Name','Neighbourhood Code']).agg({'House No':'count',
                                              'Houses Detached BAG2018d':'count',
                                               'Houses 2u1Roof BAG2018d':'count',
                                              'Houses Corner BAG2018d':'count',
                                              'Houses Row BAG2018':'count',
                                              'Houses SingleFloorAppartments c5 BAG2018d':'count',
                                              'Houses MultiFloorAppartments c6 BAG2018d':'count',
                                              'Construction Year':'mean',
                                               'Label A BAG2018d':'count',
                                               'Label B BAG2018d':'count',
                                                'Label C BAG2018d':'count',
                                                'Label D BAG2018d':'count',
                                                'Label E BAG2018d':'count',
                                                'Label F BAG2018d':'count',
                                                'Label G BAG2018d':'count'})

# Rename column and duplicate index for merge later on
df_bag.rename(columns={'House No':'Houses All BAG2018d'})
df_bag['Neighbourhood Name_2'] = df_bag.index.get_level_values('Neighbourhood Name')
df_bag['Municipality Name_2'] = df_bag.index.get_level_values('Municipality Name')
df_bag['Neighbourhood Code_2'] = df_bag.index.get_level_values('Neighbourhood Code').map(str).str.split(".").str[0] # string values for easy merge

In [6]:
'''Group adressdata in districts within municipalities'''

# group data
df_bag_wijk = df.groupby(['Municipality Name','District Name','District Code']).agg({'House No':'count',
                                              'Houses Detached BAG2018d':'count',
                                               'Houses 2u1Roof BAG2018d':'count',
                                              'Houses Corner BAG2018d':'count',
                                              'Houses Row BAG2018':'count',
                                              'Houses SingleFloorAppartments c5 BAG2018d':'count',
                                              'Houses MultiFloorAppartments c6 BAG2018d':'count',
                                              'Construction Year':'mean',
                                               'Label A BAG2018d':'count',
                                               'Label B BAG2018d':'count',
                                                'Label C BAG2018d':'count',
                                                'Label D BAG2018d':'count',
                                                'Label E BAG2018d':'count',
                                                'Label F BAG2018d':'count',
                                                'Label G BAG2018d':'count'})

# Rename column and duplicate index for merge later on
df_bag_wijk.rename(columns={'House No':'Houses All BAG2018d'})
df_bag_wijk['District Name_2'] = df_bag_wijk.index.get_level_values('District Name')
df_bag_wijk['Municipality Name_2'] = df_bag_wijk.index.get_level_values('Municipality Name')
df_bag_wijk['District Code_2'] = df_bag_wijk.index.get_level_values('District Code').map(str).str.split(".").str[0] # string values for easy merge

In [7]:
df_bag.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,House No,Houses Detached BAG2018d,Houses 2u1Roof BAG2018d,Houses Corner BAG2018d,Houses Row BAG2018,Houses SingleFloorAppartments c5 BAG2018d,Houses MultiFloorAppartments c6 BAG2018d,Construction Year,Label A BAG2018d,Label B BAG2018d,Label C BAG2018d,Label D BAG2018d,Label E BAG2018d,Label F BAG2018d,Label G BAG2018d,Neighbourhood Name_2,Municipality Name_2,Neighbourhood Code_2
Municipality Name,Neighbourhood Name,Neighbourhood Code,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
's-Gravenhage,Archipelbuurt,5180546.0,3702,17,27,96,772,2788,0,1925.363857,46,695,474,1,102,716,1666,Archipelbuurt,'s-Gravenhage,5180546
's-Gravenhage,Arendsdorp,5180478.0,1152,6,6,66,220,854,0,1958.336806,245,0,0,0,390,220,297,Arendsdorp,'s-Gravenhage,5180478
's-Gravenhage,Belgisch Park,5180271.0,4144,42,54,167,556,3324,0,1949.673986,44,151,1360,1,203,479,1905,Belgisch Park,'s-Gravenhage,5180271
's-Gravenhage,Bezuidenhout-Midden,5182665.0,2507,1,3,40,361,2102,0,1957.71081,360,271,117,0,1034,278,447,Bezuidenhout-Midden,'s-Gravenhage,5182665
's-Gravenhage,Bezuidenhout-Oost,5182666.0,5245,1,0,18,136,5089,0,1930.33346,39,135,137,0,351,128,4454,Bezuidenhout-Oost,'s-Gravenhage,5182666


### 2. Merge BAG with Klimaatmonitor

In [8]:
'''Buurt: merge grouped bag data with klimaatmonitordata on buurt'''

df_merged_buurt = df_bag.merge(df_energy_buurt,left_on='Neighbourhood Name_2', right_on='Buurt',how='inner') # keep intersection of keys
df_merged_buurt = df_merged_buurt.groupby(['Municipality Name_2','Neighbourhood Name_2','Neighbourhood Code_2']).mean() # group by original index

In [9]:
'''Gemeente merge grouped bag data with klimaatmonitordata on Gemeente'''

df_merged_gemeente = df_bag.merge(df_energy_gemeente,left_on='Municipality Name', right_on='Gemeente',how='inner') # keep intersection of keys
df_merged_gemeente = df_merged_gemeente.groupby(['Municipality Name_2']).mean() # group by original index

In [10]:
'''Wijk merge grouped bag data with klimaatmonitordata on wijk'''

df_merged_wijk = df_bag_wijk.merge(df_energy_wijk,left_on='District Name', right_on='Wijk',how='inner') # keep intersection of keys
df_merged_wijk = df_merged_wijk.groupby(['Municipality Name_2','District Name_2','District Code_2']).mean() # group by original index

### 3. Merge data with Model-setup framework
Import modelsetup file and merge build-environment data with it on municipality name

In [11]:
# Remove special characters from column names
df_merged_gemeente.columns = df_merged_gemeente.columns.str.replace("[","").str.replace("]","")
df_merged_buurt.columns = df_merged_gemeente.columns.str.replace("[","").str.replace("]","")
df_merged_wijk.columns = df_merged_gemeente.columns.str.replace("[","").str.replace("]","")

In [12]:
# Import model setup files

df_mod_gemeente = pd.read_excel("D:/markhupkens/Dropbox/EnTransitionNL/0. Data/ModelSetUpEnergieNL02 (1).xlsx",sheet_name='gemeente')
df_mod_buurt = pd.read_excel("D:/markhupkens/Dropbox/EnTransitionNL/0. Data/ModelSetUpEnergieNL02 (1).xlsx",sheet_name='buurt')
df_mod_wijk = pd.read_excel("D:/markhupkens/Dropbox/EnTransitionNL/0. Data/ModelSetUpEnergieNL02 (1).xlsx",sheet_name='wijk') # wijkdata horrible from klimaatmonitor
df_mod_mod = pd.read_excel("D:/markhupkens/Dropbox/EnTransitionNL/0. Data/ModelSetUpEnergieNL02 (1).xlsx",sheet_name='ModelSpecification')

In [13]:
# split entity string to match building data on municipality name
df_mod_gemeente['Municipality Name'] = df_mod_gemeente["Entities"].str.split(" G").str[0] 
df_mod_wijk['Wijk Code'] = df_mod_wijk["Entities"].str.split(" W").str[-1].str.strip("K").str.strip(" ").str.lstrip("0") # Wijk on wijkcode, ditching leading 0's
df_mod_buurt['Buurt Code'] = df_mod_buurt["Entities"].str.split(" B").str[-1].str.strip("U").str.strip(" ").str.lstrip("0") # Buurt on buurt code, ditching leading 0's

In [14]:
# Check
len_mod_gem = len(df_mod_gemeente)
len_mod_buurt = len(df_mod_buurt)
len_mod_wijk = len(df_mod_wijk)

In [15]:
'''Merge prepared data with modelsetup data'''

# Gemeenten
df_mod_gemeente = df_mod_gemeente.merge(df_merged_gemeente, left_on='Municipality Name', right_on= df_merged_gemeente.index)
df_mod_gemeente = df_mod_gemeente.drop(['Municipality Name'], axis=1)

# Buurten
df_mod_buurt = df_mod_buurt.merge(df_merged_buurt, left_on='Buurt Code', right_on= df_merged_buurt.index.get_level_values('Neighbourhood Code_2'))
# df_mod_buurt = df_mod_buurt.drop(['Buurt Name'], axis=1)

# Wijken
df_mod_wijk = df_mod_wijk.merge(df_merged_wijk, left_on='Wijk Code', right_on=df_merged_wijk.index.get_level_values('District Code_2'))
# df_mod_wijk = df_mod_wijk.drop(['District Name_2'], axis=1)

In [16]:
print(len(df_mod_gemeente) - len_mod_gem,"Municipalities gained")
print(len(df_mod_buurt)- len_mod_buurt,"Buurten gained")
print(len(df_mod_wijk)- len_mod_wijk,"wijken gained")

-40 Municipalities gained
-732 Buurten gained
-223 wijken gained


In [22]:
print(len(df_mod_gemeente)) 
print(len(df_mod_buurt))
print(len(df_mod_wijk))

340
12573
2863


### 4. Export new modelspecification file

In [20]:
# export as xlsx to genereate new modelspecification file
from pandas import ExcelWriter

with pd.ExcelWriter('D:/markhupkens/Dropbox/EnTransitionNL/0. Data/ModelSetUpEnergyNL01_MH.xlsx') as writer:  # doctest: +SKIP
    df_mod_buurt.to_excel(writer, sheet_name='buurt')
    df_mod_wijk.to_excel(writer, sheet_name='wijk')
    df_mod_gemeente.to_excel(writer, sheet_name='gemeente')
    df_mod_mod.to_excel(writer, sheet_name='ModelSpecification')