# Heat pump data 

### Read HDF5 file, convert to pandas format, concat data for 2018-2020

This file contains the code to

1) Read in the heat pump load data for 36 houses in HDF5 format, with each year stored in a separate file

2) Convert the data format to a python dictionary containing the load data of each house over the available time span

         The dictionary is structured as follows: 

        {

         'House 1': dataframe containing load data of house 1,

         'House 2': dataframe containing load data of house 2,

             ...

         }

3) Save the dictionary to a pickle file at 'Data/heatpump/data_heatpump.pkl'

(4. Additional code used to check code functionality and data quality)

-------------

#### Imports

In [62]:
import h5py
import pandas as pd
import numpy as np
import pickle 
from datetime import datetime
import math

pd.options.mode.chained_assignment = None 

#### Functions to convert data

In [63]:
def hdf_to_pandas(hdf_dataset):
    """Convert a one-dimensional compound (structured) dataset to a DataFrame.

    Parameters
    ----------
    hdf_dataset : h5py.Dataset or numpy structured array
        Dataset whose dtype has named fields; every field becomes a column.

    Returns
    -------
    pandas.DataFrame
        One row per record, with columns named after, and ordered like, the
        fields of the dataset's dtype.

    Raises
    ------
    TypeError
        If the dataset's dtype has no named fields.
    """
    field_names = hdf_dataset.dtype.names
    if field_names is None:
        raise TypeError("hdf_to_pandas expects a dataset with a structured dtype")

    # ``dtype.names`` is the ordered tuple of field names, so the column
    # labels always line up with the positions inside each record tuple.
    column_list = list(field_names)

    # Read the whole dataset with a single slice instead of issuing one read
    # per row, then convert the records to plain Python tuples.
    list_of_rows = np.asarray(hdf_dataset[...]).tolist()
    return pd.DataFrame(data=list_of_rows, columns=column_list)

def first_n_digits(num, n):
    """Return the ``n`` leading decimal digits of the integer ``num``.

    The digit count is taken from the decimal string representation rather
    than from ``math.log``: the floating point logarithm is slightly too small
    for some exact powers of ten (``math.log(1000, 10)`` evaluates to
    2.9999999999999996), which made the result one digit too long.

    Parameters
    ----------
    num : int
        Number to truncate. Negative numbers keep their sign.
    n : int
        How many leading digits to keep.

    Returns
    -------
    int
        ``num`` cut down to its first ``n`` digits. If ``num`` has ``n`` or
        fewer digits it is returned unchanged; ``n <= 0`` yields 0 for any
        ``num`` with at least one digit to drop.
    """
    value = int(num)
    magnitude = abs(value)
    # Number of trailing digits that have to be dropped.
    surplus = len(str(magnitude)) - n
    if surplus <= 0:
        return value
    leading = magnitude // 10 ** surplus
    return -leading if value < 0 else leading

______________

#### load data for 2018

In [64]:
# Load the heat pump table of every house for 2018 into pandas.
# The context manager closes the HDF5 file as soon as all tables have been
# copied into DataFrames, so no file handle stays open afterwards.
# (dset_no_pv / dset_pv refer to the closed file once the block is left.)
with h5py.File('Data/HDF5data/heatpumps/2018_data_15min.hdf5', 'r') as file:
    dset_no_pv = file['NO_PV']
    dset_pv = file["WITH_PV"]

    df_dict_2018 = {}
    # 'NO_PV' houses are read first, then 'WITH_PV'; a key that occurs in
    # both groups ends up holding the 'WITH_PV' table.
    for dset in (dset_no_pv, dset_pv):
        for key in dset.keys():
            df_dict_2018[key] = hdf_to_pandas(dset[key]["HEATPUMP"]['table'])

#### load data for 2019

In [67]:
# Load the heat pump table of every house for 2019 into pandas.
# The context manager closes the HDF5 file as soon as all tables have been
# copied into DataFrames, so no file handle stays open afterwards.
# (dset_no_pv / dset_pv refer to the closed file once the block is left.)
with h5py.File('Data/HDF5data/heatpumps/2019_data_15min.hdf5', 'r') as file:
    dset_no_pv = file['NO_PV']
    dset_pv = file["WITH_PV"]

    df_dict_2019 = {}
    # 'NO_PV' houses are read first, then 'WITH_PV'; a key that occurs in
    # both groups ends up holding the 'WITH_PV' table.
    for dset in (dset_no_pv, dset_pv):
        for key in dset.keys():
            df_dict_2019[key] = hdf_to_pandas(dset[key]["HEATPUMP"]['table'])

#### load data for 2020

In [70]:
# Load the heat pump table of every house for 2020 into pandas.
# The context manager closes the HDF5 file as soon as all tables have been
# copied into DataFrames, so no file handle stays open afterwards.
# (dset_no_pv / dset_pv refer to the closed file once the block is left.)
with h5py.File('Data/HDF5data/heatpumps/2020_data_15min.hdf5', 'r') as file:
    dset_no_pv = file['NO_PV']
    dset_pv = file["WITH_PV"]

    df_dict_2020 = {}
    # 'NO_PV' houses are read first, then 'WITH_PV'; a key that occurs in
    # both groups ends up holding the 'WITH_PV' table.
    for dset in (dset_no_pv, dset_pv):
        for key in dset.keys():
            df_dict_2020[key] = hdf_to_pandas(dset[key]["HEATPUMP"]['table'])

#### concat load data, 2018-2020 for 36 houses

In [73]:
# Stack the three yearly frames of every house, oldest year first.
# The 2020 keys drive the loop, so each of those houses must also be present
# in the 2018 and 2019 dictionaries.
df_dict = {
    key_house: pd.concat(
        [yearly[key_house] for yearly in (df_dict_2018, df_dict_2019, df_dict_2020)]
    )
    for key_house in df_dict_2020
}

# Report every house whose combined frame deviates from the expected length.
# NOTE(review): 105216 presumably is 35040 + 35040 + 35136 quarter-hour
# samples (2020 being a leap year) - confirm.
for key_house, combined in df_dict.items():
    if len(combined) != 105216:
        print("issue with " + str(key_house))

print("data for {} houses".format(len(df_dict)))

data for 36 houses


#### save to pickle file

In [74]:
# Persist the per-house dictionary; pickle needs the file in binary mode.
with open('Data/heatpump/data_heatpump.pkl', 'wb') as pickle_file:
    pickle.dump(df_dict, pickle_file)

#### read saved data

In [None]:
# Read the dictionary back from the pickle file written above.
# NOTE(review): the result is bound to ``weather_dict`` although the file
# holds the heat pump dictionary - confirm whether the name is intended.
with open('Data/heatpump/data_heatpump.pkl', 'rb') as pickle_file:
    weather_dict = pickle.load(pickle_file)

-----------------------------

## Additional

#### check data

In [65]:
# Verify that every 2018 frame holds exactly 35040 entries in its 'index'
# column; ``checker`` becomes True as soon as one house deviates.
checker = False
for house_key, house_frame in df_dict_2018.items():
    if len(house_frame['index']) == 35040:
        continue
    print("issue with index " + str(house_key))
    checker = True

# Only confirm success when no house was reported above.
if not checker:
    print('all indices have the same size (1,35040)')

all indices have the same size (1,35040)


In [75]:
# Check that consecutive time stamps of every house are 900 s apart.
# The loop never breaks, so a ``for ... else`` would print the success
# message even after houses were reported; the flagged houses are therefore
# collected explicitly.
irregular_houses = []
for house, frame in df_dict.items():
    # Step between each 'index' value and the previous row (first row is NaN).
    frame['time_difference'] = frame['index'].diff()
    # Counting with a boolean sum yields 0 (instead of raising KeyError)
    # when a frame has no 900 s step at all.
    # 105215 = 105216 expected rows - 1.
    if (frame['time_difference'] == 900.0).sum() != 105215:
        print(house)
        irregular_houses.append(house)

if not irregular_houses:
    print('time stamps continues 15 min intervalls')

time stamps continues 15 min intervalls
