In [1]:
import pandas as pd
import h5py

file_path = r"C:\Users\kubaw\Downloads\2019_data_60min.hdf5"

# First look at the HDF5 file: list the top-level entries, then try to read
# NO_PV/data into a DataFrame, reporting whichever piece turns out to be missing.
with h5py.File(file_path, 'r') as h5:
    top_level = list(h5.keys())
    print("Groups and Datasets: %s" % top_level)

    if 'NO_PV' not in top_level:
        print("The group 'NO_PV' does not exist in the file.")
    else:
        no_pv = h5['NO_PV']
        members = list(no_pv.keys())
        print("Datasets in 'NO_PV': %s" % members)

        if 'data' not in members:
            print("The expected dataset 'data' is not found within 'NO_PV'.")
        else:
            # Slice with [:] to read the dataset's contents while the file is open.
            data = pd.DataFrame(data=no_pv['data'][:])
            print(data.head())

Groups and Datasets: ['MISC', 'NO_PV', 'WITH_PV']
Datasets in 'NO_PV': ['SFH10', 'SFH11', 'SFH12', 'SFH14', 'SFH16', 'SFH17', 'SFH18', 'SFH19', 'SFH20', 'SFH21', 'SFH22', 'SFH23', 'SFH25', 'SFH27', 'SFH28', 'SFH29', 'SFH3', 'SFH30', 'SFH31', 'SFH32', 'SFH34', 'SFH35', 'SFH36', 'SFH37', 'SFH38', 'SFH39', 'SFH4', 'SFH40', 'SFH5', 'SFH6', 'SFH7', 'SFH8', 'SFH9']
The expected dataset 'data' is not found within 'NO_PV'.


In [2]:
import h5py

file_path = r"C:\Users\kubaw\Downloads\2019_data_60min.hdf5"

# Map every top-level group to the names of its direct children.
# Top-level entries that are not groups are left out of the mapping.
with h5py.File(file_path, 'r') as h5:
    groups_and_datasets = list(h5.keys())
    detailed_structure = {
        name: list(h5[name].keys())
        for name in groups_and_datasets
        if isinstance(h5[name], h5py.Group)
    }

groups_and_datasets, detailed_structure

(['MISC', 'NO_PV', 'WITH_PV'],
 {'MISC': ['ES1', 'PV1'],
  'NO_PV': ['SFH10',
   'SFH11',
   'SFH12',
   'SFH14',
   'SFH16',
   'SFH17',
   'SFH18',
   'SFH19',
   'SFH20',
   'SFH21',
   'SFH22',
   'SFH23',
   'SFH25',
   'SFH27',
   'SFH28',
   'SFH29',
   'SFH3',
   'SFH30',
   'SFH31',
   'SFH32',
   'SFH34',
   'SFH35',
   'SFH36',
   'SFH37',
   'SFH38',
   'SFH39',
   'SFH4',
   'SFH40',
   'SFH5',
   'SFH6',
   'SFH7',
   'SFH8',
   'SFH9'],
  'WITH_PV': ['SFH13', 'SFH15', 'SFH26', 'SFH33']})

In [3]:
import h5py

# Inspect one household node under NO_PV and report whether it is a dataset
# (with dtype and shape) or a group (with the names of its children).
# The printed label is derived from the key that is actually opened, so the
# messages cannot drift out of sync with the node being inspected.
household_id = 'SFH20'

with h5py.File(file_path, 'r') as f:
    node = f['NO_PV'][household_id]

    if isinstance(node, h5py.Dataset):
        print(f"{household_id} is a dataset.")
        print("Data type:", node.dtype)
        print("Shape:", node.shape)
    else:
        print(f"{household_id} is a group. Contents:", list(node.keys()))

SFH10 is a group. Contents: ['HEATPUMP', 'HOUSEHOLD']


In [4]:
# Load NO_PV/SFH9/HOUSEHOLD/table into the DataFrame `data`.
with h5py.File(file_path, 'r') as f:
    table_dataset = f['NO_PV']['SFH9']['HOUSEHOLD']['table']

    # Fail loudly if the node is not a dataset: merely printing a message would
    # leave `data` undefined (or stale from an earlier cell) for the cells below.
    if not isinstance(table_dataset, h5py.Dataset):
        raise TypeError("Expected a dataset but found a different type.")

    # Slice with [:] to read the table's contents while the file is still open.
    data = pd.DataFrame(data=table_dataset[:])

In [11]:
# Pull out the P_TOT column as a Series and display its sum over all rows.
data_plot = data.loc[:, "P_TOT"]
data_plot.sum()

4678755.173559909

In [6]:
# Turn the P_TOT Series into a two-column frame: the former index becomes
# 'HourOfYear' and the values become 'Consumption'.
# NOTE(review): treating the row index as the hour of the year assumes the rows
# are consecutive hourly samples starting at hour 0 — confirm against the data.
df = data_plot.reset_index().set_axis(['HourOfYear', 'Consumption'], axis=1)

print(df.dtypes)

# Cast the hour counter to int, then derive the hour within the day (0-23).
df = df.assign(HourOfYear=lambda frame: frame['HourOfYear'].astype(int))
df = df.assign(HourOfDay=lambda frame: frame['HourOfYear'] % 24)



HourOfYear       int64
Consumption    float64
dtype: object


In [93]:
# Export the hourly consumption without the helper HourOfDay column.
# The column is dropped on a temporary copy rather than in place: `df` keeps
# HourOfDay for the cells that still need it, and the cell can be re-run
# without raising KeyError on an already-removed column.
df.drop(columns=['HourOfDay']).to_csv(
    r'C:\Users\kubaw\Desktop\DELFT\THESIS\CH5\hourly_consumption_gemany2.csv',
    index=False,
)

In [7]:
# SFH13 - 2900
# SFH26 - 2402
# SFH26 - 2343
# SFH20 - 3046
# SFH11 - 3105
import plotly.graph_objs as go
import plotly.express as px

In [10]:

# Box plot of consumption grouped by hour of day (one box per hour, 0-23).
# HourOfDay is recomputed from HourOfYear here so the figure does not depend on
# whether another cell has dropped that column from `df`.
plot_df = df.assign(HourOfDay=df['HourOfYear'] % 24)

fig = px.box(plot_df, x='HourOfDay', y='Consumption', title='Hourly Electricity Consumption')

# Both axes share the same styling: white axis and grid lines on the tinted background.
axis_style = dict(
    showline=True,
    showgrid=True,
    gridcolor='white',
    linecolor='white',
)

# The title set in px.box is kept; the axis labels describe what is actually
# plotted (the previous 'Daily Energy' / 'Date' / '[Wh/day]' labels did not
# match an hour-of-day box plot).
# NOTE(review): the unit of 'Consumption' is not visible in this notebook —
# add it to the y-axis label once confirmed against the dataset documentation.
fig.update_layout(
    plot_bgcolor='#f2f1ee',
    paper_bgcolor='white',
    yaxis_title='Consumption',
    legend_title='Type',
    xaxis_title='Hour of day',
    xaxis=axis_style,
    yaxis=axis_style,
)

fig.update_traces(marker_color='#708f73')

# Font sizes and overall figure size.
fig.update_layout(
    title_font_size=14,
    legend_title_font_size=14,
    width=800,
    height=700,
)

fig.show()