In [1]:
import pandas as pd
import numpy as np
from os.path import join
from nilmtk.datastore import Key
from nilmtk.measurement import LEVEL_NAMES
from nilmtk.utils import check_directory_exists, get_datastore, get_module_directory
from nilm_metadata import convert_yaml_to_hdf5
from copy import deepcopy


In [2]:
def reindex_fill_na(df, idx):
    """
    Reindex ``df`` onto ``idx`` and fill every resulting gap.

    Columns whose first level is ``'power'`` are filled with 0; every other
    column is filled with that column's median, computed on the *original*
    ``df`` (i.e. before reindexing introduces new NaNs).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column labels are tuples such as ``('power', 'active')``.
        It is not modified.
    idx : index-like
        Target index handed to ``DataFrame.reindex``.

    Returns
    -------
    pandas.DataFrame
        A new frame indexed by ``idx`` with the gaps filled as described.
    """
    # reindex() already returns a new object, so no explicit copy is needed.
    filled = df.reindex(idx)

    for column in df.columns:
        fill_value = 0 if column[0] == 'power' else df[column].median()
        # Assign the result back instead of calling fillna(inplace=True) on
        # the temporary returned by `filled[column]`: the in-place form
        # relies on chained assignment, which does not update the parent
        # frame under pandas Copy-on-Write.
        filled[column] = filled[column].fillna(fill_value)

    return filled


In [3]:
# Translates a raw CSV column header into the two-level column label
# (first level, second level) used for the converted frames.  Several
# headers are aliases that resolve to the same label.
column_mapping = {
    # long-form frequency / voltage headers
    'frequency':      ('frequency', ''),
    'voltage':        ('voltage', ''),
    # active power and energy
    'W':              ('power', 'active'),
    'energy':         ('energy', 'apparent'),
    # current
    'A':              ('current', ''),
    # long-form reactive / apparent power headers
    'reactive_power': ('power', 'reactive'),
    'apparent_power': ('power', 'apparent'),
    # power factor (two spellings) and phase angle
    'power_factor':   ('pf', ''),
    'PF':             ('pf', ''),
    'phase_angle':    ('phi', ''),
    # short-form aliases
    'VA':             ('power', 'apparent'),
    'VAR':            ('power', 'reactive'),
    'VLN':            ('voltage', ''),
    'V':              ('voltage', ''),
    'f':              ('frequency', ''),
}

In [4]:
# Conversion settings shared by the cells below.
# Name of the CSV column that holds the sample time.
TIMESTAMP_COLUMN_NAME = "timestamp"
# Time zone the converted index is expressed in.
TIMEZONE = "Europe/London"
# Bounds of the regular index every meter is aligned to.
START_DATETIME, END_DATETIME = '2021-09-30', '2021-10-05'
# One-minute sampling period.  "min" is used instead of the "T" alias, which
# newer pandas releases deprecate; both denote the same frequency.
FREQ = "1min"

In [5]:
def convert_iawe(iawe_path, output_filename, format="HDF",
                 metadata_dir=None, n_meters=5):
    """
    Convert per-meter CSV files into a datastore and attach YAML metadata.

    For each meter ``n`` in ``1..n_meters`` the file
    ``<iawe_path>/electricity/<n>.csv`` is read, de-duplicated on the
    timestamp column, relabelled through ``column_mapping``, resampled to
    ``FREQ`` (mean), aligned to the regular index spanning
    ``START_DATETIME``..``END_DATETIME`` and written under
    ``Key(building=1, meter=n)``.

    Parameters
    ----------
    iawe_path : str
        The root path of the iawe dataset.
    output_filename : str
        The destination filename (including path and suffix).
    format : str, optional
        Datastore format forwarded to ``get_datastore`` (default ``"HDF"``).
    metadata_dir : str, optional
        Directory forwarded to ``convert_yaml_to_hdf5``.  When omitted, the
        local metadata directory this notebook was originally written
        against is used, so existing calls behave as before.
    n_meters : int, optional
        Number of meter CSV files (mains plus devices) to convert, numbered
        from 1.  Defaults to 5.

    Raises
    ------
    KeyError
        If a CSV contains a column with no entry in ``column_mapping``.
    """
    check_directory_exists(iawe_path)

    if metadata_dir is None:
        # Original hard-coded location, kept as the default for backward
        # compatibility.  An earlier variant derived it from the package:
        # join(get_module_directory(), 'dataset_converters', 'iawe', 'metadata')
        metadata_dir = r'C:\Users\micki\nilmtk_test\SELdata\metadata'

    # Regular target index: built as naive timestamps, declared GMT, then
    # converted to the dataset time zone.
    idx = pd.date_range(start=START_DATETIME, end=END_DATETIME, freq=FREQ)
    idx = idx.tz_localize('GMT').tz_convert(TIMEZONE)

    electricity_path = join(iawe_path, "electricity")

    # Open data store; the try/finally guarantees it is closed even when a
    # meter file is missing or malformed.
    store = get_datastore(output_filename, format, mode='w')
    try:
        for chan in range(1, n_meters + 1):
            key = Key(building=1, meter=chan)
            filename = join(electricity_path, "%d.csv" % chan)
            print('Loading ', chan)

            # dtype=np.float64 makes every column numeric on load, so no
            # further numeric coercion is needed afterwards.
            df = pd.read_csv(filename, dtype=np.float64, na_values='\\N')
            df = df.drop_duplicates(subset=[TIMESTAMP_COLUMN_NAME])

            # Timestamps are interpreted as seconds since the epoch (UTC).
            df.index = pd.to_datetime(
                df[TIMESTAMP_COLUMN_NAME].values, unit='s', utc=True)
            df = df.tz_convert(TIMEZONE)
            # Keyword form: the positional `axis` argument of drop() is not
            # accepted by pandas 2.x.
            df = df.drop(columns=TIMESTAMP_COLUMN_NAME)

            unknown = [x for x in df.columns if x not in column_mapping]
            if unknown:
                raise KeyError(
                    "No column_mapping entry for column(s) %s in %s"
                    % (unknown, filename))
            df.columns = pd.MultiIndex.from_tuples(
                [column_mapping[x] for x in df.columns],
                names=LEVEL_NAMES
            )

            df = df.dropna()
            df = df.astype(np.float32)
            df = df.sort_index()
            # Same period as the target index, so both stay in step.
            df = df.resample(FREQ).mean()
            df = reindex_fill_na(df, idx)
            assert df.isnull().sum().sum() == 0, \
                "meter %d still contains NaN after filling" % chan
            store.put(str(key), df)
    finally:
        store.close()

    convert_yaml_to_hdf5(metadata_dir, output_filename)

    print("Done converting iAWE to HDF5!")

In [6]:
# Convert the house_1 CSV files and write the result to converted_v14.h5.
convert_iawe(
    iawe_path=r'C:\Users\micki\nilmtk_test\SELdata\house_1',
    output_filename=r'C:\Users\micki\nilmtk_test\converted_v14.h5',
)

Loading  1
Loading  2
Loading  3
Loading  4
Loading  5
Done converting YAML metadata to HDF5!
Done converting iAWE to HDF5!


In [7]:
# From my NILM experiments with the iAWE dataset:

# from nilmtk import DataSet

# iawe = DataSet(r'C:\Users\micki\nilmtk_test\converted20211007_001337.h5')
# elec = iawe.buildings[1].elec
# elec

In [8]:
import pandas as pd 
import h5py
# Open the converted file read-only for inspection.  The default HDFStore
# mode is append, which would silently create an empty file if the path were
# wrong and takes a writable handle that is not needed here.
iawe = pd.HDFStore(r'C:\Users\micki\nilmtk_test\converted_v14.h5', mode='r')
# Load the table stored for meter 1 and display it as the cell output.
iawe_df = iawe['/building1/elec/meter1']
iawe_df

physical_quantity,power
type,active
2021-09-30 01:00:00+01:00,5.204736
2021-09-30 01:01:00+01:00,5.291828
2021-09-30 01:02:00+01:00,0.370212
2021-09-30 01:03:00+01:00,0.110858
2021-09-30 01:04:00+01:00,0.381582
...,...
2021-10-05 00:56:00+01:00,0.000000
2021-10-05 00:57:00+01:00,0.000000
2021-10-05 00:58:00+01:00,0.000000
2021-10-05 00:59:00+01:00,0.000000


In [18]:
# List every key held in the `iawe` HDFStore opened earlier in the notebook.
iawe.keys()

['/building1/elec/meter1',
 '/building1/elec/meter2',
 '/building1/elec/meter3',
 '/building1/elec/meter4',
 '/building1/elec/meter5']

In [19]:
import h5py
import numpy as np
import pandas as pd

# Alternative input, kept commented out for reference:
# dataset = h5py.File(r'C:\Users\micki\nilmtk_test\data\iawe.h5', 'r')
# Open the HDF5 file read-only with h5py so its group layout can be browsed.
# NOTE(review): this reads converted_v12.h5, while the conversion cell in this
# notebook writes converted_v14.h5 — confirm which file is intended.
dataset = h5py.File(r'C:\Users\micki\nilmtk_test\converted_v12.h5', 'r')


In [20]:
# Show the top-level groups of the file opened with h5py.
dataset.keys()

<KeysViewHDF5 ['building1']>

In [21]:
# Step into the 'building1' group and list its children.
building1 = dataset['building1']
building1.keys()

<KeysViewHDF5 ['elec']>

In [22]:
# List the entries under building1/elec.
dataset['building1']['elec'].keys()

<KeysViewHDF5 ['meter1', 'meter2', 'meter3', 'meter4', 'meter5']>

In [23]:
# List the entries stored under building1/elec/meter2.
dataset['building1']['elec']['meter2'].keys()

<KeysViewHDF5 ['_i_table', 'table']>

In [2]:
from nilmtk import DataSet

# Load the converted file as a NILMTK DataSet.
# NOTE(review): this reads converted_v12.h5, while the conversion cell in this
# notebook writes converted_v14.h5 — confirm which file is intended.
sel = DataSet(r'C:\Users\micki\nilmtk_test\converted_v12.h5')
# Take the `elec` attribute of building 1 and display it as the cell output.
elec = sel.buildings[1].elec
elec


MeterGroup(meters=
  ElecMeter(instance=1, building=1, dataset='SEL', site_meter, appliances=[])
  ElecMeter(instance=2, building=1, dataset='SEL', appliances=[Appliance(type='fridge', instance=1)])
  ElecMeter(instance=3, building=1, dataset='SEL', appliances=[Appliance(type='electric oven', instance=1)])
  ElecMeter(instance=4, building=1, dataset='SEL', appliances=[Appliance(type='kettle', instance=1)])
  ElecMeter(instance=5, building=1, dataset='SEL', appliances=[Appliance(type='stove', instance=1)])
)