Load Dataset

In [29]:
from nilmtk import DataSet
import pandas as pd
import numpy as np
import warnings

# Silence every warning category for the rest of the session.
warnings.simplefilter('ignore')

# Only houses 1, 2 and 5 are processed.
house_indicies = [1, 2, 5]

# Open the UK-DALE dataset from its HDF5 file.
ukdale = DataSet(r'C:\Users\Raymond Tie\Desktop\NILM\datasets\ukdale.h5')

# Hyperparameters
sample_period = 6    # passed to NILMTK's load(sample_period=...) and used as the resample step in seconds
noise_threshold = 5  # readings below this many Watts are treated as noise



In [None]:

# ---------------------------------------------------------------------------------------------------
#                          Per-house mains + appliance power table
#
# For every house in `house_indicies` this cell:
#   1. loads the first chunk of active mains power,
#   2. puts it on a regular `sample_period`-second grid and cleans it
#      (short-gap forward fill, drop remaining NaN rows, drop non-positive rows),
#   3. joins each requested appliance's active power onto the mains timeline,
#   4. sets every reading below `noise_threshold` to 0,
# and finally stacks all houses into `entire_data`.
# ---------------------------------------------------------------------------------------------------

# Store all processed data (one DataFrame per house)
all_house_data = []

# Appliance names requested for each house, in the same order as `house_indicies`.
# NOTE(review): names are matched by exact equality against `identifier.type`.
# Spellings such as 'dishwasher', 'dish_washer' or 'fridge_freezer' may never match
# types that are spelled with spaces (e.g. 'dish washer', 'fridge freezer'); those
# appliances would then be silently zero-filled. The column labels also differ
# between houses, so the final concat would keep them as separate, partly-NaN
# columns. Confirm against `elec.appliances` for each building.
appliance_name = [['kettle', 'microwave', 'fridge', 'dishwasher', 'washer dryer'],
                   ['kettle', 'microwave', 'fridge', 'dish_washer', 'washer dryer'],
                   ['kettle', 'microwave', 'fridge_freezer', 'dishwasher', 'washer dryer']]

# Not used in this cell; presumably the unified labels used later - TODO confirm.
appliance_name2 = ['kettle', 'microwave', 'fridge', 'dishwasher', 'washingmachine']

# Resampling step. A Timedelta is used instead of the "<n>S" string alias because
# the upper-case "S" alias is deprecated in recent pandas releases.
resample_rule = pd.Timedelta(seconds=sample_period)

# Load power for the selected houses
for idx, house_id in enumerate(house_indicies):
    print(f"Loading power data for Building {house_id}")

    # Electricity data for this house (all meters of the building)
    elec = ukdale.buildings[house_id].elec

    # Mains (aggregate) power: only the first item yielded by load() is used
    df_mains = next(elec.mains().load(sample_period=sample_period))

    # Keep active power only, as a single-column frame named 'P_mains'
    df_mains = df_mains['power']['active'].to_frame(name='P_mains')

    # -----------------------------------------------------------------------------------------------
    #  Data quality filtering for mains data
    # -----------------------------------------------------------------------------------------------

    # Regularize timestamps, then bridge short gaps (<= 30 samples) with the
    # last valid observation. `.ffill` replaces the deprecated
    # `fillna(method='ffill')` and behaves identically.
    df_mains = df_mains.resample(resample_rule).mean()
    df_mains = df_mains.ffill(limit=30)

    # Drop whatever is still missing after the gap fill
    df_mains = df_mains.dropna()

    # Drop non-positive readings (negative values and exact zeros). The copy is
    # taken after the filter so the in-place assignments below operate on an
    # independent frame rather than on a filtered slice.
    df_mains = df_mains[df_mains["P_mains"] > 0].copy()

    # Treat mains readings below the noise threshold as 0
    df_mains[df_mains < noise_threshold] = 0

    # -----------------------------------------------------------------------------------------------
    #  Appliance data capturing
    # -----------------------------------------------------------------------------------------------

    # Appliance types present in this building, collected once per house
    available_types = [appliance.identifier.type for appliance in elec.appliances]

    for app in appliance_name[idx]:
        if app not in available_types:
            # Appliance not found, fill with zeros
            df_mains[app] = 0.0
            print(f"  - {app} not found, filled with zeros")
            continue

        # Load appliance data (first item yielded by load(), active power only)
        df_app = next(elec[app].load(sample_period=sample_period))
        df_app = df_app['power']['active'].to_frame(name=app)

        # Same grid and short-gap fill as the mains signal
        df_app = df_app.resample(resample_rule).mean().ffill(limit=30)

        # Merge the appliance into the mains data; only timestamps present in
        # both frames are kept
        df_mains = df_mains.join(df_app, how='inner')

        # Identify impossible cases where appliance > total power
        mask_invalid = df_mains[app] > df_mains['P_mains']

        # Replace invalid samples with NaN first ...
        df_mains.loc[mask_invalid, app] = np.nan

        # ... then forward fill with the last valid value (at most 5 samples in a row)
        df_mains[app] = df_mains[app].ffill(limit=5)

        print(f"  - Loaded {app} data")

    # Set readings below the noise threshold to 0 in every column (mains and
    # appliances). Done before the frame is stored, so the stored result does
    # not depend on the list entry aliasing `df_mains`.
    df_mains[df_mains < noise_threshold] = 0

    # Store processed data
    all_house_data.append(df_mains)
    print(f"Building {house_id} processed successfully")


if all_house_data:
    # Stack all houses; the timestamp index is discarded by ignore_index=True
    entire_data = pd.concat(all_house_data, ignore_index=True)
    # A bare `len(...)` expression inside an `if` is never displayed, so print it
    print(f"Total samples across all houses: {len(entire_data)}")



Loading power data for Building 1
  Available appliances in Building 1:
    - active subwoofer
    - HTPC
    - vacuum cleaner
    - fridge freezer
    - boiler
    - light
    - ethernet switch
    - radio
    - hair straighteners
    - light
    - clothes iron
    - security alarm
    - light
    - light
    - laptop computer
    - light
    - audio system
    - water pump
    - soldering iron
    - mobile phone charger
    - charger
    - external hard disk
    - kitchen aid
    - audio amplifier
    - computer monitor
    - toaster
    - radio
    - immersion heater
    - laptop computer
    - light
    - microwave
    - toasted sandwich maker
    - breadmaker
    - washer dryer
    - light
    - light
    - light
    - light
    - solar thermal pumping station
    - light
    - fan
    - food processor
    - kettle
    - dish washer
    - USB hub
    - baby monitor
    - drill
    - mobile phone charger
    - wireless phone charger
    - television
    - tablet computer charger
  

Configure Variable