# imports

In [1]:
# Load (or reload) IPython's autoreload extension and select mode 2, so edits to
# imported modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [None]:
!pip uninstall -y -q nilmtk nilm_metadata
!pip install tqdm
!python3 -m pip install --no-deps git+https://github.com/nilmtk/nilmtk@master
!python3 -m pip install --no-deps git+https://github.com/nilmtk/nilm_metadata@master

In [3]:
import warnings
# Installed before the nilmtk import below; presumably so that FutureWarnings
# emitted while importing nilmtk and its dependencies are silenced as well
# (TODO confirm). Note that this also hides deprecation notices from pandas.
warnings.simplefilter(action='ignore', category=FutureWarning)
from nilmtk import DataSet
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from collections import defaultdict
from joblib import Memory
import pickle
from tqdm import tqdm
import multiprocessing as mp
from typing import Dict
# joblib cache stored under ./cache (relative to the working directory); applied
# below as @memory.cache to memoize load_dataset().
memory = Memory(location='./cache')


# functions

In [None]:
@memory.cache
def load_dataset(path='shared/nilm-start/refit.hdf5'):
    """Load every meter of every building from a NILMTK dataset file.

    Parameters
    ----------
    path : str, optional
        Location of the dataset's .hdf5 file. Defaults to the REFIT file used
        by this notebook.

    Returns
    -------
    list of tuple
        One ``(building_idx, appliance_types, data, good_sections)`` entry per
        meter, where ``appliance_types`` is the list of ``type['type']`` values
        of the meter's appliances, ``data`` is the single-element list produced
        by ``meter.load()`` and ``good_sections`` is ``meter.good_sections()``.

    Raises
    ------
    AssertionError
        If a meter's ``load()`` does not yield exactly one chunk.

    Notes
    -----
    The result is memoized by ``memory.cache``. The dataset store is closed
    whether loading succeeds or fails; everything returned has already been
    materialized by then.
    """
    # Open outside the try block: if this fails there is no store to close, and
    # the original error must not be masked by a NameError in the cleanup.
    dataset = DataSet(path)
    try:
        buildings = dataset.buildings

        # Total number of meters, used only to size the progress bar. Iterating
        # the values avoids assuming the building keys are exactly 1..N.
        total_meters = sum(
            len(building.elec.all_meters()) for building in buildings.values()
        )

        samples = []
        with tqdm(total=total_meters, desc='loading_meters', smoothing=0) as pbar:
            for building_idx, building in buildings.items():
                for meter in building.elec.all_meters():
                    # meter.load() is consumed eagerly so nothing lazy outlives
                    # the store.
                    data = list(meter.load())
                    assert len(data) == 1, (
                        'expected exactly one chunk per meter, got %d' % len(data)
                    )

                    appliance_types = [a.type['type'] for a in meter.appliances]
                    samples.append(
                        (building_idx, appliance_types, data, meter.good_sections())
                    )
                    pbar.update(1)

        return samples
    finally:
        # Release the underlying store on success as well as on failure.
        dataset.store.close()

In [22]:
def data_preparation(dataset):
    """Group the good sections of every labelled meter by appliance type.

    Parameters
    ----------
    dataset : iterable of tuple
        ``(building_idx, appliances, data, good_sections)`` entries. ``data``
        must be a sequence whose first element can be sliced with
        ``[start:end]``; each item of ``good_sections`` must expose ``start``
        and ``end``.

    Returns
    -------
    collections.defaultdict
        Maps the first appliance type of each meter to the list of slices of
        that meter's data, one slice per good section. Meters without any
        appliance are skipped.
    """
    # `list` instead of `lambda: []`: same behaviour, but the returned mapping
    # stays picklable (lambdas are not).
    X = defaultdict(list)

    for _building_idx, appliances, data, good_sections in tqdm(dataset):
        if not appliances:
            continue

        # Only the first appliance type labels the meter; only the first data
        # chunk is used.
        appliance = appliances[0]
        meter_data = data[0]

        X[appliance].extend(
            [meter_data[section.start:section.end] for section in good_sections]
        )

    return X

# Bind `dataset` explicitly so this cell does not depend on a variable left
# behind by a cell that is no longer in the notebook (Restart & Run All safe).
dataset = load_dataset()
prepared_data = data_preparation(dataset)

100%|██████████| 111/111 [00:18<00:00,  5.97it/s]


In [23]:
# Tunables for the clean-up below.
RESAMPLE_PERIOD = '7s'   # regular grid every section is resampled onto
MIN_PEAK_POWER = 20      # sections never exceeding this value are dropped (the original note calls it "< 20W")

processed_data = {}

for appliance, samples in tqdm(prepared_data.items(), smoothing=0):
    processed_samples = []
    for sample in samples:
        # Put the section on a regular grid; new slots take the next valid
        # reading. Resampler.bfill() replaces the deprecated
        # Resampler.fillna("backfill") and fills the same way.
        # (Alternative tried earlier: .ffill(limit=1).fillna(0).)
        sample = sample.resample(RESAMPLE_PERIOD).bfill()

        # A usable section needs at least two points after resampling.
        if len(sample) < 2:
            continue

        # Drop sections in which no reading exceeds the threshold.
        # TODO: revisit the < 20 W filter (see Blaž's article).
        if not np.any(sample.to_numpy() > MIN_PEAK_POWER):
            continue

        processed_samples.append(sample)

    processed_data[appliance] = processed_samples

# Discard the 'unknown' appliance type if present; the trailing ';' keeps the
# popped value from being echoed as cell output.
processed_data.pop('unknown', None);


100%|██████████| 54/54 [00:52<00:00,  1.03it/s]


In [None]:
# Persist the cleaned sections; the context manager flushes and closes the file
# handle, which the bare open(...) call never did.
with open('processed_dataset', 'wb') as output_file:
    pickle.dump(processed_data, output_file)
