In [2]:
import os
import traceback
import joblib
import pickle
import pandas as pd
from itertools import product

from sklearn.neural_network import MLPClassifier
from nilmlab.factories import TransformerFactory

from datasources.datasource import DatasourceFactory
from experiments import GenericExperiment
from nilmlab.factories import EnvironmentFactory
from nilmlab.lab import TimeSeriesLength
from utils.logger import debug

In [3]:
LOG_DIR = "/l/users/roberto.guillen/nilm/logs/"
PRETRAINED_DIR  = "/l/users/roberto.guillen/nilm/pretrained_models/"
dirname = os.path.abspath('')

    # Results folder
dirname_res = os.path.join(dirname, "../results/")
if not os.path.exists(dirname_res):
    os.mkdir(dirname_res)

    # Pretrained KNN weights folder
dirname_pre = os.path.join(dirname, "../pretrained_models/")
if not os.path.exists(dirname_pre):
    os.mkdir(dirname_pre)

    # Log file folder
if not os.path.exists(LOG_DIR):
    os.mkdir(LOG_DIR)

    # Pretrained models folder 2
if not os.path.exists(PRETRAINED_DIR):
    os.mkdir(PRETRAINED_DIR)

In [4]:
def get_time_series_length(ts_id: str = "Hour"):
    if ts_id == "10Min":
        ts = TimeSeriesLength.WINDOW_10_MINS
    elif ts_id == "Hour":
        ts = TimeSeriesLength.WINDOW_1_HOUR
    elif ts_id == "Day":
        ts = TimeSeriesLength.WINDOW_1_DAY
    return ts

In [5]:
def get_classifier(id: int = 0):
    if id == 0:
        classifier = MLPClassifier(hidden_layer_sizes=(1000,), learning_rate='adaptive', solver='adam',early_stopping=True,validation_fraction=0.2)
    elif id == 1:
        classifier = MLPClassifier(hidden_layer_sizes=(2000, 100, 100), learning_rate='adaptive', solver='adam',early_stopping=True,validation_fraction=0.2)
    elif id == 2:
        classifier = MLPClassifier(hidden_layer_sizes=(1000, 100), learning_rate='adaptive', solver='adam',early_stopping=True,validation_fraction=0.2)
    elif id == 3:
        classifier = MLPClassifier(hidden_layer_sizes=(100, 100, 100), learning_rate='adaptive', solver='adam',early_stopping=True,validation_fraction=0.2)
    return classifier

In [6]:
def get_datasource(datasource_ix = 0): # 0,false,ukdale    / 1,true,redd
    if datasource_ix:
        datasource_name = 'redd'
        datasource = DatasourceFactory.create_redd_datasource()
        appliances = [
            'unknown', 'electric oven','sockets', 'electric space heater', 'microwave', 
            'washer dryer', 'light', 'electric stove', 'dish washer', 'fridge'
        ]
        
        env = EnvironmentFactory.create_env_single_building(
            datasource=datasource,
            building=1,
            sample_period=6,
            train_year="2011-2011",
            train_start_date="4-1-2011",
            train_end_date="4-30-2011",
            test_year="2011",
            test_start_date="5-1-2011",
            test_end_date="5-2-2011",
            appliances=appliances
        )
    else:
        datasource_name = 'ukdale' # Review name change
        datasource = DatasourceFactory.create_uk_dale_datasource()
        appliances = [
            'microwave', 'dish washer', 'fridge', 'kettle', 'washer dryer',
            'toaster', 'television'
        ]
        env = EnvironmentFactory.create_env_single_building(
            datasource=DatasourceFactory.create_uk_dale_datasource(),
            building=1,
            sample_period=6,
            train_year="2013-2013",
            train_start_date="6-2-2013",
            train_end_date="6-3-2013",
            # train_year="2013-2014",
            # train_start_date="4-12-2013",
            # train_end_date="6-01-2014",
            test_year="2014",
            test_start_date="6-2-2014",
            test_end_date="6-3-2014",
            # test_start_date="6-2-2014",
            # test_end_date="12-30-2014",
            appliances=appliances
        )
    experiment = GenericExperiment(env)
    return datasource_name, appliances, experiment

In [7]:
datasource_ix = 0
datasource_name = "ukdale"
i = 99999999
components = 8
ts_length = "Day"
num_rep_vec = 1
classifier_type = 0 

# Review if log file exists
exp_name = "mys2v_components_%d_tsLength_%s_numRepVec_%d_classifier_type_%d"%(components, ts_length, num_rep_vec,classifier_type) 
log_file_name = datasource_name + "/" + exp_name + ".log" 
log_file_path = os.path.join(LOG_DIR, log_file_name)
if os.path.exists(log_file_path):
    # Skip if exists
    pass
# Create file
pickle.dump({},open(log_file_path,"wb"))
print("\n Working on setting", i, " PATH: " ,log_file_path)

# Names for files 
PRETRAINED_DIR = PRETRAINED_DIR + datasource_name + "/" 
dirname_res = dirname_res + datasource_name + "/" 
mys2v_knn_weights = os.path.join(PRETRAINED_DIR, f'{exp_name}_weight.pkl')
mys2v_embedding = os.path.join(PRETRAINED_DIR, f'{exp_name}_emb.pkl')
results_file_name = os.path.join(dirname_res, f'{exp_name}.csv')
print(mys2v_knn_weights,mys2v_embedding,results_file_name)

# Other params
window_size = 10
window_step = 1
epochs  = 2

models =  infer_mysignal2vec_experiment = {
    'MYSIGNAL2VEC_Build' : {
        'CLF_MODELS' : [ 
            get_classifier(classifier_type),
        ],
        'TRANSFORMER_MODELS': [
            TransformerFactory.build_mysignal2vec_train(num_rep_vec, window_size, window_step, components, components, epochs, exp_name),
        ]
    },
    'MYSIGNAL2VEC_Infer' : {
        'CLF_MODELS' : [ 
            get_classifier(classifier_type),
        ],
        'TRANSFORMER_MODELS': [
            TransformerFactory.build_mysignal2vec_infer(mys2v_knn_weights, mys2v_embedding, num_rep_vec),
        ]
    }
}

datasource_name, appliances, experiment = get_datasource(datasource_ix)
for k in models.keys():      
    experiment.setup_running_params(
        transformer_models=models[k]['TRANSFORMER_MODELS'],
        classifier_models=models[k]['CLF_MODELS'],
        train_appliances=appliances,
        test_appliances=appliances,
        ts_len=get_time_series_length(ts_length),
        repeat=1
    )

    experiment.set_checkpoint_file(results_file_name)
    tb = "No error"
    
    try:
        experiment.run()
    except Exception as e:
        tb = traceback.format_exc()
        debug(tb)
        debug(f"Failed for {k}")
        debug(f"{e}")
    
# TODO change how saving is handled
df = pd.read_csv(results_file_name)
joblib.dump(df, log_file_path)


 Working on setting 99999999  PATH:  /l/users/roberto.guillen/nilm/logs/ukdale/mys2v_components_8_tsLength_Day_numRepVec_1_classifier_type_0.log
/l/users/roberto.guillen/nilm/pretrained_models/ukdale/mys2v_components_8_tsLength_Day_numRepVec_1_classifier_type_0_weight.pkl /l/users/roberto.guillen/nilm/pretrained_models/ukdale/mys2v_components_8_tsLength_Day_numRepVec_1_classifier_type_0_emb.pkl /home/roberto.guillen/Documents/multi-nilm/experiments/../results/ukdale/mys2v_components_8_tsLength_Day_numRepVec_1_classifier_type_0.csv
INFO: Reading data from specified meters. 
-Building: 1
-Appliances ['microwave', 'dish washer', 'fridge', 'kettle', 'washer dryer', 'toaster', 'television']
DEBUG:  read_selected_appliances ['microwave', 'dish washer', 'fridge', 'kettle', 'washer dryer', 'toaster', 'television'], 1, 6-2-2013, 6-3-2013, True
TIMING: NILMTK select using appliances: 0.12


2022-07-01 01:44:38.115 | DEBUG    | nilmlab.lab:setup_one_building:576 - Length of data of all loaded meters 14400
2022-07-01 01:44:38.118 | DEBUG    | nilmlab.lab:setup_one_building:578 - Length of data of all loaded meters 14400
2022-07-01 01:44:38.119 | INFO     | datasources.labels_factory:create_multilabels_from_meters:28 - Creating multilabels from meter washer dryer, 
labels2id[col] (5, 1, 'UK-DALE')
metergroup[labels2id[col]] ElecMeter(instance=5, building=1, dataset='UK-DALE', appliances=[Appliance(type='washer dryer', instance=1), Appliance(type='washer dryer', instance=2)])
2022-07-01 01:44:38.119 | DEBUG    | datasources.labels_factory:create_multilabels_from_meters:37 - meters[col].values.astype(float) washer dryer - [0. 0. 0. ... 0. 0. 0.]


TIMING: NILMTK converting specified appliances to dataframe: 0.31
DEBUG: Length of data of read_selected_appliances 14400
INFO: Df columns before normalization Index([ (5, 1, 'UK-DALE'), (11, 1, 'UK-DALE'),  (7, 1, 'UK-DALE'),
       (54, 1, 'UK-DALE'), (13, 1, 'UK-DALE'), (10, 1, 'UK-DALE'),
        (6, 1, 'UK-DALE'), (12, 1, 'UK-DALE')],
      dtype='object')
INFO: Labels before normalization ['Washer dryer', 'Toaster', 'Television', 'Site meter', 'Microwave', 'Kettle', 'Dish washer', 'Fridge freezer']
INFO: washer dryer ~ Washer dryer (100%)
INFO: toaster ~ Toaster (100%)
INFO: television ~ Television (100%)
INFO: microwave ~ Microwave (100%)
INFO: kettle ~ Kettle (100%)
INFO: dish washer ~ Dish washer (100%)
INFO: fridge ~ Fridge freezer (100%)
INFO: Normalized labels ['washer dryer', 'toaster', 'television', 'Site meter', 'microwave', 'kettle', 'dish washer', 'fridge']
INFO: Meters that have been loaded (all_df.columns):
Index(['washer dryer', 'toaster', 'television', 'Site meter'

The keyword argument 'parallel=True' was specified but no transformation for parallel execution was possible.

To find out why, try turning on parallel diagnostics, see https://numba.readthedocs.io/en/stable/user/parallel.html#diagnostics for help.

File "../datasources/labels_factory.py", line 95:
# 1 if appliance is on at that time and 0 if appliance is off at that time (usually 6 secs)
def create_labels(array, threshold):
^

  state.func_ir.loc))
2022-07-01 01:44:38.600 | INFO     | datasources.labels_factory:create_multilabels_from_meters:28 - Creating multilabels from meter toaster, 
labels2id[col] (11, 1, 'UK-DALE')
metergroup[labels2id[col]] ElecMeter(instance=11, building=1, dataset='UK-DALE', appliances=[Appliance(type='toaster', instance=1), Appliance(type='kitchen aid', instance=1), Appliance(type='food processor', instance=2)])
2022-07-01 01:44:38.602 | DEBUG    | datasources.labels_factory:create_multilabels_from_meters:37 - meters[col].values.astype(float) toaster - [0. 0

TIMING: Create multilabels from meters 0.5
INFO: Reading data from specified meters. 
-Building: 1
-Appliances ['microwave', 'dish washer', 'fridge', 'kettle', 'washer dryer', 'toaster', 'television']
DEBUG:  read_selected_appliances ['microwave', 'dish washer', 'fridge', 'kettle', 'washer dryer', 'toaster', 'television'], 1, 6-2-2014, 6-3-2014, True
TIMING: NILMTK select using appliances: 0.14


2022-07-01 01:44:39.012 | DEBUG    | nilmlab.lab:setup_one_building:576 - Length of data of all loaded meters 14400
2022-07-01 01:44:39.014 | DEBUG    | nilmlab.lab:setup_one_building:578 - Length of data of all loaded meters 14400
2022-07-01 01:44:39.015 | INFO     | datasources.labels_factory:create_multilabels_from_meters:28 - Creating multilabels from meter washer dryer, 
labels2id[col] (5, 1, 'UK-DALE')
metergroup[labels2id[col]] ElecMeter(instance=5, building=1, dataset='UK-DALE', appliances=[Appliance(type='washer dryer', instance=1), Appliance(type='washer dryer', instance=2)])
2022-07-01 01:44:39.016 | DEBUG    | datasources.labels_factory:create_multilabels_from_meters:37 - meters[col].values.astype(float) washer dryer - [0. 0. 0. ... 0. 0. 0.]
2022-07-01 01:44:39.017 | INFO     | datasources.labels_factory:create_multilabels_from_meters:28 - Creating multilabels from meter toaster, 
labels2id[col] (11, 1, 'UK-DALE')
metergroup[labels2id[col]] ElecMeter(instance=11, building=

TIMING: NILMTK converting specified appliances to dataframe: 0.24
DEBUG: Length of data of read_selected_appliances 14400
INFO: Df columns before normalization Index([ (5, 1, 'UK-DALE'), (11, 1, 'UK-DALE'),  (7, 1, 'UK-DALE'),
       (54, 1, 'UK-DALE'), (13, 1, 'UK-DALE'), (10, 1, 'UK-DALE'),
        (6, 1, 'UK-DALE'), (12, 1, 'UK-DALE')],
      dtype='object')
INFO: Labels before normalization ['Washer dryer', 'Toaster', 'Television', 'Site meter', 'Microwave', 'Kettle', 'Dish washer', 'Fridge freezer']
INFO: washer dryer ~ Washer dryer (100%)
INFO: toaster ~ Toaster (100%)
INFO: television ~ Television (100%)
INFO: microwave ~ Microwave (100%)
INFO: kettle ~ Kettle (100%)
INFO: dish washer ~ Dish washer (100%)
INFO: fridge ~ Fridge freezer (100%)
INFO: Normalized labels ['washer dryer', 'toaster', 'television', 'Site meter', 'microwave', 'kettle', 'dish washer', 'fridge']
INFO: Meters that have been loaded (all_df.columns):
Index(['washer dryer', 'toaster', 'television', 'Site meter'

FileNotFoundError: [Errno 2] File b'/home/roberto.guillen/Documents/multi-nilm/experiments/../results/ukdale/mys2v_components_8_tsLength_Day_numRepVec_1_classifier_type_0.csv' does not exist: b'/home/roberto.guillen/Documents/multi-nilm/experiments/../results/ukdale/mys2v_components_8_tsLength_Day_numRepVec_1_classifier_type_0.csv'

In [None]:
datasource_ix = 0
datasource_name = "ukdale"

for i, (components, ts_length, num_rep_vec,classifier_type) in enumerate(product([2**p for p in range(2, 9)], ["10Min","Hour","Day"], [1,2,4,8],[0,1,2,3])):
    # Review if log file exists
    exp_name = "mys2v_components_%d_tsLength_%s_numRepVec_%d_classifier_type_%d"%(components, ts_length, num_rep_vec,classifier_type) 
    log_file_name = datasource_name + "/" + exp_name + ".log" 
    log_file_path = os.path.join(LOG_DIR, log_file_name)
    if os.path.exists(log_file_path):
        # Skip if exists
        continue
    # Create file
    pickle.dump({},open(log_file_path,"wb"))
    print("\n Working on setting", i, " PATH: " ,log_file_path)
    
    # Names for files 
    mys2v_knn_weights = os.path.join(PRETRAINED_DIR, f'{exp_name}_weight.pkl')
    mys2v_embedding = os.path.join(PRETRAINED_DIR, f'{exp_name}_emb.pkl')
    results_file_name = os.path.join(dirname_res, f'{exp_name}.csv')
    print(mys2v_knn_weights,mys2v_embedding,results_file_name)
    break
    # Other params
    window_size = 10
    window_step = 1
    epochs  = 2


    models =  infer_mysignal2vec_experiment = {
        'MYSIGNAL2VEC_Build' : {
            'CLF_MODELS' : [ 
                get_classifier(classifier_type),
            ],
            'TRANSFORMER_MODELS': [
                TransformerFactory.build_mysignal2vec_train(num_rep_vec, window_size, window_step, components, components, epochs, exp_name),
            ]
        },
        'MYSIGNAL2VEC_Infer' : {
            'CLF_MODELS' : [ 
                get_classifier(classifier_type),
            ],
            'TRANSFORMER_MODELS': [
            TransformerFactory.build_mysignal2vec_infer(mys2v_knn_weights, mys2v_embedding, num_rep_vec),
            ]
        }
    }

    datasource_name, appliances, experiment = get_datasource(datasource_ix)
    for k in models.keys():      
        experiment.setup_running_params(
            transformer_models=models[k]['TRANSFORMER_MODELS'],
            classifier_models=models[k]['CLF_MODELS'],
            train_appliances=appliances,
            test_appliances=appliances,
            ts_len=get_time_series_length(ts_length),
            repeat=1
        )

        experiment.set_checkpoint_file(results_file_name)
        tb = "No error"
        
        try:
            experiment.run()
        except Exception as e:
            tb = traceback.format_exc()
            debug(tb)
            debug(f"Failed for {k}")
            debug(f"{e}")
    # TODO change how saving is handled
    df = pd.read_csv(results_file_name)
    joblib.dump(df, log_file_path)

