### Sample code for comparing NILM algorithms

In [1]:
from __future__ import print_function, division
import time
from matplotlib import rcParams
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

rcParams['figure.figsize'] = (13, 6)

from nilmtk import DataSet, TimeFrame, MeterGroup, HDFDataStore
from nilmtk.disaggregate import CombinatorialOptimisation, FHMM, MLE

  from ._solve_toeplitz import levinson
  from ._decomp_update import *
  from . import _csparsetools
  from ._shortest_path import shortest_path, floyd_warshall, dijkstra,\
  from ._tools import csgraph_to_dense, csgraph_from_dense,\
  from ._traversal import breadth_first_order, depth_first_order, \
  from ._min_spanning_tree import minimum_spanning_tree
  from ._reordering import reverse_cuthill_mckee, maximum_bipartite_matching, \
  from ._ufuncs import *
  from ._ellip_harm_2 import _ellipsoid, _ellipsoid_norm
  from ._group_columns import group_dense, group_sparse
  from .ckdtree import *
  from .qhull import *
  from . import _voronoi
  from . import _hausdorff
  from .murmurhash import murmurhash3_32
  from . import _bspl
  from . import _stats
  from ._logistic_sigmoid import _log_logistic_sigmoid
  from .sparsefuncs_fast import csr_row_norms
  from .expected_mutual_info_fast import expected_mutual_information
  from .pairwise_fast import _chi2_kernel_fast, _sparse_manhattan
 

### Dividing data into train and test sets

In [2]:
# Single place to change the dataset location; both handles below read it.
REDD_PATH = '/home/shifona/Downloads/mini_project/REDD/redd.h5'

# Two separate handles on the same file, so that each one can later be
# restricted to its own time window.
# NOTE(review): the recorded run of this cell failed with
# "unable to lock file" -- presumably the file was still open in another
# process; confirm before re-running.
train = DataSet(REDD_PATH)
test = DataSet(REDD_PATH)

  from .utilsextension import (
  from . import hdf5extension
  from . import linkextension
  from . import tableextension
  from . import indexesextension


HDF5ExtError: HDF5 error back trace

  File "H5F.c", line 586, in H5Fopen
    unable to open file
  File "H5Fint.c", line 1305, in H5F_open
    unable to lock the file
  File "H5FD.c", line 1839, in H5FD_lock
    driver lock request failed
  File "H5FDsec2.c", line 940, in H5FD_sec2_lock
    unable to lock file, errno = 11, error message = 'Resource temporarily unavailable'

End of HDF5 error back trace

Unable to open/create file '/home/shifona/Downloads/mini_project/REDD/redd.h5'

Let us use building 1 for demo purposes.

In [3]:
# Number of the building selected for this demo.
building = 1

Let's split the data at April 30th, 2011.

In [4]:
# Training data ends at the split date; test data starts at it.
train.set_window(end="30-4-2011")
test.set_window(start="30-4-2011")

# Index with the `building` variable defined earlier instead of repeating
# the literal 1, so changing the building only requires one edit.
train_elec = train.buildings[building].elec
test_elec = test.buildings[building].elec

NameError: name 'train' is not defined

### Selecting top-5 appliances

In [5]:
# Keep only the 5 submeters returned by select_top_k
# (presumably ranked by energy consumption -- confirm against the nilmtk docs).
top_5_train_elec = train_elec.submeters().select_top_k(k=5)

NameError: name 'train_elec' is not defined

### Training and disaggregation

In [6]:
def predict(clf, test_elec, sample_period, timezone):
    """Disaggregate the mains of ``test_elec`` and align the result with ground truth.

    Parameters
    ----------
    clf : object
        Must provide ``disaggregate_chunk(chunk)``; it is called once per
        mains chunk (after rows containing NaN have been dropped) and its
        results are concatenated.
    test_elec : object
        Must provide ``mains().load(sample_period=...)`` yielding chunks and
        ``submeters().meters`` whose items provide ``load(sample_period=...)``
        and ``label()``.
    sample_period :
        Forwarded unchanged to every ``load`` call.
    timezone :
        Passed to ``tz_convert`` to build the index of the returned frames.

    Returns
    -------
    (gt_overall, pred_overall) : tuple of pandas.DataFrame
        Ground truth and predictions restricted to the timestamps present in
        both, with identical column order and columns renamed to the
        meters' ``label()`` values.
    """
    pred = {}
    gt = {}

    for i, chunk in enumerate(test_elec.mains().load(sample_period=sample_period)):
        chunk_drop_na = chunk.dropna()
        pred[i] = clf.disaggregate_chunk(chunk_drop_na)

        # Ground truth is loaded for *every* submeter here; the column
        # selection further down keeps only the meters that were predicted.
        # NOTE(review): a fresh load() generator is created on each pass and
        # only its first chunk is taken, so every mains chunk is paired with
        # the first chunk of each submeter -- confirm this is intended when
        # the mains yield more than one chunk.
        meter_data = {}
        for meter in test_elec.submeters().meters:
            # next(...) instead of .next(): works on Python 2 and 3.
            meter_data[meter] = next(meter.load(sample_period=sample_period))

        # Index of the first loaded meter is used as the frame index.
        first_index = next(iter(meter_data.values())).index
        gt[i] = pd.DataFrame(
            {k: v.squeeze() for k, v in meter_data.items()},
            index=first_index,
        ).dropna()

    # If everything can fit in memory
    gt_overall = pd.concat(gt)
    gt_overall.index = gt_overall.index.droplevel()
    pred_overall = pd.concat(pred)
    pred_overall.index = pred_overall.index.droplevel()

    # Having the same order of columns
    gt_overall = gt_overall[pred_overall.columns]

    # Intersection of index, computed in UTC
    gt_index_utc = gt_overall.index.tz_convert("UTC")
    pred_index_utc = pred_overall.index.tz_convert("UTC")
    common_index_utc = gt_index_utc.intersection(pred_index_utc)

    common_index_local = common_index_utc.tz_convert(timezone)
    # .loc replaces the removed .ix indexer; every label in the common index
    # exists in both frames, so label-based selection is equivalent here.
    gt_overall = gt_overall.loc[common_index_local]
    pred_overall = pred_overall.loc[common_index_local]

    appliance_labels = [m.label() for m in gt_overall.columns.values]
    gt_overall.columns = appliance_labels
    pred_overall.columns = appliance_labels
    return gt_overall, pred_overall

In [7]:
classifiers = {'CO': CombinatorialOptimisation(), 'FHMM': FHMM()}
predictions = {}
sample_period = 120

# .items() (not the Python-2-only .iteritems()) works on Python 2 and 3.
for clf_name, clf in classifiers.items():
    print("*" * 20)
    print(clf_name)
    print("*" * 20)
    clf.train(top_5_train_elec, sample_period=sample_period)
    # Pass the same sample_period used for training instead of a second
    # hard-coded 120. `gt` is overwritten on every pass, so later cells see
    # the ground truth returned for the last classifier.
    gt, predictions[clf_name] = predict(
        clf, test_elec, sample_period, train.metadata['timezone'])
   
    

********************
FHMM
********************


NameError: name 'top_5_train_elec' is not defined

In [8]:
def compute_rmse(gt, pred):
    """Return the root-mean-square error of every appliance as a Series.

    Parameters
    ----------
    gt : pandas.DataFrame
        Ground truth; its columns define the appliances that are evaluated.
    pred : mapping or pandas.DataFrame
        Predictions, indexable by the same appliance names.

    Returns
    -------
    pandas.Series
        One RMSE value per appliance, indexed by appliance name.

    Raises
    ------
    ValueError
        If, for any appliance, the two series are empty, differ in length or
        contain NaN/infinite values.
    """
    rms_error = {}
    for appliance in gt.columns:
        # Compare by position, independent of the pandas index.
        truth = np.asarray(gt[appliance], dtype=float)
        estimate = np.asarray(pred[appliance], dtype=float)

        if truth.shape != estimate.shape:
            raise ValueError(
                "Ground truth and prediction for %r differ in length: %d vs %d"
                % (appliance, truth.size, estimate.size))
        if truth.size == 0:
            raise ValueError("No samples to evaluate for %r" % (appliance,))
        if not (np.isfinite(truth).all() and np.isfinite(estimate).all()):
            raise ValueError("Input for %r contains NaN or infinity" % (appliance,))

        rms_error[appliance] = np.sqrt(np.mean((truth - estimate) ** 2))
    return pd.Series(rms_error)
def compute_acc(gt, pred):
    """Return the overall disaggregation accuracy across appliances.

    The value is ``1 - sum(|pred - gt|) / (2 * sum(gt))``, where both sums
    run over every sample of every considered appliance.

    Only appliances whose prediction series has the maximum length found
    among ``gt.columns`` are considered; the first that many samples of the
    matching ground-truth series are used.

    Parameters
    ----------
    gt : pandas.DataFrame
        Ground truth; its columns define the candidate appliances.
    pred : mapping or pandas.DataFrame
        Predictions, indexable by the same appliance names.

    Returns
    -------
    numpy.float64
        The accuracy. NaN when there is nothing to evaluate (0 / 0) and
        -inf when the ground truth sums to zero but the error does not.

    Raises
    ------
    IndexError
        If a ground-truth series is shorter than its prediction series.
    """
    lengths = {appliance: len(pred[appliance]) for appliance in gt.columns}
    longest = max(lengths.values()) if lengths else 0
    selected = [appliance for appliance in gt.columns if lengths[appliance] == longest]

    abs_error = 0.0
    total_truth = 0.0
    for appliance in selected:
        # Work on plain arrays: samples are matched by position, which
        # avoids integer look-ups on a label-indexed Series.
        estimate = np.asarray(pred[appliance], dtype=float)
        truth = np.asarray(gt[appliance], dtype=float)
        if truth.shape[0] < longest:
            raise IndexError(
                "Ground truth for %r has %d samples, expected at least %d"
                % (appliance, truth.shape[0], longest))
        truth = truth[:longest]

        abs_error += np.abs(estimate - truth).sum()
        total_truth += truth.sum()

    # Let numpy produce NaN / -inf for a zero denominator instead of raising.
    with np.errstate(divide='ignore', invalid='ignore'):
        return 1 - np.float64(abs_error) / np.float64(total_truth) / 2

In [9]:
# One column of per-appliance RMSE for every classifier.
rmse = pd.DataFrame({
    clf_name: compute_rmse(gt, predictions[clf_name])
    for clf_name in classifiers.keys()
})

NameError: name 'gt' is not defined

In [None]:
# Show the RMSE table (rows: appliances, columns: classifiers).
rmse

In [None]:
# Overall accuracy of each classifier, keyed by classifier name.
accu = {
    clf_name: compute_acc(gt, predictions[clf_name])
    for clf_name in classifiers.keys()
}
print(accu)

In [None]:
# One figure per (classifier, appliance) pair, comparing the first
# `n_points` samples of ground truth and prediction.
n_points = 1000
fig_num = 0
for clf_name in classifiers.keys():
    # Named `pred_df`, not `pd`: rebinding `pd` here would replace the
    # pandas module alias for every cell that runs afterwards.
    pred_df = predictions[clf_name]
    for app in gt.columns:
        plt.figure(fig_num)
        fig_num += 1
        gt[app].head(n_points).plot(label="(" + clf_name + ") Data " + str(app))
        pred_df[app].head(n_points).plot(label="(" + clf_name + ") Pred " + str(app))
        plt.legend()