# Disaggregation

In [3]:
from __future__ import print_function, division
import time
from matplotlib import rcParams
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

rcParams['figure.figsize'] = (13, 6)

from nilmtk import DataSet, TimeFrame, MeterGroup, HDFDataStore
from nilmtk.disaggregate import CombinatorialOptimisation

### Dividing data into train and test set

In [4]:
# Location of the REDD HDF5 file; kept in one place so it only has to be
# edited once when the notebook is run on another machine.
REDD_PATH = '/home/shifona/Downloads/mini_project/REDD/redd.h5'

# Two separate DataSet handles on the same file, one for training and one for
# testing.
# NOTE(review): the "unable to lock file" HDF5 error recorded under this cell
# presumably means another process/kernel still had the file open - confirm
# and close the other handle before re-running.
train = DataSet(REDD_PATH)
test = DataSet(REDD_PATH)

HDF5ExtError: HDF5 error back trace

  File "H5F.c", line 586, in H5Fopen
    unable to open file
  File "H5Fint.c", line 1305, in H5F_open
    unable to lock the file
  File "H5FD.c", line 1839, in H5FD_lock
    driver lock request failed
  File "H5FDsec2.c", line 940, in H5FD_sec2_lock
    unable to lock file, errno = 11, error message = 'Resource temporarily unavailable'

End of HDF5 error back trace

Unable to open/create file '/home/shifona/Downloads/mini_project/REDD/redd.h5'

Let us use building 1 for demo purposes

In [None]:
# REDD building number selected for this demo.
building = 1

Let's split data at April 30th

In [None]:
# Restrict each handle to its own side of April 30th 2011 so the models are
# evaluated on data they were not trained on. With these two calls disabled,
# `train` and `test` cover exactly the same period.
train.set_window(end="30-4-2011")
test.set_window(start="30-4-2011")

print(train.buildings.keys())

# Use the building chosen in the `building` cell instead of a hard-coded 1.
train_elec = train.buildings[building].elec
test_elec = test.buildings[building].elec

In [None]:
# Plot the meters of the training building.
train_elec.plot()

In [None]:
# Plot the mains signal of the test building.
test_elec.mains().plot()

The REDD data set has appliance-level data sampled every 3 or 4 seconds and mains data sampled every second. Let us verify this.

In [None]:
#fridge_meter = train_elec['fridge']

In [None]:
#fridge_df = fridge_meter.load().next()

In [None]:
#fridge_df.head()

In [None]:
# Mains meter(s) of the training building.
mains = train_elec.mains()

In [None]:
# Take the first chunk yielded by the mains loader. The built-in next() works
# on both Python 2 and Python 3, unlike the Python-2-only `.next()` method.
mains_df = next(mains.load())

In [None]:
# Preview the first rows of the loaded mains chunk.
mains_df.head()

Since these are sampled at different frequencies, we will downsample both to 1-minute resolution. We will also select the top-5 appliances in terms of energy consumption and use them for training our FHMM and CO models.

### Selecting top-5 appliances

In [None]:
# Keep only the five submeters returned by select_top_k(k=5); mains are
# excluded because the selection starts from submeters().
top_5_train_elec = train_elec.submeters().select_top_k(k=5)

In [None]:
# Display the selected group of meters.
top_5_train_elec

### Training and disaggregation

#### FHMM

In [None]:
from nilmtk.disaggregate import fhmm_exact

print(top_5_train_elec)

# Time only the training itself (not the import or the print above).
start = time.time()
fhmm = fhmm_exact.FHMM()
# sample_period=60 downsamples the data to 1 minute for training.
# To train on all appliances instead of the top 5, pass `train_elec`:
#     fhmm.train(train_elec, sample_period=60)
# Train exactly once: a second identical call only repeats the work and
# doubles the runtime.
fhmm.train(top_5_train_elec, sample_period=60)
end = time.time()
print("Runtime =", end-start, "seconds.")

In [None]:
# Per-chunk results, keyed by the chunk number of the mains loader:
#   pred[i] - output of fhmm.disaggregate_chunk for chunk i
#   gt[i]   - DataFrame of submeter readings, one column per meter
pred = {}
gt = {}

for i, chunk in enumerate(test_elec.mains().load(sample_period=60)):
    chunk_drop_na = chunk.dropna()
    pred[i] = fhmm.disaggregate_chunk(chunk_drop_na)

    # Every submeter is loaded here, not just the ones the model was trained
    # on; the unused columns are dropped later when the ground truth is
    # reordered to the prediction's columns.
    # NOTE(review): only the first chunk of each submeter is read, whatever
    # the value of `i` - confirm this is fine when mains yield several chunks.
    meter_power = {}
    for meter in test_elec.submeters().meters:
        # next(...) instead of .next() so this runs on Python 2 and 3.
        meter_power[meter] = next(meter.load(sample_period=60))

    # Use the index of the first loaded meter as the frame's index
    # (dict.values() is not subscriptable on Python 3, hence next(iter(...))).
    first_index = next(iter(meter_power.values())).index
    gt[i] = pd.DataFrame(
        {m: power.squeeze() for m, power in meter_power.items()},
        index=first_index,
    ).dropna()

In [None]:
# If everything can fit in memory
gt_overall = pd.concat(gt)
gt_overall.index = gt_overall.index.droplevel()
pred_overall = pd.concat(pred)
pred_overall.index = pred_overall.index.droplevel()
#appliance_labels = [m.label() for m in gt_overall.columns.values]
#print(pred_overall.columns[0].appliances[0].type['type'])
#print(pred_overall.columns[0].appliances[0].instance)

#print(appliance_labels)
#gt_overall.columns = appliance_labels
#pred_overall.columns = appliance_labels
#print(pred)
#print(pred_overall['fridge'])
#print("+")
# print(type(pred_overall.columns))
#print("+")
# print((gt_overall))
#print(pred_overall.columns.difference(gt_overall.columns))
# find allgt col
#find all pred col

# take iteration

# filter gt_overall using  above

# Having the same order of columns
gt_overall = gt_overall[pred_overall.columns]

In [None]:
#Intersection of index
gt_index_utc = gt_overall.index.tz_convert("UTC")
pred_index_utc = pred_overall.index.tz_convert("UTC")
common_index_utc = gt_index_utc.intersection(pred_index_utc)

In [None]:
# Timezone stored under the 'timezone' key of the dataset metadata.
local_timezone = train.metadata['timezone']

In [None]:
# Express the common (UTC) index in the dataset's local timezone again.
common_index_local = common_index_utc.tz_convert(local_timezone)

In [None]:
# Keep only the timestamps present in both frames so they line up row by row.
# `.loc` is the explicit label-based indexer; `.ix` is deprecated and has been
# removed from recent pandas releases.
gt_overall = gt_overall.loc[common_index_local]
pred_overall = pred_overall.loc[common_index_local]

In [None]:
# Preview the aligned ground-truth frame.
gt_overall.head()

Using prettier names!

In [None]:
# Replace the column objects by the string returned from their label() method.
# The same label list is assigned to both frames, which relies on them having
# the same column order at this point.
# NOTE: this cell cannot be re-run as is - after the first run the columns are
# plain labels and no longer have a label() method.
appliance_labels = [m.label() for m in gt_overall.columns.values]
gt_overall.columns = appliance_labels
pred_overall.columns = appliance_labels

In [None]:
# Preview the aligned prediction frame.
pred_overall.head()

In [None]:
#pred_overall['Fridge'].head(1000).plot(label="Pred")
#gt_overall['Fridge'].head(1000).plot(label="GT")
#plt.legend()

In [None]:
from sklearn.metrics import mean_squared_error

In [None]:
# Root-mean-square error between ground truth and prediction, per appliance.
rms_error = {}
for appliance in gt_overall.columns:
    mse = mean_squared_error(gt_overall[appliance], pred_overall[appliance])
    rms_error[appliance] = np.sqrt(mse)

In [None]:
# Show the per-appliance RMSE values as a Series.
pd.Series(rms_error)

In [None]:
import math

rerror = {}

# Number of predicted samples per appliance column.
leng = {}
for appliance in gt_overall.columns:
    leng[appliance] = len(pred_overall[appliance])
    print(str(appliance) + " : " + str(leng[appliance]))

# Longest series length, and the appliances that reach it (in column order).
lt = max(leng.values()) if leng else 0
napp = [appliance for appliance in gt_overall.columns if leng[appliance] == lt]

# Prediction / ground-truth series of the retained appliances.
pd_ = {}
gt_ = {}
for appliance in napp:
    print(appliance)
    gt_[appliance] = gt_overall[appliance]
    pd_[appliance] = pred_overall[appliance]
    print(str(len(pd_[appliance]))+", "+str(len(gt_[appliance])))

print(napp)
print(lt)

# acc = 1 - sum|pred - gt| / (2 * sum(gt)), summed over every retained
# appliance and every sample. Computed on the underlying arrays instead of a
# per-element Python loop; the old debug print inside that loop subscripted
# `pd` (the pandas module) rather than `pd_` and raised a TypeError.
s = sum(np.abs(pd_[appliance].values - gt_[appliance].values).sum()
        for appliance in napp)
sd = sum(gt_[appliance].values.sum() for appliance in napp)

# Undefined when the ground truth sums to zero; report NaN instead of dividing.
acc = 1 - (1.0*s)/sd/2 if sd else float('nan')
print(acc)
#print(pd)


In [None]:
# Recomputes the accuracy from `lt`, `napp`, `pd_` and `gt_`, which must
# already exist in the kernel:
#     acc = 1 - sum|pred - gt| / (2 * sum(gt))
# The per-element loop is replaced by array sums; its debug print subscripted
# `pd` (the pandas module) instead of `pd_` and raised a TypeError.
print(lt)
s = sum(np.abs(pd_[appliance].values - gt_[appliance].values).sum()
        for appliance in napp)
sd = sum(gt_[appliance].values.sum() for appliance in napp)

# Undefined when the ground truth sums to zero; report NaN instead of dividing.
acc = 1 - (1.0*s)/sd/2 if sd else float('nan')
print(acc)

In [None]:
import math

rerror = {}

# Number of ground-truth samples per appliance column.
leng = {}
for appliance in gt_overall.columns:
    leng[appliance] = len(gt_overall[appliance])

# Longest series length, and the appliances that reach it (in column order).
lt = max(leng.values()) if leng else 0
napp = [appliance for appliance in gt_overall.columns if leng[appliance] == lt]

# Plain Python lists of the predicted / ground-truth values per appliance.
# These used to be stored in variables named `pd` and `gt`, which replaced the
# pandas alias and the per-chunk ground-truth dict for the rest of the session;
# they now have their own names.
pred_values = {}
gt_values = {}
for appliance in napp:
    pred_values[appliance] = list(pred_overall[appliance])
    gt_values[appliance] = list(gt_overall[appliance])

    print(str(len(pred_values[appliance]))+", "+str(len(gt_values[appliance])))

#print(pred_values)

#print(pd)
