In [153]:
import numpy as np
import pandas as pd
from os.path import join
import os
from pylab import rcParams
import matplotlib.pyplot as plt

import nilmtk
from nilmtk import DataSet, TimeFrame, MeterGroup, HDFDataStore
from nilmtk.disaggregate import CombinatorialOptimisation, FHMM
from nilmtk.utils import print_dict
from nilmtk.metrics import f1_score

import warnings
warnings.filterwarnings("ignore")

In [154]:
import sys

In [155]:
sys.path.append("../../code/common/")
sys.path.append("../../code/fridge/")

In [156]:
ds = DataSet("/Users/nipunbatra/Downloads/wikienergy-2.h5")
fridges = nilmtk.global_meter_group.select_using_appliances(type='fridge')

In [157]:
fridges_id_building_id = {i:fridges.meters[i].building() for i in range(len(fridges.meters))}

In [158]:
fridge_id_building_id_ser = pd.Series(fridges_id_building_id)

In [159]:
fridge_id_building_id_ser.head()

0      1
1      1
2     10
3    101
4    102
dtype: int64

In [160]:
from fridge_compressor_durations_optimised_jul_7 import *

In [161]:
fridge_ids_to_consider = compressor_powers.keys()

In [162]:
Wm_to_kwh = 1.66666667 * 1e-5

def wm_to_kwh_per_month(wm, mins):
    return wm * Wm_to_kwh / (mins * 1.0 / (1440 * 30))

In [163]:
building_ids_to_consider = fridge_id_building_id_ser[fridge_ids_to_consider]

In [164]:

building_ids_to_consider.head()

1       1
2      10
8     105
11     11
13    112
dtype: int64

In [165]:
import glob

In [166]:
list_of_files = glob.glob("output/*.h5")

In [167]:
list_of_files

['output/1.h5',
 'output/100.h5',
 'output/102.h5',
 'output/103.h5',
 'output/104.h5',
 'output/107.h5',
 'output/109.h5',
 'output/11.h5',
 'output/110.h5',
 'output/112.h5',
 'output/114.h5',
 'output/115.h5',
 'output/116.h5',
 'output/118.h5',
 'output/119.h5',
 'output/123.h5',
 'output/124.h5',
 'output/125.h5',
 'output/126.h5',
 'output/128.h5',
 'output/129.h5',
 'output/13.h5',
 'output/130.h5',
 'output/131.h5',
 'output/133.h5',
 'output/134.h5',
 'output/135.h5',
 'output/136.h5',
 'output/138.h5',
 'output/139.h5',
 'output/14.h5',
 'output/140.h5',
 'output/142.h5',
 'output/144.h5',
 'output/145.h5',
 'output/146.h5',
 'output/149.h5',
 'output/15.h5',
 'output/151.h5',
 'output/152.h5',
 'output/153.h5',
 'output/154.h5',
 'output/155.h5',
 'output/157.h5',
 'output/158.h5',
 'output/159.h5',
 'output/161.h5',
 'output/163.h5',
 'output/167.h5',
 'output/169.h5',
 'output/170.h5',
 'output/18.h5',
 'output/2.h5',
 'output/22.h5',
 'output/25.h5',
 'output/29.h5',
 'ou

In [168]:
def create_df(o_new):
    d_new = pd.DataFrame(o_new).T
    d_new.columns = ["total", "baseline", "defrost", "usage",
                     "usage_cycles", "non_usage_cycles",
                     "defrost_cycles","baseline_duty_percent", "total_mins"]


    #d = d[d.usage > 0]
    d_new["artifical_sum"] = d_new.baseline + d_new.defrost + d_new.usage

    d_new["baseline_percentage"] = d_new.baseline * 100 / d_new.total
    d_new["defrost_percentage"] = d_new.defrost * 100 / d_new.total
    d_new["usage_percentage"] = d_new.usage * 100 / d_new.total
    return d_new

In [None]:
out = {}
for f in list_of_files[:]:
    #print f.split("/")[1].split(".")[0]
    fridge_num = int(f.split("/")[1].split(".")[0])
    print(fridge_num)
    df = pd.HDFStore(f)['/disag']
    #f = fridges.meters[n].load().next()[('power', 'active')]
    #df_energy = wm_to_kwh_per_month(df.sum(), len(df))
    out[fridge_num] = {}
    #for algo in ["Hart"]:
    for algo in ["CO", "GT", "FHMM", "Hart"]:
        
        
        out[fridge_num][algo] = compute_fractions_new(df[[algo]][algo], fridge_num)
    #out[f.split("/")[1].split(".")[0]] = df.sum()

1
0.39 0.333333333333
0.338 0.288888888889
0.585 0.5
0.521287128713 0.445544554455
100
0.334285714286 0.285714285714
0.612857142857 0.52380952381
0.677368421053 0.578947368421
0.557142857143 0.47619047619
102
0.39 0.333333333333
0.551886792453 0.471698113208
0.26 0.222222222222
0.459642857143 0.392857142857
103
0.39 0.333333333333
0.78 0.666666666667
0.39

In [116]:
di = {"baseline_percentage":{}, "defrost_percentage":{},
      "usage_percentage":{}, "usage_cycles":{}, 
      "non_usage_cycles":{}, "defrost_cycles":{}}
for element in di.keys():
    for f_id in out.keys():
        try:
            di[element][f_id] = {}
            for algo in ["CO", "FHMM", "Hart", "GT"]:
                 di[element][f_id][algo] = create_df(out[f_id][algo])[element].values[0]
        except:
            pass

In [123]:
usage_cyles_df = pd.DataFrame(di["usage_cycles"]).T
non_usage_cycles_df = pd.DataFrame(di["non_usage_cycles"]).T
defrost_cyles_df = pd.DataFrame(di["defrost_cycles"]).T

prop_usage_cycles = usage_cyles_df/(usage_cyles_df+non_usage_cycles_df)
baseline_percentage_df = pd.DataFrame(di["baseline_percentage"]).T
usage_percentage_df = pd.DataFrame(di["usage_percentage"]).T
defrost_percentage_df = pd.DataFrame(di["defrost_percentage"]).T

In [124]:
prop_usage_cycles.head()

Unnamed: 0,CO,FHMM,GT,Hart
1,0.510182,0.309524,0.328358,0.416465
2,0.266886,0.43582,0.488258,0.430538
8,0.816965,0.484377,0.275476,0.333897
11,0.273034,0.359568,0.15712,0.231375
13,0.484096,0.489576,0.422535,0.21147


In [91]:
from sklearn import linear_model

In [92]:
from common_functions import latexify, format_axes

In [96]:
import math
def sign(n):
    if n>0.0:
        return "+"
    else:
        return "-"

In [101]:
np.random.seed(42)
latexify(columns=2, fig_height=2.6)
fig, ax = plt.subplots(ncols=3, sharey=True)
for i, algo in enumerate(["CO", "FHMM", "Hart"]):
    d = baseline_percentage_df.dropna()
    x = d[["GT"]]
    y = d[[algo]]
   

    # Train the model using the training sets
    
    model_ransac = linear_model.RANSACRegressor(linear_model.LinearRegression())
    model_ransac.fit(x, y)
    ax[i].scatter(x, y, color="gray", alpha=0.5)
    
    ax[i].set_xlabel("Submetered baseline percentage")
    
    ax[i].plot(x, model_ransac.predict(x), color='black')
    format_axes(ax[i])
    mean_average_error = np.mean(np.abs(y-model_ransac.predict(x)))
    title = algo + "\n" + "y = %0.2f x %s %0.2f \n Mean Average Error= %0.1f percent" %(model_ransac.estimator_.coef_[0], 
                                                   sign(model_ransac.estimator_.intercept_[0]),
                                                   math.fabs(model_ransac.estimator_.intercept_[0]),
                                                                   mean_average_error)    #title = title +"\n" + str(scipy.stats.pearsonr(x, y))
    ax[i].set_title(title)
ax[0].set_ylabel("Predicted baseline percentage")
plt.tight_layout()
plt.savefig("../../figures/fridge/disag_algos_baseline_percentage.pdf", bbox_inches="tight")
plt.savefig("../../figures/fridge/disag_algos_baseline_percentage.png", bbox_inches="tight")

In [103]:
np.random.seed(42)
latexify(columns=2, fig_height=2.6)
fig, ax = plt.subplots(ncols=3, sharey=True)
for i, algo in enumerate(["CO", "FHMM", "Hart"]):
    d = usage_percentage_df.dropna()
    x = d[["GT"]]
    y = d[[algo]]
   

    # Train the model using the training sets
    
    model_ransac = linear_model.RANSACRegressor(linear_model.LinearRegression())
    model_ransac.fit(x, y)
    ax[i].scatter(x, y, color="gray", alpha=0.5)
    
    ax[i].set_xlabel("Submetered usage percentage")
    
    ax[i].plot(x, model_ransac.predict(x), color='black')
    format_axes(ax[i])
    mean_average_error = np.mean(np.abs(y-model_ransac.predict(x)))
    title = algo + "\n" + "y = %0.2f x %s %0.2f \n Mean Average Error= %0.1f percent" %(model_ransac.estimator_.coef_[0], 
                                                   sign(model_ransac.estimator_.intercept_[0]),
                                                   math.fabs(model_ransac.estimator_.intercept_[0]),
                                                                   mean_average_error)    #title = title +"\n" + str(scipy.stats.pearsonr(x, y))
    ax[i].set_title(title)
ax[0].set_ylabel("Predicted usage percentage")
plt.tight_layout()
plt.savefig("../../figures/fridge/disag_algos_usage_percentage.pdf", bbox_inches="tight")
plt.savefig("../../figures/fridge/disag_algos_usage_percentage.png", bbox_inches="tight")

In [114]:
np.random.seed(42)
latexify(columns=2, fig_height=2.6)
fig, ax = plt.subplots(ncols=3, sharey=True)
d = defrost_percentage_df.dropna()
for i, algo in enumerate(["CO", "FHMM", "Hart"]):
    
    x = d[["GT"]]
    y = d[[algo]]
   

    # Train the model using the training sets
    
    #model_ransac = linear_model.RANSACRegressor(linear_model.LinearRegression())
    #model_ransac.fit(x, y)
    ax[i].scatter(x, y, color="gray", alpha=0.5)
    
    ax[i].set_xlabel("Submetered defrost percentage")
    
    #ax[i].plot(x, model_ransac.predict(x), color='black')
    format_axes(ax[i])
    #mean_average_error = np.mean(np.abs(y-model_ransac.predict(x)))
    title = algo
    ax[i].set_title(title)
ax[0].set_ylabel("Predicted defrost percentage")
plt.tight_layout()
plt.savefig("../../figures/fridge/disag_algos_defrost_percentage.pdf", bbox_inches="tight")
plt.savefig("../../figures/fridge/disag_algos_defrost_percentage.png", bbox_inches="tight")

In [143]:
import pandas as pd
import sys
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.font_manager
from scipy import stats

from sklearn import svm

from sklearn.covariance import EllipticEnvelope
sys.path.append("../common")
import matplotlib.pyplot as plt

from common_functions import latexify, format_axes
latexify(columns=2, fig_height=2.6)
fig, ax = plt.subplots(ncols=3)

for i, algo in enumerate(["CO", "FHMM","Hart"]):
    df = pd.DataFrame({"usage_percentage":usage_percentage_df[algo], "usage proportion":prop_usage_cycles[algo]}).dropna()

    #df["usage proportion"] = df["usage_cycles"]/(df["usage_cycles"] + df["non_usage_cycles"])

    X = df["usage proportion"].values
    Y = df["usage_percentage"].values

    XY = df[["usage proportion","usage_percentage" ]].values


    

    # Example settings
    n_samples = len(df)
    outliers_fraction = 0.2
    clusters_separation = [0]

    # define two outlier detection tools to be compared
    classifiers = {
        "robust covariance estimator": EllipticEnvelope(contamination=.1)}

    # Compare given classifiers under given settings
    xx, yy = np.meshgrid(np.linspace(-0.1, 1.1, 500), np.linspace(0, 100, 500))
    n_inliers = int((1. - outliers_fraction) * n_samples)
    n_outliers = int(outliers_fraction * n_samples)

    
    
    # Fit the problem with varying cluster separation
    np.random.seed(42)
    # Data generation


    # Fit the model with the One-Class SVM
    #plt.figure(figsize=(10, 5))

    clf = EllipticEnvelope(contamination=.1)
    # fit the data and tag outliers
    clf.fit(XY)
    y_pred = clf.decision_function(XY).ravel()
    threshold = stats.scoreatpercentile(y_pred,
                                        100 * outliers_fraction)
    y_pred = y_pred > threshold
    # plot the levels lines and the points
    Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    subplot = ax[i]
    subplot.contourf(xx, yy, Z, levels=np.linspace(Z.min(), threshold, 7),
                     cmap=plt.cm.Blues_r)
    a = subplot.contour(xx, yy, Z, levels=[threshold],
                        linewidths=2, colors='red')
    subplot.contourf(xx, yy, Z, levels=[threshold, Z.max()],
                     colors='orange')
    b = subplot.scatter(XY[:-n_outliers, 0], XY[:-n_outliers, 1], c='white')
    c = subplot.scatter(XY[-n_outliers:, 0], XY[-n_outliers:, 1], c='white')
    subplot.axis('tight')
    subplot.legend(
        [a.collections[0]],
        ['Learned decision function'], loc=4)
    #subplot.set_xlabel("%d. %s (errors: %d)" % (1, "rob", n_errors))
    subplot.set_xlim((X.min(), X.max()))
    subplot.set_ylim((Y.min(), Y.max()))
    subplot.axhspan(df["usage_percentage"].median(), df["usage_percentage"].median())
    subplot.axvspan(df["usage proportion"].median(), df["usage proportion"].median())
    subplot.set_title(algo)
    #plt.subplots_adjust(0.04, 0.1, 0.96, 0.94, 0.1, 0.26)
    format_axes(subplot)
    subplot.set_xlabel("Proportion of usage cycles")

    ylims = plt.ylim()
    subplot.set_ylim((0,100))
    xlims = plt.xlim()
    subplot.set_xlim((-0.1,1.1))

ax[0].set_ylabel(r"Usage energy $\%$")
plt.tight_layout()
    #plt.show()
plt.savefig("../../figures/fridge/disag_usage_energy_ratio.png")
plt.savefig("../../figures/fridge/disag_usage_energy_ratio.pdf")



In [152]:
df[df.usage_percentage.isin(XY[-n_outliers:, 1])]

Unnamed: 0,usage proportion,usage_percentage
93,0.447542,23.233339
95,0.409639,22.750318
139,0.363281,61.365395
140,0.215559,10.753222
142,0.430453,34.540007
144,0.265738,14.446163
145,0.339286,16.004397
146,0.394984,27.656352


In [146]:
XY[-n_outliers:, 0]

array([ 0.4475424 ,  0.40963855,  0.36328125,  0.21555916,  0.43045294,
        0.26573787,  0.33928571,  0.39498433])

In [147]:
XY[-n_outliers:, 1]

array([ 23.23333864,  22.75031764,  61.36539519,  10.75322157,
        34.54000697,  14.44616261,  16.00439686,  27.65635171])

In [35]:
ons_indices = np.where(diff_arr == 1)[0]
ons_indices

array([], dtype=int64)

In [36]:
offs_indices[0] < ons_indices[0]

IndexError: index 0 is out of bounds for axis 0 with size 0

In [24]:
from common_functions import latexify, format_axes
latexify()
ax = pd.DataFrame(out).T.dropna().plot(kind="bar")
plt.xlabel("Home")
plt.ylabel("Monthly fridge energy (kWh)")
format_axes(ax)
plt.tight_layout()
plt.savefig("../../figures/fridge/disag_algos_energy.pdf", bbox_inches="tight")
plt.savefig("../../figures/fridge/disag_algos_energy.png", bbox_inches="tight")

In [136]:
from sklearn import linear_model
import scipy

In [27]:
d = pd.DataFrame(out).T.dropna()

In [142]:
latexify(columns=2, fig_height=2.6)
fig, ax = plt.subplots(ncols=3, sharey=True)
for i, algo in enumerate(["CO", "FHMM", "Hart"]):
    x = d[["GT"]]
    y = d[[algo]]
   

    # Train the model using the training sets
    
    model_ransac = linear_model.RANSACRegressor(linear_model.LinearRegression())
    model_ransac.fit(x, y)
    ax[i].scatter(x, y, color="gray", alpha=0.5)
    
    ax[i].set_xlabel("Actual energy (kWh)")
    
    ax[i].plot(x, model_ransac.predict(x), color='black')
    format_axes(ax[i])
    title = algo + "\n" + "y = %0.2f x + %0.2f" %(model_ransac.estimator_.coef_[0], model_ransac.estimator_.intercept_[0])
    #title = title +"\n" + str(scipy.stats.pearsonr(x, y))
    ax[i].set_title(title)
ax[0].set_ylabel("Predicted energy (kWh)")
plt.tight_layout()
plt.savefig("../../figures/fridge/disag_algos_energy_scatter.pdf", bbox_inches="tight")
plt.savefig("../../figures/fridge/disag_algos_energy_scatter.png", bbox_inches="tight")

In [31]:
out_energy = {}
for algo in ["CO", "FHMM", "Hart"]:
    out_energy[algo]=(d["GT"]-d[algo]).abs().div(d["GT"])
    

In [33]:
pd.DataFrame(out_energy).plot(kind="box")

<matplotlib.axes._subplots.AxesSubplot at 0x134bb26d0>

In [34]:
pd.DataFrame(out_energy).describe()

Unnamed: 0,CO,FHMM,Hart
count,52.0,52.0,52.0
mean,0.938419,0.399077,0.317829
std,1.052879,0.537793,0.271267
min,0.092536,0.003751,0.012765
25%,0.288208,0.10637,0.121365
50%,0.459776,0.219939,0.235434
75%,1.229137,0.423629,0.427221
max,6.083893,2.870116,1.214972


In [35]:
d.head()

Unnamed: 0,CO,FHMM,GT,Hart
1,105.62622,99.891341,25.810942,38.951076
11,101.907341,52.027545,72.99505,55.851776
13,68.663017,67.13781,60.774502,45.489593
139,67.847249,52.004537,53.040532,20.788166
14,158.378926,111.074274,53.151874,47.456254


In [51]:
from sklearn.metrics import f1_score
f_score = {}
limit=20
for f in list_of_files:
    try:
        df = pd.HDFStore(f)['/disag']
        gt = (df>20)[["GT"]]
        pred = (df>20)[["CO", "FHMM", "Hart"]]
        o={}
        for algo in ["CO", "FHMM", "Hart"]:
            o[algo] = f1_score(gt["GT"], pred[algo] )
        f_score[f.split("/")[1].split(".")[0]] = pd.Series(o)
    except:
        pass

In [49]:
gt["GT"].head()

localminute
2014-05-01 00:00:00-05:00    True
2014-05-01 00:01:00-05:00    True
2014-05-01 00:02:00-05:00    True
2014-05-01 00:03:00-05:00    True
2014-05-01 00:04:00-05:00    True
Freq: 60S, Name: GT, dtype: bool

In [50]:
pred[algo].head()

localminute
2014-05-01 00:00:00-05:00    False
2014-05-01 00:01:00-05:00    False
2014-05-01 00:02:00-05:00    False
2014-05-01 00:03:00-05:00    False
2014-05-01 00:04:00-05:00    False
Freq: 60S, Name: Hart, dtype: bool

In [55]:
pd.DataFrame(f_score).T.describe()

Unnamed: 0,CO,FHMM,Hart
count,52.0,52.0,52.0
mean,0.625507,0.606242,0.662307
std,0.110851,0.137912,0.157258
min,0.367521,0.337143,0.250382
25%,0.557075,0.508627,0.588966
50%,0.62191,0.603452,0.676076
75%,0.684253,0.683499,0.781527
max,0.979902,0.999992,0.893847


In [111]:
gt = (df>20)[["GT"]]
gt.head()

Unnamed: 0_level_0,GT
localminute,Unnamed: 1_level_1
2014-03-17 00:00:00-05:00,True
2014-03-17 00:01:00-05:00,True
2014-03-17 00:02:00-05:00,False
2014-03-17 00:03:00-05:00,False
2014-03-17 00:04:00-05:00,False


In [112]:
algo = "Hart"
x = (df>20)[[algo]]
x.head()

Unnamed: 0_level_0,Hart
localminute,Unnamed: 1_level_1
2014-03-17 00:00:00-05:00,False
2014-03-17 00:01:00-05:00,False
2014-03-17 00:02:00-05:00,False
2014-03-17 00:03:00-05:00,False
2014-03-17 00:04:00-05:00,False


In [118]:
from sklearn.metrics import f1_score

In [119]:
f1_score(gt["GT"], x["Hart"] )

0.76383016455259056

In [117]:
(x["Hart"]==gt["GT"]).head().sum()

3

In [15]:
for f_id, b_id in building_ids_to_consider.head(3).iteritems():
    out[f_id] = {}
    elec = ds.buildings[b_id].elec
    mains = elec.mains()
    elec.appliances
    fridge_instance = fridges.meters[f_id].appliances[0].identifier.instance
    # Dividing train, test
    train_fraction = 0.5
    train = DataSet("/Users/nipunbatra/Downloads/wikienergy-2.h5")
    test = DataSet("/Users/nipunbatra/Downloads/wikienergy-2.h5")
    split_point = elec.train_test_split(train_fraction=train_fraction).date()
    train.set_window(end=split_point)
    test.set_window(start=split_point)
    train_elec = train.buildings[b_id].elec
    test_elec = test.buildings[b_id].elec
    test_mains = test_elec.mains()
    
    # GT elec
    gt_fridge  = test_elec[('fridge', fridge_instance)]
    
    # Pred elec
    for clf_name in cls_dict.keys():
        disag_filename = '%s/%d.h5' %(clf_name, f_id)
        ds_pred = DataSet(disag_filename)
        out[f_id][clf_name] = {}
        pred_fridge = ds_pred.buildings[b_id].elec[('fridge', fridge_instance)]
        out[f_id][clf_name]["pred_energy"] = pred_fridge.total_energy()['active']
        out[f_id][clf_name]["gt_energy"] = gt_fridge.total_energy()['active']
    
    
    
   
    


In [16]:
out

{1: {'CO': {'gt_energy': 25.901466666666668,
   'pred_energy': 268.47235000000001},
  'FHMM': {'gt_energy': 25.901466666666668,
   'pred_energy': 197.42403333333334}},
 2: {'CO': {'gt_energy': 125.88225, 'pred_energy': 175.04763333333332},
  'FHMM': {'gt_energy': 125.88225, 'pred_energy': 154.08428333333333}},
 8: {'CO': {'gt_energy': 154.89259999999999, 'pred_energy': 374.2894},
  'FHMM': {'gt_energy': 154.89259999999999,
   'pred_energy': 261.03871666666669}}}

In [17]:
disag_filename
disag = DataSet(disag_filename)
disag_elec = disag.buildings[b_id].elec

In [18]:
disag_elec.plot()


<matplotlib.axes._subplots.AxesSubplot at 0x12ce1d250>

In [52]:
fridge_elec in top_k_train_elec.meters

False

In [54]:
fridge_elec not in top_k_train_elec.meters

True

In [8]:
building_number = 11
fridge_id = 2

In [9]:
elec = ds.buildings[building_number].elec
mains = elec.mains()
elec.appliances



[Appliance(type='fridge', instance=1),
 Appliance(type='dish washer', instance=1),
 Appliance(type='electric water heating appliance', instance=1),
 Appliance(type='spin dryer', instance=1),
 Appliance(type='electric furnace', instance=1),
 Appliance(type='sockets', instance=1),
 Appliance(type='sockets', instance=2),
 Appliance(type='air conditioner', instance=1),
 Appliance(type='sockets', instance=3),
 Appliance(type='sockets', instance=4)]

In [32]:
split_point = elec.train_test_split(train_fraction=0.2)

In [33]:
split_point.date()

datetime.date(2014, 2, 24)

In [34]:
train = DataSet("/Users/nipunbatra/Downloads/wikienergy-2.h5")

In [35]:
train.set_window(end=split_point.date())



In [36]:
train.buildings[11].elec.plot()

<matplotlib.axes._subplots.AxesSubplot at 0x13531dad0>

In [12]:
test.set_window(start="30-4-2011")

Timestamp('2014-04-02 00:30:00-0500', tz='US/Central')

In [12]:
co = CombinatorialOptimisation()
co.train(elec)

Training model for submeter 'ElecMeter(instance=2, building=11, dataset='WikiEnergy', appliances=[Appliance(type='air conditioner', instance=1)])'
Training model for submeter 'ElecMeter(instance=3, building=11, dataset='WikiEnergy', appliances=[Appliance(type='sockets', instance=1)])'
Training model for submeter 'ElecMeter(instance=4, building=11, dataset='WikiEnergy', appliances=[Appliance(type='sockets', instance=2)])'
Training model for submeter 'ElecMeter(instance=5, building=11, dataset='WikiEnergy', appliances=[Appliance(type='dish washer', instance=1)])'
Training model for submeter 'ElecMeter(instance=6, building=11, dataset='WikiEnergy', appliances=[Appliance(type='spin dryer', instance=1)])'
Training model for submeter 'ElecMeter(instance=7, building=11, dataset='WikiEnergy', appliances=[Appliance(type='electric furnace', instance=1)])'
Training model for submeter 'ElecMeter(instance=8, building=11, dataset='WikiEnergy', appliances=[Appliance(type='sockets', instance=3)])'
Tra

In [13]:
disag_filename = 'wikienergy-disag-co-new.h5'
output = HDFDataStore(disag_filename, 'w')
co.disaggregate(elec.mains(), output)
output.close()

vampire_power = 0.0 watts
Estimating power demand for 'ElecMeter(instance=2, building=11, dataset='WikiEnergy', appliances=[Appliance(type='air conditioner', instance=1)])'
Estimating power demand for 'ElecMeter(instance=3, building=11, dataset='WikiEnergy', appliances=[Appliance(type='sockets', instance=1)])'
Estimating power demand for 'ElecMeter(instance=4, building=11, dataset='WikiEnergy', appliances=[Appliance(type='sockets', instance=2)])'
Estimating power demand for 'ElecMeter(instance=5, building=11, dataset='WikiEnergy', appliances=[Appliance(type='dish washer', instance=1)])'
Estimating power demand for 'ElecMeter(instance=6, building=11, dataset='WikiEnergy', appliances=[Appliance(type='spin dryer', instance=1)])'
Estimating power demand for 'ElecMeter(instance=7, building=11, dataset='WikiEnergy', appliances=[Appliance(type='electric furnace', instance=1)])'
Estimating power demand for 'ElecMeter(instance=8, building=11, dataset='WikiEnergy', appliances=[Appliance(type='so

In [16]:
disag = DataSet(disag_filename)
disag_elec_co = disag.buildings[building_number].elec

In [17]:
pred_df_co = disag_elec['fridge'].load().next()[('power','active')]

In [18]:
fhmm = fhmm_exact.FHMM()
fhmm.train(elec)

Training model for submeter 'ElecMeter(instance=2, building=11, dataset='WikiEnergy', appliances=[Appliance(type='air conditioner', instance=1)])'
Training model for submeter 'ElecMeter(instance=3, building=11, dataset='WikiEnergy', appliances=[Appliance(type='sockets', instance=1)])'
Training model for submeter 'ElecMeter(instance=4, building=11, dataset='WikiEnergy', appliances=[Appliance(type='sockets', instance=2)])'
Training model for submeter 'ElecMeter(instance=5, building=11, dataset='WikiEnergy', appliances=[Appliance(type='dish washer', instance=1)])'
Training model for submeter 'ElecMeter(instance=6, building=11, dataset='WikiEnergy', appliances=[Appliance(type='spin dryer', instance=1)])'
Training model for submeter 'ElecMeter(instance=7, building=11, dataset='WikiEnergy', appliances=[Appliance(type='electric furnace', instance=1)])'
Training model for submeter 'ElecMeter(instance=8, building=11, dataset='WikiEnergy', appliances=[Appliance(type='sockets', instance=3)])'
Tra

In [19]:
disag_filename = 'wikienergy-disag-fhmm.h5'
output = HDFDataStore(disag_filename, 'w')
fhmm.disaggregate(elec.mains(), output)

KeyboardInterrupt: 

In [None]:
disag = DataSet(disag_filename)
disag_elec = disag.buildings[building_number].elec 

In [None]:
pred_df_fhmm = disag_elec['fridge'].load().next()[('power','active')]

In [20]:
from nilmtk.disaggregate.hart_85 import Hart85
h = Hart85()

In [21]:
h.train(elec.mains())

Finding Edges, please wait ...
Edge detection complete.
Creating transition frame ...
Transition frame created.
Creating states frame ...
States frame created.
Finished.


In [22]:
h.steady_states.head()

Unnamed: 0,active average
2014-02-01 00:21:00-06:00,305.0
2014-02-01 00:29:00-06:00,637.0
2014-02-01 00:33:00-06:00,456.939394
2014-02-01 01:07:00-06:00,308.125
2014-02-01 01:15:00-06:00,452.444444


In [23]:
h.centroids

Unnamed: 0,"(power, active)"
0,152.154196
1,3401.577683
2,908.271235
3,5057.765816
4,1587.208333
5,8382.5


In [24]:
disag_filename = 'wikienergy-disag-hart.h5'
output = HDFDataStore(disag_filename, 'w')
h.disaggregate(elec.mains(), output)
disag = DataSet(disag_filename)
disag_elec = disag.buildings[building_number].elec

Finding Edges, please wait ...
Edge detection complete.
Creating transition frame ...
Transition frame created.
Creating states frame ...
States frame created.
Finished.


In [27]:
ax1 = disag_elec['unknown', 0].load().next().plot()
pred_df_co.plot(ax=ax1, label="Combinatorial Optimisation")
elec['fridge', 1].load().next().plot(ax=ax1)
ax1.legend(["Predicted Hart", "Predicted CO","Ground truth"]);
plt.ylabel("Power (W)")
plt.xlabel("Time");