In [1]:
import sys
mins = 60

from hmmlearn import hmm

import nilmtk
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from collections import OrderedDict
import warnings
import sys
sys.path.append("../code/")
%matplotlib inline

In [2]:
warnings.filterwarnings("ignore")



from nilmtk import *
import os
import nilmtk

metadata_df = pd.read_csv("./metadata.csv",index_col=0)
feeds = {'use':'aggregate',
        'air1':'hvac',
         'clotheswasher1':'wm',
         'dishwasher1':'dw',
         'microwave1':'mw',
         'refrigerator1':'fridge',
         'oven1':'oven',
        }

feed_r = {v:k for k, v in feeds.items()}
city = 'Austin'
year = 2015


st = pd.HDFStore(os.path.expanduser("~/all.h5"), mode='r')

city_data = metadata_df[metadata_df['city'] == city]
city_homes = city_data.index.values.astype('int')

from common import APPLIANCES_ORDER

models = {}

In [8]:
for appliance in APPLIANCES_ORDER[1:]:
    print(appliance, "training")
    o = []
    for hn, home in enumerate(city_homes[:]):
        #print(home, hn)

        
        try:
            dft = st['/%d' %home]['%s' %year][feed_r[appliance]].iloc[3600:4080]
            appl_power = dft.dropna().values.reshape(-1,1)
            activation = (dft>10).sum()*1.0/len(dft)
            if appliance in ["wm","dw","oven","mw"]:
                if len(appl_power)>10:
                    o.append(appl_power)

            else:
                if activation>0.08:
                    o.append(appl_power)
        except :
            pass

    if len(o)>1:
        o = np.array(o)
        print("Length of o", o.shape)
        mod = hmm.GaussianHMM(3, "full")
        mod.fit(o)
        models[appliance] = mod
        print("Means for %s are" %appliance)
        print(mod.means_)
    else:
        print("Not enough samples for %s" %appliance)

hvac training
Length of o (385,)
Means for hvac are
[[  514.80215183]
 [   12.62050204]
 [ 1639.6020198 ]]
fridge training
Length of o (318,)
Means for fridge are
[[  53.08612528]
 [ 205.62985773]
 [ 105.05012415]]
mw training
Length of o (217,)
Means for mw are
[[   2.8869722 ]
 [  33.22728033]
 [ 120.46751662]]
dw training
Length of o (289,)
Means for dw are
[[  3.86828540e-102]
 [  4.67467286e+002]
 [  1.72742147e+001]]
wm training
Length of o (223,)
Means for wm are
[[   0.        ]
 [ 100.18015886]
 [   9.30022173]]
oven training
Length of o (125,)
Means for oven are
[[   8.85220768]
 [   2.19140812]
 [ 310.1776985 ]]


In [14]:
# In[8]:

from nilmtk.disaggregate.fhmm_exact import sort_learnt_parameters


# In[9]:

new_learnt_models = OrderedDict()
for appliance, appliance_model in models.items():
    startprob, means, covars, transmat = sort_learnt_parameters(
                    appliance_model.startprob_, appliance_model.means_,
                    appliance_model.covars_, appliance_model.transmat_)
    new_learnt_models[appliance] = hmm.GaussianHMM(
                startprob.size, "full", startprob, transmat)
    new_learnt_models[appliance].means_ = means
    new_learnt_models[appliance].covars_ = covars


# In[ ]:

from nilmtk.disaggregate.fhmm_exact import create_combined_hmm


# In[ ]:

learnt_model_combined = create_combined_hmm(new_learnt_models)


# In[ ]:

from nilmtk.disaggregate.fhmm_exact import FHMM


# In[16]:

f = FHMM()


# In[17]:

f.model = learnt_model_combined
f.individual = new_learnt_models


# In[18]:

import pickle


# In[19]:

pickle.dump(f, open( "../fhmm_model_all_%d.p" %mins, "wb" ))

In [17]:
tensor = np.load('../1H-input.npy')

In [18]:
t_all, valid_homes = create_subset_dataset(tensor)
t_all.shape

(52, 7, 336)

In [15]:
def create_subset_dataset(tensor):
    t_subset = tensor[:, :, 180:194, :]
    all_indices = np.array(list(range(320)))
    for i in range(1, 7):
        valid_homes = pd.DataFrame(t_subset[:, i, :].reshape(320, 14*24)).dropna().index
        all_indices = np.intersect1d(all_indices, valid_homes)
    t_subset = t_subset[all_indices, :, :, :].reshape(52, 7, 14*24)
    
    # Create artificial aggregate
    t_subset[:, 0, :] = 0.0
    for i in range(1, 7):
        t_subset[:, 0, :] = t_subset[:, 0, :] + t_subset[:, i, :]
    # t_subset is of shape (#home, appliance, days*hours)
    return t_subset, all_indices

In [19]:
train_agg = t_all[:30, 0, :].reshape(30*14, 24)
train_appliance = t_all[:30, 1:, :].reshape(30*14, 6*24)

train_hvac = t_all[:30, 1, :].reshape(30*14, 24)
train_fridge = t_all[:30, 2, :].reshape(30*14, 24)
train_mw = t_all[:30, 3, :].reshape(30*14, 24)
train_dw = t_all[:30, 4, :].reshape(30*14, 24)
train_wm = t_all[:30, 5, :].reshape(30*14, 24)
train_oven = t_all[:30, 6, :].reshape(30*14, 24)








train_agg_new = train_hvac + train_fridge





test_hvac = t_all[30:, 1, :].reshape(22*14, 24)
test_fridge = t_all[30:, 2, :].reshape(22*14, 24)
test_mw = t_all[30:, 3, :].reshape(22*14, 24)
test_dw = t_all[30:, 4, :].reshape(22*14, 24)
test_wm = t_all[30:, 5, :].reshape(22*14, 24)
test_oven = t_all[30:, 6, :].reshape(22*14, 24)
test_appliance = t_all[30:, 1:, :].reshape(22*14, 6*24)









test_agg = t_all[30:, 0, :].reshape(22*14, 24)
test_agg_new = test_hvac + test_fridge




In [20]:
from common import APPLIANCES_ORDER

In [44]:
o[10].shape

(480, 1)

In [40]:
train_hvac.shape

(420, 24)

In [52]:
for appliance in APPLIANCES_ORDER[1:]:
    print(appliance, "training")
    train_appliance = t_all[:30, APPLIANCES_ORDER.index(appliance), :].reshape(30*14, 24)
    
    mod = hmm.GaussianHMM(3, "full")
    mod.fit(train_appliance.reshape(-1, 420, 1))
    models[appliance] = mod
    print("Means for %s are" %appliance)
    print(mod.means_)



hvac training
Means for hvac are
[[  857.78000675]
 [  134.12083446]
 [ 1982.11966036]]
fridge training
Means for fridge are
[[  65.60073032]
 [ 139.81886167]
 [  82.2790635 ]]
mw training
Means for mw are
[[   2.27637744]
 [ 107.08210731]
 [  19.46393628]]
dw training
Means for dw are
[[  6.45316426e-06]
 [  5.20760204e+02]
 [  2.09764943e+01]]
wm training
Means for wm are
[[   0.        ]
 [ 106.51820273]
 [  10.2084481 ]]
oven training
Means for oven are
[[   1.73995153]
 [   7.87050492]
 [ 556.4387012 ]]
Not enough samples for oven


In [53]:
from nilmtk.disaggregate.fhmm_exact import sort_learnt_parameters


# In[9]:

new_learnt_models = OrderedDict()
for appliance, appliance_model in models.items():
    startprob, means, covars, transmat = sort_learnt_parameters(
                    appliance_model.startprob_, appliance_model.means_,
                    appliance_model.covars_, appliance_model.transmat_)
    new_learnt_models[appliance] = hmm.GaussianHMM(
                startprob.size, "full", startprob, transmat)
    new_learnt_models[appliance].means_ = means
    new_learnt_models[appliance].covars_ = covars


# In[ ]:

from nilmtk.disaggregate.fhmm_exact import create_combined_hmm


# In[ ]:

learnt_model_combined = create_combined_hmm(new_learnt_models)


# In[ ]:

from nilmtk.disaggregate.fhmm_exact import FHMM


# In[16]:

f = FHMM()


# In[17]:

f.model = learnt_model_combined
f.individual = new_learnt_models


# In[18]:

import pickle


# In[19]:

pickle.dump(f, open( "../fhmm_model_all_%d.p" %mins, "wb" ))

In [94]:
p['hvac'][p['hvac']>d] = d[[p['hvac']>d]]
p['hvac']

0      858.000000
1      858.000000
2      134.000000
3      134.000000
4      134.000000
5      132.799999
6       70.916664
7      134.000000
8      858.000000
9      134.000000
10     134.000000
11     858.000000
12     858.000000
13     858.000000
14     858.000000
15     858.000000
16    1982.000000
17    1982.000000
18    1982.000000
19    1982.000000
20     858.000000
21     858.000000
22    1982.000000
23     858.000000
Name: hvac, dtype: float64

In [95]:
pred_fhmm = []
pred = {}
for appliance in APPLIANCES_ORDER[1:]:
    pred[appliance] = []
for i, d in enumerate(test_agg[:]):
    print(i)
    p = f.disaggregate_chunk(pd.DataFrame(d))
    p['hvac'][p['hvac']>d] = d[[p['hvac']>d]]


    for appliance in p.columns:
        pred[appliance].append(p[appliance].values)
for appliance in APPLIANCES_ORDER[1:]:
    pred[appliance] = np.array(pred[appliance])

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

In [96]:
from sklearn.metrics import mean_absolute_error
for appliance in APPLIANCES_ORDER[1:]:
        print(appliance, mean_absolute_error(pred[appliance],  t_all[30:, APPLIANCES_ORDER.index(appliance), :].reshape(22*14, 24)))

hvac 433.323076353
fridge 42.0302172858
mw 40.9481669386
dw 169.639290198
wm 43.5151041613
oven 171.871076865
