# Group E Assignment 5: NILMTK

## Exercise 1

### 1.1 Load UK-DALE data into memory

In [None]:
from nilmtk import DataSet
from nilmtk.utils import print_dict
from nilmtk.timeframe import TimeFrame
import pandas as pd

# Open the UK-DALE file; the path is relative to the notebook's working directory.
ukdale = DataSet('./data/ukdale.h5')


#train = DataSet('/path/redd.h5')

### 1.2 Print out Metadata

In [None]:
# Print the dataset-level metadata with nilmtk's print_dict helper.
print_dict(ukdale.metadata)

### Print out Buildings

In [None]:
# Print the buildings contained in the dataset.
print_dict(ukdale.buildings)

### 1.3 Print out the sub-metered appliances in each building

In [None]:
# Walk over every building and show the group of its sub-metered appliances.
for building_id, building in ukdale.buildings.items():
    header = "Appliances of Building " + str(building_id)
    print(header)
    print(building.elec.submeters())
    print("---")

In [None]:
# Keep a handle on building 1's `elec` object; the cells below query it.
elec = ukdale.buildings[1].elec

### 1.4 Calculate the total energy consumption for building 1 in kWh

In [None]:
# Total energy of building 1's mains.
# NOTE(review): the heading asks for kWh — confirm the unit of the returned value.
elec.mains().total_energy()

### 1.5 Print out the type of power for mains and sub-meters

In [None]:
# AC types available for the 'power' measurement on the mains.
elec.mains().available_ac_types('power')

In [None]:
# AC types available for the 'power' measurement across the sub-meters.
elec.submeters().available_ac_types('power')

## Exercise 2

### 2.1 Plot "Fridge Freezer" and "Light" power for a fixed time window

In [None]:
# Open a second handle on the dataset and restrict it to a single day.
ukdale_window = DataSet('./data/ukdale.h5')
ukdale_window.set_window(end='2014-04-29', start='2014-04-28')

# Meter group of building 1, plus the two appliance meters plotted below.
elec = ukdale_window.buildings[1].elec
fridge_meter = elec['fridge freezer']
light_meter = elec['light']

In [None]:
# Plot the two appliance meters over the window set on ukdale_window.
fridge_meter.plot()
light_meter.plot()

### 2.2 Plot Overall Consumption For that time period

In [None]:
# load() is consumed with next(), so only the first chunk it yields is kept.
all_window = next(ukdale_window.buildings[1].elec.load())
# Preview the first rows of that chunk.
all_window.head()

In [None]:
# Plot the 'power' column(s) of the loaded chunk.
all_window['power'].plot()

### 2.3 Calculate and plot the energy consumption fraction for each sub-meter

In [None]:
# Energy per sub-meter, transposed and with missing entries set to 0.
# Despite its name, this frame holds absolute energies, not fractions.
per_meter_energy = elec.submeters().energy_per_meter()
energy_fraction_per_submeter = per_meter_energy.transpose().fillna(0)
del energy_fraction_per_submeter['reactive']

# Share of the summed active energy attributed to each sub-meter.
active_en = energy_fraction_per_submeter['active']
active_total = active_en.sum()
active_en_frac = active_en / active_total
active_en_frac.plot(title="Active Energy Fraction", figsize=(15, 10), kind="bar")

In [None]:
# Share of the summed apparent energy attributed to each sub-meter.
apparent_en = energy_fraction_per_submeter['apparent']
apparent_total = apparent_en.sum()
apparent_en_frac = apparent_en / apparent_total
apparent_en_frac.plot(title="Apparent Energy Fraction", figsize=(15, 10), kind="bar")

### 2.4 Find appliances of the type “single-phase induction motor”

In [None]:
# Select the meters whose appliances belong to the given category.
elec.select_using_appliances(category='single-phase induction motor')

## Exercise 3

In [None]:
import time
import pandas as pd
from six import iteritems
import matplotlib.pyplot as plt
import numpy as np

# Two separate handles on the same file, one for training and one for testing;
# each is given its own time window in the following cell.
train = DataSet('./data/ukdale.h5')
test = DataSet('./data/ukdale.h5')

In [None]:
# Split in time: training data ends where test data begins.
# NOTE(review): these date strings are written day-month-year; an ISO form
# (e.g. 2013-03-24) would rule out parsing ambiguity — confirm how set_window parses them.
train.set_window(end="24-3-2013")
test.set_window(start="25-3-2013")

# Building 3 is used for both training and testing.
train_elec = train.buildings[3].elec
test_elec = test.buildings[3].elec

In [None]:
# Plot the meters of building 3 over the training window.
train_elec.plot()

In [None]:
# Plot only the mains of building 3 over the test window.
test_elec.mains().plot()

In [None]:
# Peek at the first chunk yielded by the training mains.
mains = train_elec.mains()
mains_df = next(mains.load())
mains_df.head()

### Combinatorial Optimisation

In [None]:
# Import and construct before starting the clock, so the reported
# "Train runtime" covers only the call to train().
from nilmtk.disaggregate import CombinatorialOptimisation
from nilmtk.tests.testingtools import data_dir

co = CombinatorialOptimisation()

start = time.time()
co.train(train_elec)
end = time.time()
print("Train runtime =", end-start, "seconds.")

In [None]:
# Disaggregate the test mains chunk by chunk with the trained CO model and
# collect the sub-meter readings as ground truth.
pred = {}
gt = {}

for i, chunk in enumerate(test_elec.mains().load()):
    chunk_drop_na = chunk.dropna()
    pred[i] = co.disaggregate_chunk(chunk_drop_na)
    gt[i] = {}

    # Every sub-meter is loaded here; the columns are narrowed to the
    # predicted meters further down.
    # NOTE(review): next(...) takes the first chunk of each sub-meter on
    # every iteration — confirm this is intended when mains has several chunks.
    for meter in test_elec.submeters().meters:
        gt[i][meter] = next(meter.load())
    gt[i] = pd.DataFrame({k: v.squeeze() for k, v in gt[i].items()}, index=next(iter(gt[i].values())).index).dropna()

# Stack the per-chunk frames and drop the chunk-number index level.
gt_overall = pd.concat(gt)
gt_overall.index = gt_overall.index.droplevel()
pred_overall = pd.concat(pred)
pred_overall.index = pred_overall.index.droplevel()

# Same columns, in the same order, as the prediction.
gt_overall = gt_overall[pred_overall.columns]

# Timestamps present in both frames, intersected in UTC.
gt_index_utc = gt_overall.index.tz_convert("UTC")
pred_index_utc = pred_overall.index.tz_convert("UTC")
common_index_utc = gt_index_utc.intersection(pred_index_utc)

local_timezone = train.metadata['timezone']

common_index_local = common_index_utc.tz_convert(local_timezone)

# .loc replaces the deprecated (and, since pandas 1.0, removed) .ix indexer.
gt_overall = gt_overall.loc[common_index_local]
pred_overall = pred_overall.loc[common_index_local]

gt_overall.head()

# Replace the meter objects used as column keys by their labels.
appliance_labels = [m.label() for m in gt_overall.columns.values]
gt_overall.columns = appliance_labels
pred_overall.columns = appliance_labels

pred_overall.head()

pred_overall.head(1000).plot(label="Pred")
gt_overall.head(1000).plot(label="GT")
plt.legend()

from sklearn.metrics import mean_squared_error

# Root-mean-square error per appliance.
rms_error = {}
for appliance in gt_overall.columns:
    rms_error[appliance] = np.sqrt(mean_squared_error(gt_overall[appliance], pred_overall[appliance]))

pd.Series(rms_error)

### FHMM

In [None]:
# Import and construct before starting the clock, so the reported runtime
# covers only the call to train().
from nilmtk.disaggregate import fhmm_exact
fhmm = fhmm_exact.FHMM()

start = time.time()
fhmm.train(train_elec)
end = time.time()
print("Runtime =", end-start, "seconds.")

In [None]:
# Disaggregate the test mains chunk by chunk with the trained FHMM model
# (loaded at a 60 s sample period) and collect the sub-meter readings as
# ground truth.
pred = {}
gt = {}

for i, chunk in enumerate(test_elec.mains().load(sample_period=60)):
    chunk_drop_na = chunk.dropna()
    pred[i] = fhmm.disaggregate_chunk(chunk_drop_na)
    gt[i] = {}

    # Every sub-meter is loaded here; the columns are narrowed to the
    # predicted meters further down.
    # NOTE(review): next(...) takes the first chunk of each sub-meter on
    # every iteration — confirm this is intended when mains has several chunks.
    for meter in test_elec.submeters().meters:
        gt[i][meter] = next(meter.load(sample_period=60))
    gt[i] = pd.DataFrame({k: v.squeeze() for k, v in gt[i].items()}, index=next(iter(gt[i].values())).index).dropna()

# Stack the per-chunk frames and drop the chunk-number index level.
gt_overall = pd.concat(gt)
gt_overall.index = gt_overall.index.droplevel()
pred_overall = pd.concat(pred)
pred_overall.index = pred_overall.index.droplevel()

# Same columns, in the same order, as the prediction.
gt_overall = gt_overall[pred_overall.columns]

# Timestamps present in both frames, intersected in UTC.
gt_index_utc = gt_overall.index.tz_convert("UTC")
pred_index_utc = pred_overall.index.tz_convert("UTC")
common_index_utc = gt_index_utc.intersection(pred_index_utc)

local_timezone = train.metadata['timezone']

common_index_local = common_index_utc.tz_convert(local_timezone)

# .loc replaces the deprecated (and, since pandas 1.0, removed) .ix indexer.
gt_overall = gt_overall.loc[common_index_local]
pred_overall = pred_overall.loc[common_index_local]

gt_overall.head()

# Replace the meter objects used as column keys by their labels.
appliance_labels = [m.label() for m in gt_overall.columns.values]
gt_overall.columns = appliance_labels
pred_overall.columns = appliance_labels

pred_overall.head()

pred_overall.head(1000).plot(label="Pred")
gt_overall.head(1000).plot(label="GT")
plt.legend()

from sklearn.metrics import mean_squared_error

# Root-mean-square error per appliance.
rms_error = {}
for appliance in gt_overall.columns:
    rms_error[appliance] = np.sqrt(mean_squared_error(gt_overall[appliance], pred_overall[appliance]))

pd.Series(rms_error)

## Exercise 4

### Calculate F-Score of CO and FHMM

In [1]:
from __future__ import print_function, division
import time
from matplotlib import rcParams
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import warnings
from six import iteritems

warnings.filterwarnings('ignore')
%matplotlib inline

rcParams['figure.figsize'] = (13, 6)

from nilmtk import DataSet, TimeFrame, MeterGroup, HDFDataStore
from nilmtk.disaggregate import CombinatorialOptimisation, FHMM

  return f(*args, **kwds)
  return f(*args, **kwds)
  return f(*args, **kwds)
  return f(*args, **kwds)
  return f(*args, **kwds)


In [2]:
# Separate handles on the same file so train and test can be windowed independently.
train = DataSet('./data/ukdale.h5')
test = DataSet('./data/ukdale.h5')

  return f(*args, **kwds)


In [3]:
# Split in time: training data ends where test data begins.
# NOTE(review): date strings are day-month-year — confirm how set_window parses them.
train.set_window(end="24-3-2013")
test.set_window(start="25-3-2013")


# Building 3 is used for both training and testing.
train_elec = train.buildings[3].elec
test_elec = test.buildings[3].elec

In [4]:
# Restrict training to the sub-meters returned by select_top_k(k=5).
# NOTE(review): presumably ranked by energy consumption — confirm against nilmtk docs.
top_5_train_elec = train_elec.submeters().select_top_k(k=5)

4/4 ElecMeter(instance=5, building=3, dataset='UK-DALE', appliances=[Appliance(type='projector', instance=1)])

In [5]:
def predict(clf, test_elec, sample_period, timezone):
    """Disaggregate the test mains with ``clf`` and align it with ground truth.

    Parameters
    ----------
    clf
        Trained disaggregator; only ``clf.disaggregate_chunk(chunk)`` is used.
    test_elec
        Meter group; ``test_elec.mains().load(...)`` supplies the aggregate
        chunks and ``test_elec.submeters().meters`` the ground-truth meters.
    sample_period
        Passed as ``sample_period`` to every ``load`` call.
    timezone
        Time zone the shared timestamps are converted to before the rows are
        selected (the frames are expected to be indexed in this zone).

    Returns
    -------
    (gt_overall, pred_overall)
        Two DataFrames restricted to the timestamps present in both, with the
        same columns in the same order, labelled by ``meter.label()``.
    """
    pred = {}
    gt = {}
    # The set of sub-meters does not change between chunks; look it up once.
    submeters = test_elec.submeters().meters

    for i, chunk in enumerate(test_elec.mains().load(sample_period=sample_period)):
        pred[i] = clf.disaggregate_chunk(chunk.dropna())

        # Every sub-meter is loaded; the columns are narrowed to the
        # predicted meters below.
        # NOTE(review): next(...) takes the first chunk of each sub-meter on
        # every iteration — confirm this is intended for multi-chunk mains.
        frames = {
            meter: next(meter.load(sample_period=sample_period))
            for meter in submeters
        }
        first_index = next(iter(frames.values())).index
        gt[i] = pd.DataFrame(
            {meter: frame.squeeze() for meter, frame in frames.items()},
            index=first_index,
        ).dropna()

    # Stack the per-chunk frames (assumes everything fits in memory) and drop
    # the chunk-number level that pd.concat adds to the index.
    gt_overall = pd.concat(gt)
    gt_overall.index = gt_overall.index.droplevel()
    pred_overall = pd.concat(pred)
    pred_overall.index = pred_overall.index.droplevel()

    # Same columns, in the same order, as the prediction.
    gt_overall = gt_overall[pred_overall.columns]

    # Timestamps present in both frames, intersected in UTC.
    gt_index_utc = gt_overall.index.tz_convert("UTC")
    pred_index_utc = pred_overall.index.tz_convert("UTC")
    common_index_utc = gt_index_utc.intersection(pred_index_utc)

    # .loc replaces the deprecated (and, since pandas 1.0, removed) .ix indexer.
    common_index_local = common_index_utc.tz_convert(timezone)
    gt_overall = gt_overall.loc[common_index_local]
    pred_overall = pred_overall.loc[common_index_local]

    # Replace the meter objects used as column keys by their labels.
    appliance_labels = [m.label() for m in gt_overall.columns.values]
    gt_overall.columns = appliance_labels
    pred_overall.columns = appliance_labels
    return gt_overall, pred_overall

In [6]:
# Train each disaggregator on the top-5 sub-meters, write its disaggregation
# of the test mains to an HDF5 store, and collect in-memory predictions.
classifiers = {'CO':CombinatorialOptimisation(), 'FHMM':FHMM()}
predictions = {}
sample_period = 120
for clf_name, clf in classifiers.items():
    print("*"*20)
    print(clf_name)
    print("*" *20)
    clf.train(top_5_train_elec, sample_period=sample_period)
    output = HDFDataStore("./data/"+str(clf_name)+".h5", 'w')
    try:
        # Use the same sample period as for training instead of a second
        # hard-coded 120.
        clf.disaggregate(test_elec.mains(), output, sample_period=sample_period, resample=True)
    finally:
        # Close the store even when disaggregate() raises, so the file is
        # not left open in write mode.
        output.close()
    gt, predictions[clf_name] = predict(clf, test_elec, sample_period, train.metadata['timezone'])

********************
CO
********************
Training model for submeter 'ElecMeter(instance=3, building=3, dataset='UK-DALE', appliances=[Appliance(type='electric space heater', instance=1)])'


  return distances if squared else np.sqrt(distances, out=distances)


Training model for submeter 'ElecMeter(instance=4, building=3, dataset='UK-DALE', appliances=[Appliance(type='laptop computer', instance=1)])'


  return distances if squared else np.sqrt(distances, out=distances)


Training model for submeter 'ElecMeter(instance=2, building=3, dataset='UK-DALE', appliances=[Appliance(type='kettle', instance=1)])'
Training model for submeter 'ElecMeter(instance=5, building=3, dataset='UK-DALE', appliances=[Appliance(type='projector', instance=1)])'


  return distances if squared else np.sqrt(distances, out=distances)


Done training!
Estimating power demand for 'ElecMeter(instance=3, building=3, dataset='UK-DALE', appliances=[Appliance(type='electric space heater', instance=1)])'
Estimating power demand for 'ElecMeter(instance=4, building=3, dataset='UK-DALE', appliances=[Appliance(type='laptop computer', instance=1)])'
Estimating power demand for 'ElecMeter(instance=2, building=3, dataset='UK-DALE', appliances=[Appliance(type='kettle', instance=1)])'
Estimating power demand for 'ElecMeter(instance=5, building=3, dataset='UK-DALE', appliances=[Appliance(type='projector', instance=1)])'
Estimating power demand for 'ElecMeter(instance=3, building=3, dataset='UK-DALE', appliances=[Appliance(type='electric space heater', instance=1)])'
Estimating power demand for 'ElecMeter(instance=4, building=3, dataset='UK-DALE', appliances=[Appliance(type='laptop computer', instance=1)])'
Estimating power demand for 'ElecMeter(instance=2, building=3, dataset='UK-DALE', appliances=[Appliance(type='kettle', instance=1)

ValueError: invalid info for [index] for [tz], existing_value [Europe/London] conflicts with new value [Europe/London]

In [None]:
# f1_score was never imported at notebook level; without this line the cell
# fails with a NameError.
from nilmtk.metrics import f1_score

disag = DataSet("./data/FHMM.h5") #load FHMM prediction
disag_elec = disag.buildings[3].elec

# Score the stored disaggregation against the test meters, then label the
# result with the appliance names before plotting.
f1 = f1_score(disag_elec, test_elec)
f1.index = disag_elec.get_labels(f1.index)
f1.plot(kind='barh')

In [None]:
def compute_f1score(gt, pred):
    """Return a dict mapping each appliance in ``gt.columns`` to its F1 score.

    Parameters
    ----------
    gt
        Ground truth, indexable by appliance label and exposing ``columns``.
    pred
        Predictions of one classifier, indexable by the same appliance labels.

    The score is computed with ``nilmtk.metrics.f1_score(prediction, truth)``.
    NOTE(review): confirm that nilmtk's f1_score accepts per-appliance columns;
    elsewhere in this notebook it is called with meter groups.
    """
    from nilmtk.metrics import f1_score
    # Index the `pred` argument, not the notebook-global `predictions` dict,
    # which is keyed by classifier name rather than by appliance.
    return {
        appliance: f1_score(pred[appliance], gt[appliance])
        for appliance in gt.columns
    }

In [None]:
# Per-appliance F1 score of every classifier, one column per classifier.
# Goes through compute_f1score (f1_score itself is not in scope here),
# mirroring how the RMSE table is built with compute_rmse.
f1 = {}

for clf_name in classifiers:
    f1[clf_name] = compute_f1score(gt, predictions[clf_name])
f1 = pd.DataFrame(f1)

In [None]:
def compute_rmse(gt, pred):
    """Return a Series with the root-mean-square error of each appliance.

    ``gt`` and ``pred`` are indexed by the appliance labels in ``gt.columns``;
    the error for a label is the square root of sklearn's mean squared error
    between the two matching columns.
    """
    from sklearn.metrics import mean_squared_error
    errors = {
        name: np.sqrt(mean_squared_error(gt[name], pred[name]))
        for name in gt.columns
    }
    return pd.Series(errors)

In [None]:
# Per-appliance RMSE of every classifier, one column per classifier.
rmse = pd.DataFrame({
    clf_name: compute_rmse(gt, predictions[clf_name])
    for clf_name in classifiers.keys()
})

In [None]:
# Display the RMSE table (rows: appliances, columns: classifiers).
rmse