# Table of Contents
 <p><div class="lev1"><a href="#Create-the-data-pipeline-for-training-and-testing-sets"><span class="toc-item-num">1 - </span>Create the data pipeline for training and testing sets</a></div><div class="lev2"><a href="#Training-set"><span class="toc-item-num">1.1 - </span>Training set</a></div><div class="lev2"><a href="#Testing-set"><span class="toc-item-num">1.2 - </span>Testing set</a></div><div class="lev1"><a href="#Train-a-ConvNet-using-the-rectangle-method"><span class="toc-item-num">2 - </span>Train a ConvNet using the rectangle method</a></div><div class="lev1"><a href="#Loss,-predictions-and-scores"><span class="toc-item-num">3 - </span>Loss, predictions and scores</a></div><div class="lev1"><a href="#Reload-Model-and-Weights-to-perform-evaluation"><span class="toc-item-num">4 - </span>Reload Model and Weights to perform evaluation</a></div>

This notebook shows the training and testing of a ConvNet for the rectangle method applied to the Fridge appliance.

# Create the data pipeline for training and testing sets

## Training set

In [1]:
from __future__ import print_function
%matplotlib inline
from matplotlib.pylab import plt
from matplotlib import rcParams
rcParams['figure.figsize'] = (13, 6)
plt.style.use('ggplot')
import numpy as np
import nilmtk
from nilmtk.utils import print_dict
from nilmtk import DataSet
from neuralnilm.data.loadactivations import load_nilmtk_activations
from neuralnilm.data.syntheticaggregatesource import SyntheticAggregateSource
from neuralnilm.data.realaggregatesource import RealAggregateSource
from neuralnilm.data.stridesource import StrideSource
from neuralnilm.data.datapipeline import DataPipeline
from neuralnilm.data.processing import DivideBy, IndependentlyCenter

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution1D
from keras.optimizers import SGD
import time


# Build the per-fold window dictionary (folds: 'train',
# 'unseen_activations_of_seen_appliances', 'unseen_appliances') restricted to
# the requested buildings.
def select_windows(train_buildings, unseen_buildings):
    """Pick the windows of the requested buildings out of ``WINDOWS``.

    Each building id in ``train_buildings`` has its entry copied from both
    the 'train' and the 'unseen_activations_of_seen_appliances' folds; each
    building id in ``unseen_buildings`` has its entry copied from the
    'unseen_appliances' fold.

    Returns a dict with one (possibly empty) sub-dict per name in
    ``DATA_FOLD_NAMES``, mapping building id -> window taken from ``WINDOWS``.
    A ``KeyError`` propagates if a requested building has no entry in the
    corresponding fold of ``WINDOWS``.
    """
    selected = dict((fold_name, {}) for fold_name in DATA_FOLD_NAMES)

    # Buildings used for training contribute to both "seen" folds.
    seen_folds = ('train', 'unseen_activations_of_seen_appliances')
    for building_id in train_buildings:
        for fold_name in seen_folds:
            selected[fold_name][building_id] = WINDOWS[fold_name][building_id]

    # Held-out buildings only contribute to the 'unseen_appliances' fold.
    for building_id in unseen_buildings:
        selected['unseen_appliances'][building_id] = (
            WINDOWS['unseen_appliances'][building_id])

    return selected


def filter_activations(windows, activations):
    """Keep only the activations of buildings that have a window in their fold.

    Parameters
    ----------
    windows : dict
        fold name -> {building id -> window}; only the building ids are used.
    activations : dict
        fold name -> appliance -> building name -> activations. Building
        names are matched by their ``_building_<id>`` suffix, so any dataset
        prefix is accepted (the notebook loads REDD, while the previous
        implementation only recognised 'UK-DALE_building_<id>').

    Returns
    -------
    dict
        A new dict with one entry per fold in ``DATA_FOLD_NAMES`` and one
        (possibly empty) sub-dict per appliance in ``APPLIANCES``, holding
        only the selected buildings. ``activations`` itself is not modified.
        Folds or appliances that are not in ``DATA_FOLD_NAMES`` /
        ``APPLIANCES`` are dropped, as before.
    """
    new_activations = {
        fold: {appliance: {} for appliance in APPLIANCES}
        for fold in DATA_FOLD_NAMES}
    for fold, appliances in activations.items():
        if fold not in new_activations:
            continue
        # A fold without any window selects no building at all; an empty
        # tuple makes str.endswith() return False for every name.
        required_suffixes = tuple(
            '_building_{}'.format(i) for i in windows.get(fold, {}))
        for appliance, buildings in appliances.items():
            if appliance not in new_activations[fold]:
                continue
            for building_name, building_activations in buildings.items():
                if building_name.endswith(required_suffixes):
                    new_activations[fold][appliance][building_name] = (
                        building_activations)
    # The original returned the unfiltered `activations` here, which made the
    # whole function a no-op.
    return new_activations



# Path of the NILMTK HDF5 file (REDD), relative to the notebook.
NILMTK_FILENAME = './redd_data/redd.h5'
# Passed as `sample_period` to the activation loader, to both aggregate
# sources and to the test-set loading cells (presumably seconds -- confirm).
SAMPLE_PERIOD = 6
# NOTE(review): not referenced by any cell visible in this notebook.
STRIDE = None
# Appliances whose activations are loaded; also used by filter_activations().
APPLIANCES = ['fridge']
# fold name -> building id -> pair of date strings, presumably (start, end)
# with None meaning "no end bound" -- confirm against load_nilmtk_activations.
# NOTE(review): buildings 2 and 3 end their 'train' window in 2013 whereas
# buildings 1 and 6 end in 2011 -- check that this is intended.
WINDOWS = {
    'train': {
        1: ("2011-04-19", "2011-05-21"),
        2: ("2011-04-19", "2013-05-01"),
        3: ("2011-04-19", "2013-05-26"),
        6: ("2011-05-22", "2011-06-14"),
    },
    'unseen_activations_of_seen_appliances': {
        1: ("2011-04-19", None),
        2: ("2011-04-19", None),
        3: ("2011-04-19", None),
        6: ("2011-05-22", None),
    },
    'unseen_appliances': {
        5: ("2011-04-19", None)
    }
}

# Load the activations of every appliance in APPLIANCES for all folds and
# buildings declared in WINDOWS (the result is filtered per building later).
activations = load_nilmtk_activations(
    appliances=APPLIANCES,
    filename=NILMTK_FILENAME,
    sample_period=SAMPLE_PERIOD,
    windows=WINDOWS
)

# Build the data pipeline for the fridge example.
num_seq_per_batch = 64          # sequences per training batch
target_appliance = 'fridge'
seq_length = 512                # samples per sequence
train_buildings = [1, 2, 3, 6]
unseen_buildings = [5]
# Fold names used by select_windows() and filter_activations().
DATA_FOLD_NAMES = (
    'train', 'unseen_appliances', 'unseen_activations_of_seen_appliances')

filtered_windows = select_windows(train_buildings, unseen_buildings)
filtered_activations = filter_activations(filtered_windows, activations)

# Source 0: aggregate signal synthesised from the appliance activations.
synthetic_agg_source = SyntheticAggregateSource(
    activations=filtered_activations,
    target_appliance=target_appliance,
    seq_length=seq_length,
    sample_period=SAMPLE_PERIOD
)

# Source 1: aggregate signal read from the dataset file.
real_agg_source = RealAggregateSource(
    activations=filtered_activations,
    target_appliance=target_appliance,
    seq_length=seq_length,
    filename=NILMTK_FILENAME,
    windows=filtered_windows,
    sample_period=SAMPLE_PERIOD
)


# ------------
# Scaling factors for the inputs and targets.
# They are estimated once from a single batch of 1024 real-aggregate
# sequences (deliberately larger than the training batch size), using the
# values before any processing is applied.
# The next() builtin works on both Python 2 and Python 3 generators.
scaling_batch = next(real_agg_source.get_batch(num_seq_per_batch=1024))
sample = scaling_batch.before_processing
input_std = sample.input.flatten().std()
target_std = sample.target.flatten().std()
# ------------


# Inputs are divided by the input std and then centred independently;
# targets are only divided by the target std.
pipeline = DataPipeline(
    [synthetic_agg_source, real_agg_source],
    num_seq_per_batch=num_seq_per_batch,
    input_processing=[DivideBy(input_std), IndependentlyCenter()],
    target_processing=[DivideBy(target_std)]
)

Using Theano backend.
the new syntax is .resample(...).ffill(limit=9)
  data = data.resample(**resample_kwargs)


Loading data for meter ElecMeterID(instance=2, building=1, dataset='REDD')     

  warn("As a quick implementation we only get Good Sections from"
  val = getitem(key)
use .resample(...).mean() instead of .resample(...)
  return resampled.index[0]
the new syntax is .resample(...).ffill(limit=5)
  data = data.resample(**resample_kwargs)



Done loading data all meters for this chunk.
Loading data for meter ElecMeterID(instance=2, building=1, dataset='REDD')     
Done loading data all meters for this chunk.
Loading data for meter ElecMeterID(instance=2, building=1, dataset='REDD')     
Done loading data all meters for this chunk.
Loading data for meter ElecMeterID(instance=2, building=1, dataset='REDD')     
Done loading data all meters for this chunk.
Loading data for meter ElecMeterID(instance=2, building=1, dataset='REDD')     
Done loading data all meters for this chunk.
Loading data for meter ElecMeterID(instance=2, building=1, dataset='REDD')     
Done loading data all meters for this chunk.
Loading data for meter ElecMeterID(instance=2, building=1, dataset='REDD')     
Done loading data all meters for this chunk.
Loading data for meter ElecMeterID(instance=2, building=1, dataset='REDD')     
Done loading data all meters for this chunk.
Loading data for meter ElecMeterID(instance=2, building=1, dataset='REDD')     

## Testing set

In [5]:
# Load the aggregate (mains) power of the held-out building.
# DataSet is already imported in the first cell; the dataset path is taken
# from NILMTK_FILENAME so that it is defined in one place only.
redd = DataSet(NILMTK_FILENAME)
mains_meter = redd.buildings[unseen_buildings[0]].elec.mains()
good_sections = mains_meter.good_sections()
mains_data = mains_meter.power_series_all_data(
    sample_period=SAMPLE_PERIOD, sections=good_sections).dropna()
# Number of whole sequences of length seq_length in the testing set.
# Floor division keeps this an int on Python 3 as well, which the later
# range(num_test_seq) and array shapes require.
num_test_seq = mains_data.shape[0] // seq_length

Loading data for meter ElecMeterID(instance=2, building=5, dataset='REDD')     
Done loading data all meters for this chunk.
Loading data for meter ElecMeterID(instance=2, building=5, dataset='REDD')     
Done loading data all meters for this chunk.
Loading data for meter ElecMeterID(instance=2, building=5, dataset='REDD')     
Done loading data all meters for this chunk.
Loading data for meter ElecMeterID(instance=2, building=5, dataset='REDD')     
Done loading data all meters for this chunk.
Loading data for meter ElecMeterID(instance=2, building=5, dataset='REDD')     
Done loading data all meters for this chunk.
Loading data for meter ElecMeterID(instance=2, building=5, dataset='REDD')     
Done loading data all meters for this chunk.
Loading data for meter ElecMeterID(instance=2, building=5, dataset='REDD')     
Done loading data all meters for this chunk.
Loading data for meter ElecMeterID(instance=2, building=5, dataset='REDD')     
Done loading data all meters for this chunk.


In [57]:
# Sub-metered fridge power of the held-out building, loaded over the same
# good sections as the mains, cast to float32 and with missing samples dropped.
fridge_data = (
    redd.buildings[unseen_buildings[0]]
    .elec['fridge']
    .power_series_all_data(sample_period=SAMPLE_PERIOD, sections=good_sections)
    .astype(np.float32)
    .dropna()
)
# join='right' re-indexes the mains series onto the fridge series' index.
# NOTE(review): num_test_seq was computed from mains_data before this
# re-alignment -- confirm it is still the intended count.
mains_data, fridge_data = mains_data.align(fridge_data, join='right')

# Train a ConvNet using the rectangle method

In [132]:
from keras.layers import Input, Dense, Flatten
from keras.models import Model

starting_time = time.time()

# Network architecture: two 1D convolutions followed by four fully connected
# ReLU layers and a linear output layer with 3 units (presumably the
# rectangle's start, end and power -- confirm against the target definition).
input_seq = Input(shape=(1, seq_length))
conv1_layer = Convolution1D(nb_filter=16, filter_length=3, border_mode='same',
                            init='normal', activation='linear')
conv1 = conv1_layer(input_seq)
conv2 = Convolution1D(nb_filter=16, filter_length=3, border_mode='same',
                      init='normal', activation='linear')(conv1)
flat = Flatten()(conv2)
dense1 = Dense(4080, activation='relu')(flat)
dense2 = Dense(3072, activation='relu')(dense1)
dense3 = Dense(2048, activation='relu')(dense2)
dense4 = Dense(512, activation='relu')(dense3)
predictions = Dense(3, activation='linear')(dense4)
# create the model
model = Model(input=input_seq, output=predictions)
# compile the model
# NOTE(review): this SGD optimizer is built but never handed to compile();
# training actually uses Adam with its default settings. Kept only so the
# name `sgd` stays defined.
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='mean_squared_error',
              optimizer='Adam')
# Covers building the graph as well as compiling it.
compiling_time = time.time() - starting_time
print('compiling time = ', compiling_time)
# One epoch of 500 batches; the batch size comes from the pipeline
# configuration instead of a hard-coded 64.
history = model.fit_generator(pipeline.train_generator(fold='train'),
                              samples_per_epoch=num_seq_per_batch * 500,
                              nb_epoch=1, verbose=2)
print('run time = ', time.time() - starting_time)

compiling time =  0.377595901489
Epoch 1/1
143s - loss: 0.4186
run time =  146.517812014


In [138]:
from keras.models import model_from_json
# Serialise the architecture (not the weights) to JSON.
json_string = model.to_json()
# The with-block guarantees the file is flushed and closed; the original
# open(...).write(...) left that to garbage collection.
with open('./tmpdata/convnet_architecture.json', 'w') as architecture_file:
    architecture_file.write(json_string)

In [17]:
# Store the trained weights next to the JSON architecture file;
# overwrite=True replaces an existing weights file without prompting.
print('saving the weights ... ')
model.save_weights('./tmpdata/my_model_weights.h5', overwrite = True)
print('done saving the weights')

saving the weights ... 
done saving the weights


# Loss, predictions and scores

In [16]:
# Evaluate the model on batches drawn from the 'unseen_appliances' fold.
from neuralnilm.metrics import Metrics
score = Metrics(state_boundaries=[2.5])
# One batch of num_seq_per_batch sequences is drawn per test sequence;
# each target / prediction has 3 values.
num_test_samples = num_seq_per_batch * num_test_seq
Y_test = np.empty((num_test_samples, 3))
Y_pred = np.empty((num_test_samples, 3))
# Create the generator once and keep pulling batches from it, instead of
# building a new generator on every iteration.
# source_id=1 is the second source given to DataPipeline (real_agg_source).
test_generator = pipeline.train_generator(fold='unseen_appliances',
                                          source_id=1)
for i in range(num_test_seq):
    # next() builtin works on both Python 2 and Python 3 generators.
    x_test, y_test = next(test_generator)
    rows = slice(num_seq_per_batch * i, num_seq_per_batch * (i + 1))
    Y_test[rows] = y_test
    Y_pred[rows] = model.predict_on_batch(x_test)
results = score.compute_metrics(Y_pred, Y_test)
# With print_function imported, a bare `print` is a no-op expression;
# call it to actually emit the blank line.
print()
print(results)
    



{'regression': {'mean_squared_error': 0.8792891859781085, 'relative_error_in_total_energy': -0.36393199409262234, 'mean_absolute_error': 0.6925988615443046}, 'classification_2_state': {'f1_score': 0.009828009828009828, 'recall_score': 0.006060606060606061, 'accuracy_score': 0.9792182343234324, 'precision_score': 0.025974025974025976}}


In [21]:
# Second evaluation run on the 'unseen_appliances' fold. This cell repeats
# the evaluation cell above.
# NOTE(review): the recorded scores differ between the two runs, presumably
# because different batches are drawn each time -- consider a single
# evaluation function and a fixed seed.
from neuralnilm.metrics import Metrics
score = Metrics(state_boundaries=[2.5])
# One batch of num_seq_per_batch sequences is drawn per test sequence;
# each target / prediction has 3 values.
num_test_samples = num_seq_per_batch * num_test_seq
Y_test = np.empty((num_test_samples, 3))
Y_pred = np.empty((num_test_samples, 3))
# Create the generator once and keep pulling batches from it, instead of
# building a new generator on every iteration.
# source_id=1 is the second source given to DataPipeline (real_agg_source).
test_generator = pipeline.train_generator(fold='unseen_appliances',
                                          source_id=1)
for i in range(num_test_seq):
    # next() builtin works on both Python 2 and Python 3 generators.
    x_test, y_test = next(test_generator)
    rows = slice(num_seq_per_batch * i, num_seq_per_batch * (i + 1))
    Y_test[rows] = y_test
    Y_pred[rows] = model.predict_on_batch(x_test)
results = score.compute_metrics(Y_pred, Y_test)
# With print_function imported, a bare `print` is a no-op expression;
# call it to actually emit the blank line.
print()
print(results)
    




{'regression': {'mean_squared_error': 1.3554636267359714, 'relative_error_in_total_energy': -0.42735460705489553, 'mean_absolute_error': 0.7381974466547689}, 'classification_2_state': {'f1_score': 0.0449438202247191, 'recall_score': 0.19937694704049844, 'accuracy_score': 0.8597359735973598, 'precision_score': 0.025326474079936684}}


# Reload Model and Weights to perform evaluation

In [20]:
from keras.models import model_from_json
# Rebuild the network from the saved JSON architecture; the with-block
# closes the file instead of leaving the handle to garbage collection.
with open('./tmpdata/convnet_architecture.json') as architecture_file:
    model = model_from_json(architecture_file.read())
# Restore the trained weights into the rebuilt network.
model.load_weights('./tmpdata/my_model_weights.h5')
# The reloaded model is compiled again here with the same loss and
# optimizer as used for training.
model.compile(loss='mean_squared_error', optimizer='Adam')