# Utilisation d'un réseau de neurones convolutionnel pour la désagrégation de consommation

Dans ce notebook, on applique un réseau de neurones convolutionnel pour désagréger la consommation d'un appareil. La méthode est reprise en grande partie du travail de Jack Kelly (https://arxiv.org/pdf/1507.06594.pdf, https://github.com/JackKelly/neuralnilm). On utilisera la bibliothèque `neuralnilm` pour créer le pipeline de données d'entraînement. On utilise la bibliothèque `keras` pour le réseau de neurones.

On travaille sur le jeu de données REDD "low_freq" disponible ici : http://redd.csail.mit.edu/

## Chargement des données

In [1]:
import numpy as np
import nilmtk
from nilmtk import DataSet, MeterGroup
from nilmtk.utils import print_dict

# Open the REDD dataset from its NILMTK HDF5 file (path relative to the working directory)

dataset = DataSet("data/REDD/redd.h5")

In [2]:
# Show, for every building, how many appliances it has and which ones
for building_id, building_obj in dataset.buildings.items():
    building_appliances = building_obj.elec.appliances
    print("bâtiment", building_id, ",", len(building_appliances), "appareils")
    for appliance in building_appliances:
        print(appliance)

bâtiment 1 , 16 appareils
Appliance(type='sockets', instance=4)
Appliance(type='light', instance=3)
Appliance(type='washer dryer', instance=1)
Appliance(type='light', instance=2)
Appliance(type='sockets', instance=3)
Appliance(type='electric stove', instance=1)
Appliance(type='electric oven', instance=1)
Appliance(type='light', instance=1)
Appliance(type='electric space heater', instance=1)
Appliance(type='microwave', instance=1)
Appliance(type='unknown', instance=2)
Appliance(type='sockets', instance=2)
Appliance(type='fridge', instance=1)
Appliance(type='dish washer', instance=1)
Appliance(type='sockets', instance=1)
Appliance(type='unknown', instance=1)
bâtiment 2 , 9 appareils
Appliance(type='washer dryer', instance=1)
Appliance(type='waste disposal unit', instance=1)
Appliance(type='electric stove', instance=1)
Appliance(type='light', instance=1)
Appliance(type='microwave', instance=1)
Appliance(type='sockets', instance=2)
Appliance(type='fridge', instance=1)
Appliance(type='dish 

## Preprocessing des données, création des données d'entrainement et de test

On va maintenant préparer les données en batch de séquences à l'aide de la bibliothèque neuralnilm. Les étapes du preprocessing sont détaillées dans le rapport.



In [3]:
import nilmtk
from nilmtk.utils import print_dict
from nilmtk import DataSet
from neuralnilm.data.loadactivations import load_nilmtk_activations
from neuralnilm.data.syntheticaggregatesource import SyntheticAggregateSource
from neuralnilm.data.realaggregatesource import RealAggregateSource
from neuralnilm.data.stridesource import StrideSource
from neuralnilm.data.datapipeline import DataPipeline
from neuralnilm.data.processing import DivideBy, IndependentlyCenter

def select_windows(train_buildings, unseen_buildings):
    """Pick the entries of the global WINDOWS that belong to the given buildings.

    Args:
        train_buildings: building ids copied into both the 'train' and the
            'unseen_activations_of_seen_appliances' folds.
        unseen_buildings: building ids copied into the 'unseen_appliances'
            fold.

    Returns:
        A dict with one key per name in DATA_FOLD_NAMES, each mapping the
        selected building ids to their WINDOWS entry.

    Raises:
        KeyError: if a requested building has no entry in WINDOWS for the
            fold it is copied into.
    """
    selected = {fold_name: {} for fold_name in DATA_FOLD_NAMES}

    for building_id in train_buildings:
        for fold_name in ('train', 'unseen_activations_of_seen_appliances'):
            selected[fold_name][building_id] = WINDOWS[fold_name][building_id]

    for building_id in unseen_buildings:
        selected['unseen_appliances'][building_id] = (
            WINDOWS['unseen_appliances'][building_id])

    return selected


def filter_activations(windows, activations):
    """Keep only the activations of the buildings listed in ``windows``.

    Args:
        windows: dict ``{fold: {building_id: window}}``; only the building
            ids of each fold are used here.
        activations: dict ``{fold: {appliance: {building_name: data}}}``
            where building names have the form ``'building_<id>'``.

    Returns:
        A new dict with one entry per fold in DATA_FOLD_NAMES and, inside
        each fold, one entry per appliance in APPLIANCES. Each appliance
        maps the retained building names to the same data objects as in
        ``activations`` (they are not copied). ``activations`` itself is
        left unmodified.

    Raises:
        KeyError: if ``activations`` contains a fold (with at least one
            appliance) that is missing from ``windows``.
    """
    new_activations = {
        fold: {appliance: {} for appliance in APPLIANCES}
        for fold in DATA_FOLD_NAMES}
    for fold, appliances in activations.items():
        for appliance, buildings in appliances.items():
            required_building_names = [
                'building_{}'.format(i) for i in windows[fold]]
            for building_name in required_building_names:
                try:
                    new_activations[fold][appliance][building_name] = (
                        buildings[building_name])
                except KeyError:
                    # Best effort: skip buildings without activations for
                    # this appliance, as well as folds/appliances that are
                    # not declared in DATA_FOLD_NAMES / APPLIANCES.
                    pass
    # Return the filtered copy; the original code returned the unfiltered
    # input, which silently discarded the filtering done above.
    return new_activations

ModuleNotFoundError: No module named 'neuralnilm.data'

In [None]:
NILMTK_FILENAME = './data/REDD/redd.h5'

# Longest sampling period, so that data recorded with different sampling periods can be compared.
# (presumably expressed in seconds — TODO confirm against neuralnilm)
SAMPLE_PERIOD = 3
# All appliances to consider. One of them is the appliance targeted by the algorithm; the others are used to build synthetic data.
APPLIANCES = ['washer dryer',
 'fridge',
 'microwave',
 'dish washer']

# Date windows per data fold, keyed by building number; each value is a (start, end) pair.
# (presumably None means "no end bound" — TODO confirm against load_nilmtk_activations)
# NOTE(review): the 'unseen_activations_of_seen_appliances' windows start inside the
# 'train' windows of the same buildings (e.g. building 1: 2011-04-19 lies between
# 2011-04-18 and 2011-05-24) — confirm that this overlap is intended.
WINDOWS = {
    'train': {
        1: ("2011-04-18", "2011-05-24"),
        2: ("2011-04-17", "2011-05-22"),
        # NOTE(review): end year is 2013 while every other date here is in 2011 — confirm.
        3: ("2011-04-16", "2013-05-30"),
        6: ("2011-05-21", "2011-06-14"),
    },
    'unseen_activations_of_seen_appliances': {
        1: ("2011-04-19", None),
        2: ("2011-04-19", None),
        3: ("2011-04-19", None),
        6: ("2011-05-22", None),
    },
    'unseen_appliances': {
        5: ("2011-04-18", None)
    }
}

# Load the activations of every appliance in APPLIANCES for the windows above
activations = load_nilmtk_activations(
    appliances=APPLIANCES,
    filename=NILMTK_FILENAME,
    sample_period=SAMPLE_PERIOD,
    windows=WINDOWS
)

# ------------
# Build the data pipeline for the fridge
# ------------
num_seq_per_batch = 16
target_appliance = 'fridge'
# to be chosen according to the length of the activations and the sampling period
seq_length = 512
train_buildings = [1, 2, 3, 6]
unseen_buildings = [5]
DATA_FOLD_NAMES = (
    'train', 'unseen_appliances', 'unseen_activations_of_seen_appliances')

filtered_windows = select_windows(train_buildings, unseen_buildings)
filtered_activations = filter_activations(filtered_windows, activations)

# Keyword arguments shared by the synthetic and the real aggregate sources
common_source_args = {
    'activations': filtered_activations,
    'target_appliance': target_appliance,
    'seq_length': seq_length,
    'sample_period': SAMPLE_PERIOD,
}

synthetic_agg_source = SyntheticAggregateSource(**common_source_args)

real_agg_source = RealAggregateSource(
    filename=NILMTK_FILENAME,
    windows=filtered_windows,
    **common_source_args
)
# ------------
# Data normalisation
# The scaling factors are the standard deviations of the inputs and targets of
# one unprocessed batch of 1024 sequences drawn from the real aggregate source
sample = next(real_agg_source.get_batch(num_seq_per_batch=1024)).before_processing
input_std = sample.input.flatten().std()
target_std = sample.target.flatten().std()
# ------------

# Inputs are divided by input_std then passed to IndependentlyCenter;
# targets are only divided by target_std
input_steps = [DivideBy(input_std), IndependentlyCenter()]
target_steps = [DivideBy(target_std)]

pipeline = DataPipeline(
    [synthetic_agg_source, real_agg_source],
    num_seq_per_batch=num_seq_per_batch,
    input_processing=input_steps,
    target_processing=target_steps
)

In [1]:
# ------------
# Build the validation set
# ------------

num_test_seq = 101
num_valid_rows = num_test_seq * num_seq_per_batch

X_valid = np.empty((num_valid_rows, seq_length))
Y_valid = np.empty((num_valid_rows, 3))  # 3 columns, one per output neuron

for i in range(num_test_seq):
    # source_id: 1 for real data only, 0 for synthetic data only.
    # NOTE(review): a new generator is created for every batch — confirm
    # whether a single generator could be reused across iterations.
    x_valid, y_valid = next(
        pipeline.train_generator(fold='unseen_appliances', source_id=1))
    rows = slice(i * num_seq_per_batch, (i + 1) * num_seq_per_batch)
    X_valid[rows, :] = x_valid[:, :, 0]
    Y_valid[rows, :] = y_valid
# Add a trailing axis of length 1 to the inputs
X_valid = np.expand_dims(X_valid, axis=-1)

NameError: name 'np' is not defined

In [None]:
# Build the test set from the 'unseen_appliances' fold
# NOTE(review): the batches come from the same fold and source_id as the
# validation set — confirm that sharing this data is acceptable.

mains_meter = dataset.buildings[unseen_buildings[0]].elec.mains()
good_sections = mains_meter.good_sections()
mains_data = mains_meter.power_series_all_data(
    sample_period=SAMPLE_PERIOD, sections=good_sections).dropna()
# number of whole sequences of seq_length samples that fit in the mains data
num_test_seq = mains_data.shape[0] // seq_length

num_test_rows = num_seq_per_batch * num_test_seq
Y_test = np.empty((num_test_rows, 3))
X_test = np.empty((num_test_rows, seq_length))
for i in range(num_test_seq):
    x_test, y_test = next(
        pipeline.train_generator(fold='unseen_appliances', source_id=1))
    rows = slice(num_seq_per_batch * i, num_seq_per_batch * (i + 1))
    X_test[rows, :] = x_test[:, :, 0]
    Y_test[rows, :] = y_test
# Add a trailing axis of length 1 to the inputs
X_test = np.expand_dims(X_test, axis=-1)

# Neural Network architecture

On implémente le réseau de neurones avec `keras`

In [None]:
from keras.layers import Input, Dense, Flatten, MaxPooling1D, AveragePooling1D, Conv1D
from keras import Sequential
from keras.models import Model
import keras.callbacks
import time
from keras.models import model_from_json
import pickle

# Paths where the results of this experiment are saved
# ------------
exp_number = 32
output_architecture = './tmpdata/convnet_architecture_exp{}.json'.format(exp_number)
best_weights_during_run = './tmpdata/weights_exp{}.h5'.format(exp_number)
final_weights = './tmpdata/weights_exp{}_final.h5'.format(exp_number)
loss_history = './tmpdata/history_exp{}.pickle'.format(exp_number)
# ------------

class LossHistory(keras.callbacks.Callback):
    """Keras callback recording the training and validation loss of each epoch.

    Attributes set by the callback:
        train_losses: one entry per finished epoch, ``logs.get('loss')``.
        valid_losses: one entry per finished epoch, ``logs.get('val_loss')``
            (``None`` when the logs contain no validation loss).
    """

    def on_train_begin(self, logs=None):
        # Start from empty histories every time training begins.
        self.train_losses = []
        self.valid_losses = []

    def on_epoch_end(self, epoch, logs=None):
        # Avoid a shared mutable default and tolerate a missing logs dict.
        logs = logs or {}
        self.train_losses.append(logs.get('loss'))
        self.valid_losses.append(logs.get('val_loss'))
        

In [2]:
# Model definition: two 1D convolutions followed by a stack of dense layers,
# ending in 3 linear output neurons.

input_shape = (seq_length, 1)
model = Sequential(
    [
        # Only the first layer declares the input shape; the following layers
        # infer theirs from the previous layer's output.
        Conv1D(filters=16, kernel_size=3, input_shape=input_shape, activation="relu"),
        Conv1D(filters=16, kernel_size=3, activation="relu"),
        Flatten(),
        Dense(4096, activation="relu"),
        Dense(3072, activation="relu"),
        Dense(2048, activation="relu"),
        Dense(512, activation="relu"),
        Dense(3, activation="linear")
    ]
)

model.compile(loss="mean_squared_error", optimizer="Adam")

model.summary()

NameError: name 'seq_length' is not defined

In [None]:
# datetime is needed for the timestamped TensorBoard directory and is not
# imported anywhere else in the notebook.
import datetime

# record the loss history
history = LossHistory()
# overwrite the checkpoint only when the monitored quantity improves
# (Keras monitors the validation loss by default)
checkpointer = keras.callbacks.ModelCheckpoint(filepath=best_weights_during_run, save_best_only=True, verbose=1)
# TensorBoard logs, written to one timestamped directory per run
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

# Entrainement du modèle

In [None]:
# Train on batches of the 'train' fold. Validation uses the full validation
# set (X_valid, Y_valid) assembled earlier, not (x_valid, y_valid), which only
# hold the last batch drawn while assembling it.
model.fit(pipeline.train_generator(fold='train'),
          epochs=20,
          validation_data=(X_valid, Y_valid),
          max_queue_size=50,
          # integer number of batches per epoch (8000 sequences per epoch)
          steps_per_epoch=8000 // num_seq_per_batch,
          callbacks=[history, checkpointer, tensorboard_callback],
          verbose=2)

In [3]:
# Save the results: loss history, model architecture and final weights

print('\n saving the training and validation losses')
losses_dic = {'train_loss': history.train_losses, 'valid_loss': history.valid_losses}
with open(loss_history, 'wb') as handle:
    pickle.dump(losses_dic, handle)

print('\n saving the architecture of the model \n')
json_string = model.to_json()
# The context manager closes the file, so the JSON is flushed to disk
with open(output_architecture, 'w') as handle:
    handle.write(json_string)

print('\n saving the final weights ... \n')
model.save_weights(final_weights, overwrite=True)
print('done saving the weights')

print('This was the model trained')
# summary() prints the table itself; wrapping it in print() would add "None"
model.summary()

# Evaluation du modèle

In [None]:
%matplotlib inline
from keras.models import model_from_json
# load experiment number
exp_number = 31
#load model architecture
model_architecture = './tmpdata/convnet_architecture_exp' + str(exp_number) + '.json'
# load the weights for the lowest validation loss during training
best_weights_during_run = './tmpdata/weights_exp' + str(exp_number) + '.h5'
# load the final weights at the end of the 20 epochs
final_weights = './tmpdata/weights_exp' + str(exp_number) + '_final.h5'
# load model form json
model = model_from_json(open(model_architecture).read())

# load intermediate or final weights
model.load_weights(best_weights_during_run)
model.compile(loss="mean_squared_error", optimizer="Adam")
# print the summary of the architecture
model.summary()
# load the loss summary (training and validation losses)
import pickle
losses = pickle.load( open('./tmpdata/history_exp' + str(exp_number) + '.pickle'   , 'rb'))
# load the test set
test_set = pickle.load( open('neuralnilm/tmpdata/TestSetRePickled.pickle', 'rb'))
X_test = test_set['X_test']
Y_test = test_set["Y_test"]

# Here we predict the output from the neural network and show the scores
from neuralnilm.scores import scores
Y_pred = model.predict(X_test)
scores(Y_pred, Y_test, print_results=True)

In [None]:
# On peut afficher quelques résultats visuellement pour se faire une idée 

