In [1]:
# Mount Google Drive at /content/drive; every Drive path used in this notebook
# lives under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Project directory on the mounted Drive. The value has no trailing slash, so
# build file paths with os.path.join(path, name) rather than `path + name`.
path = "/content/drive/MyDrive/energy_disaggregation/seq2point-nilm"

In [3]:
# Copy the project's Python modules from Drive into the current working
# directory (".") so that they can be imported by the cells below.
# model_structure.py is deliberately not copied (its line is commented out).
!cp "/content/drive/MyDrive/energy_disaggregation/seq2point-nilm/seq2point_train_sm.py" .
!cp "/content/drive/MyDrive/energy_disaggregation/seq2point-nilm/data_feeder.py" .
#!cp "/content/drive/MyDrive/energy_disaggregation/seq2point-nilm/model_structure.py" .
!cp "/content/drive/MyDrive/energy_disaggregation/seq2point-nilm/seq2point_test.py" .
!cp "/content/drive/MyDrive/energy_disaggregation/seq2point-nilm/appliance_data.py" .
!cp "/content/drive/MyDrive/energy_disaggregation/seq2point-nilm/data_feeder_offset.py" .

In [5]:
import pandas as pd
import os
import argparse
#from seq2point_train_sm import Trainer
import tensorflow as tf 
import numpy as np
import matplotlib.pyplot as plt
from tensorflow import keras
from data_feeder_offset import TrainSlidingWindowGenerator
#from model_structure import create_model, save_model


In [6]:
def remove_space(string):
    """Return ``string`` with every space character (" ") removed.

    Only the plain space is stripped; tabs, newlines and other whitespace are
    left untouched. For example ``"washing machine"`` becomes
    ``"washingmachine"``.
    """
    pieces = string.split(" ")
    return "".join(pieces)

In [7]:
# CSV files used for training and validation of the microwave model.
training_directory="/content/drive/MyDrive/energy_disaggregation/seq2point-nilm/training_dir/microwave/microwave_training_.csv"
validation_directory="/content/drive/MyDrive/energy_disaggregation/seq2point-nilm/validation_dir/microwave/microwave_validation_.csv"

# Command-line style configuration. Help texts that quote a default use
# argparse's %(default)s placeholder so they cannot drift from the real value
# (they previously advertised kettle / 10 / 599 while the defaults were
# microwave / 2 / 11). Integer options now carry real int defaults.
parser = argparse.ArgumentParser(description="Train sequence-to-point learning for energy disaggregation.")
# NOTE(review): presumably this absorbs the "-f <connection file>" argument the
# IPython kernel is started with, so that parse_args() does not reject it.
parser.add_argument("-f", "--fff", help="a dummy argument to fool ipython", default="1")
parser.add_argument("--appliance_name", type=remove_space, default="microwave", help="The name of the appliance to train the network with. Default is %(default)s. Available are: kettle, fridge, washing machine, dishwasher, and microwave. ")
parser.add_argument("--batch_size", type=int, default=1000, help="The batch size to use when training the network. Default is 1000. ")
parser.add_argument("--crop", type=int, default=10000, help="The number of rows of the dataset to take training data from. Default is 10000. ")
#parser.add_argument("--pruning_algorithm", type=remove_space, default="default", help="The pruning algorithm that the network will train with. Default is none. Available are: spp, entropic, threshold. ")
parser.add_argument("--network_type", type=remove_space, default="seq2point", help="The seq2point architecture to use. ")
parser.add_argument("--epochs", type=int, default=2, help="Number of epochs. Default is %(default)s. ")
parser.add_argument("--input_window_length", type=int, default=11, help="Number of input data points to network. Default is %(default)s.")
parser.add_argument("--validation_frequency", type=int, default=1, help="How often to validate model. Default is 1. ")
parser.add_argument("--training_directory", type=str, default=training_directory, help="The dir for training data. ")
parser.add_argument("--validation_directory", type=str, default=validation_directory, help="The dir for validation data. ")

arguments = parser.parse_args()

**Manual grid search over the hyperparameters begins here:**

In [9]:
# Echo the training and validation CSV paths, one per line, as a sanity check.
print(training_directory, validation_directory, sep="\n")

/content/drive/MyDrive/energy_disaggregation/seq2point-nilm/training_dir/microwave/microwave_training_.csv
/content/drive/MyDrive/energy_disaggregation/seq2point-nilm/validation_dir/microwave/microwave_validation_.csv


In [23]:
#training and validation directory are already defined in the notebook
def generate_data(batch_size, offset, window_length):
  """Build the training and validation sliding-window generators.

  Args:
    batch_size: Batch size forwarded to both generators.
    offset: Multiplier applied to ``window_length``; the generators receive
      ``int(offset * window_length - 1)`` as their ``offset`` argument.
    window_length: Forwarded to both generators as ``windowlength``.

  Returns:
    A ``(training_chunker, validation_chunker)`` tuple of
    ``TrainSlidingWindowGenerator`` objects that read the notebook-level
    ``training_directory`` and ``validation_directory`` files respectively.
  """
  from data_feeder_offset import TrainSlidingWindowGenerator

  # Everything except the input file is identical for the two generators.
  shared_settings = dict(
      chunk_size=5 * 10 ** 2,
      batch_size=batch_size,
      crop=300000,
      shuffle=True,
      skip_rows=0,
      offset=int((offset * window_length) - 1),
      windowlength=window_length,
      ram_threshold=5 * 10 ** 5,
  )

  # Training generator is constructed first, then the validation one.
  chunkers = [
      TrainSlidingWindowGenerator(file_name=csv_file, **shared_settings)
      for csv_file in (training_directory, validation_directory)
  ]
  return chunkers[0], chunkers[1]

In [24]:
def create_model_2(input_window_length, batch_size, window_offset, learning_rate):
    """Build and compile the seq2point network as a Keras ``Sequential`` model.

    A flat window of ``input_window_length`` values is reshaped to
    ``(1, input_window_length, 1)``, passed through five ``Conv2D`` layers
    (dropout after the fourth and fifth), flattened, and regressed to a single
    value through a 1024-unit dense layer.

    Args:
        input_window_length: Number of values in each input window.
        batch_size: Unused; kept so existing callers keep working.
        window_offset: Unused; kept so existing callers keep working.
        learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        tensorflow.keras.Model: The model, already compiled with Adam, an
        "mse" loss and the metrics "mse", "msle" and "mae".
    """
    from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, Input, Reshape
    from tensorflow.keras.models import Sequential

    model = Sequential()
    # The explicit Input layer already fixes the input shape, so the first
    # Conv2D no longer repeats it through a redundant ``input_shape`` argument.
    model.add(Input(shape=(input_window_length,)))
    model.add(Reshape((1, input_window_length, 1)))

    # NOTE(review): assuming the default channels_last data format, the
    # reshaped tensor has height 1 and width ``input_window_length``, so the
    # (k, 1) kernels below extend along the height-1 axis and each output
    # position only sees a single time step. If a temporal convolution is
    # intended the kernels would need to be (1, k). Left unchanged here so
    # results stay comparable with earlier runs -- confirm before changing.
    model.add(Conv2D(30, kernel_size=(10, 1), strides=(1, 1), activation="relu", padding="same"))
    model.add(Conv2D(30, kernel_size=(8, 1), activation='relu', strides=(1, 1), padding="same"))
    model.add(Conv2D(40, kernel_size=(6, 1), activation='relu', strides=(1, 1), padding="same"))
    model.add(Conv2D(60, kernel_size=(5, 1), activation='relu', strides=(1, 1), padding="same"))
    model.add(Dropout(.2))
    model.add(Conv2D(60, kernel_size=(5, 1), activation='relu', strides=(1, 1), padding="same"))
    model.add(Dropout(.2))
    model.add(Flatten())
    model.add(Dense(1024, activation='relu'))
    model.add(Dropout(.2))
    model.add(Dense(1))

    # Compile here so callers can go straight to ``model.fit``.
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate, beta_1=0.9, beta_2=0.999),
        loss="mse",
        metrics=["mse", "msle", "mae"],
    )
    return model

In [None]:
# Manual grid search: every configuration is trained from scratch and scored
# by its final-epoch loss on the held-out validation set.
import itertools
import os
import time

import pandas as pd

t1 = time.time()
batches = [500, 1000, 2000]
epochs = [2, 5, 10]
window_length = [11, 21, 51, 99, 199, 599]
learning = [0.01, 0.001, 0.0001]  # candidate learning rates; currently not swept
offset = [0.1, 0.3, 0.5, 0.7]     # multiplied by the window length in generate_data
learning_rate = 0.001             # fixed while the learning-rate sweep is disabled

# `path` has no trailing slash, so plain string concatenation would write the
# file next to the project directory under a mangled name; join it properly.
results_file = os.path.join(path, "tuning_results_microwave_2houses_withoffset.csv")

all_results = []
# itertools.product iterates in the same order as the nested loops it
# replaces: batch size outermost, window offset innermost.
for batch_size, input_window_length, epoch, window_offset in itertools.product(
        batches, window_length, epochs, offset):
    # A fresh model is built for every configuration; clearing Keras' global
    # state stops memory use from growing over the whole sweep.
    tf.keras.backend.clear_session()

    training_chunker, validation_chunker = generate_data(batch_size, window_offset, input_window_length)
    # Whole batches per epoch (truncated towards zero).
    steps_per_training_epoch = int(training_chunker.total_num_samples / batch_size)
    model = create_model_2(input_window_length, batch_size, window_offset, learning_rate)
    training_history = model.fit(training_chunker.load_dataset(),
                                 steps_per_epoch=steps_per_training_epoch,
                                 epochs=epoch,
                                 verbose=1,
                                 validation_data=validation_chunker.load_dataset(),
                                 validation_freq=1,
                                 validation_steps=100)

    accuracy_dict = {
        "batch size": batch_size,
        "window length": input_window_length,
        "window offset": window_offset,
        "epochs": epoch,
        "validation loss": training_history.history['val_loss'][-1],
        "learning rate": learning_rate,
    }
    all_results.append(accuracy_dict)

    # Persist partial results after every configuration so an interrupted
    # session does not lose hours of completed runs.
    pd.DataFrame(all_results).to_csv(results_file)

print(all_results)
df = pd.DataFrame(all_results)
df.to_csv(results_file)  # save the tuning results to a csv file
print("\nThe best parameters are:\n")
# idxmin() returns an index label, so look the row up by label with .loc.
print(df.loc[df["validation loss"].idxmin()])

t2 = time.time()
print("time elapsed in hours: {}".format((t2 - t1)/3600))




Importing training file...
Counting number of rows...
Done.
The dataset contains  300000  rows
Epoch 1/2
Counting number of rows...
Done.
The dataset contains  33371  rows
Epoch 2/2
Importing training file...
Counting number of rows...
Done.
The dataset contains  300000  rows
Epoch 1/2
Counting number of rows...
Done.
The dataset contains  33371  rows
Epoch 2/2
Importing training file...
Counting number of rows...
Done.
The dataset contains  300000  rows
Epoch 1/2
Counting number of rows...
Done.
The dataset contains  33371  rows
Epoch 2/2
Importing training file...
Counting number of rows...
Done.
The dataset contains  300000  rows
Epoch 1/2
Counting number of rows...
Done.
The dataset contains  33371  rows
Epoch 2/2
Importing training file...
Counting number of rows...
Done.
The dataset contains  300000  rows
Epoch 1/5
Counting number of rows...
Done.
The dataset contains  33371  rows
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Importing training file...
Counting number of rows...
Done.


The best parameters are:

batch size         500.00000
window length       51.00000
window offset       25.00000
epochs              10.00000
validation loss      0.00546
learning rate        0.00100
Name: 8, dtype: float64
time elapsed in hours: 0.7008116023408042