**Code for Seq2point-hyperparameter tuning**
<br> This notebook can be used to tune the hyperparameters - batch size, epochs, window length and learning rate for any appliance and any training/validation data. 
<br> The tuning is done using a held-out (leave-out) validation set.
<br> Although the code facilitates tuning the offset, it's best to use an offset of 0.5, since taking the midpoint of the window as the position of the predicted value almost always makes more sense.

<br>*This notebook was created in Google Colab. You might find indentation mismatches if you open it in Jupyter Notebook.*

In [1]:
# Mount Google Drive at /content/drive; the project and data paths used in this notebook live under it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Project folder on the mounted Drive; the helper .py modules are copied from here and
# the tuning-results file name is derived from it.
path = "/content/drive/MyDrive/energy_disaggregation/seq2point-nilm"

In [3]:
#This is to import all the necessary functions - from seq2point_train_sm.py, data_feeder_offset.py,appliance_data.py 
!cp "/content/drive/MyDrive/energy_disaggregation/seq2point-nilm/seq2point_train_sm.py" .
!cp "/content/drive/MyDrive/energy_disaggregation/seq2point-nilm/appliance_data.py" .
!cp "/content/drive/MyDrive/energy_disaggregation/seq2point-nilm/data_feeder_offset.py" .

In [4]:
import pandas as pd
import os
import argparse
import tensorflow as tf 
import numpy as np
import matplotlib.pyplot as plt
from tensorflow import keras
from data_feeder_offset import TrainSlidingWindowGenerator


In [5]:
def remove_space(string):
    """Return `string` with every space character (" ") removed; other whitespace is kept."""
    pieces = string.split(" ")
    return "".join(pieces)

In [3]:
# Inputs: training CSV file, validation CSV file, and the CSV file the tuning results are written to.
# The *_directory names are kept because other cells read them; each one holds a file path.
training_directory="/content/drive/MyDrive/energy_disaggregation/seq2point-nilm/training_dir/dishwasher/dishwasher_training_.csv"  #data from houses 2,3   #has 300350 rows
validation_directory="/content/drive/MyDrive/energy_disaggregation/seq2point-nilm/validation_dir/dishwasher/REDDdishwasher_validation_.csv"
# The "/" matters: plain `path + "tuning_results_dishwasher.csv"` glued the file name onto the
# folder name ("...seq2point-nilmtuning_results_dishwasher.csv"), saving outside the project folder.
save_directory = path + "/tuning_results_dishwasher.csv"

**Hyperparameter tuning begins here:**

In [7]:
# Echo the configured training and validation files (one per line) before the long tuning run.
print(training_directory, validation_directory, sep="\n")

/content/drive/MyDrive/energy_disaggregation/seq2point-nilm/training_dir/dishwasher/dishwasher_training_.csv
/content/drive/MyDrive/energy_disaggregation/seq2point-nilm/validation_dir/dishwasher/REDDdishwasher_validation_.csv


In [10]:
#training and validation directory are already defined in the notebook
def generate_data(batch_size, offset, window_length, training_crop=300000, validation_crop=300000):
  """Build the sliding-window generators for the training and the validation file.

  The file paths are read from the notebook-level variables `training_directory`
  and `validation_directory`; `TrainSlidingWindowGenerator` comes from the
  notebook's import cell.

  Args:
    batch_size: Batch size handed to both generators.
    offset: Fractional position inside the window (e.g. 0.5). It is converted to
      the integer `int(offset * window_length - 1)` that the generators receive
      as their `offset` argument.
    window_length: Input window length, passed on as `windowlength`.
    training_crop: `crop` value for the training generator. Adjust it to the
      size of the training data (default 300000, the previously hard-coded value).
    validation_crop: `crop` value for the validation generator (default 300000).

  Returns:
    Tuple `(training_chunker, validation_chunker)` of TrainSlidingWindowGenerator objects.
  """
  window_offset = int((offset * window_length) - 1)

  # Everything except the file and its crop is identical for both generators.
  shared_settings = dict(chunk_size=5 * 10 ** 2,
                         batch_size=batch_size,
                         shuffle=True,
                         skip_rows=0,
                         offset=window_offset,
                         windowlength=window_length,
                         ram_threshold=5 * 10 ** 5)

  training_chunker = TrainSlidingWindowGenerator(file_name=training_directory,
                                                 crop=training_crop,
                                                 **shared_settings)
  validation_chunker = TrainSlidingWindowGenerator(file_name=validation_directory,
                                                   crop=validation_crop,
                                                   **shared_settings)
  return training_chunker, validation_chunker

In [11]:
def create_model_2(input_window_length, batch_size, window_offset, learning_rate):

    """Builds and compiles the seq2point model using Keras' Sequential API.

    Args:
    input_window_length (int): Number of samples in each input window.
    batch_size: Not used when building the model; kept so existing calls stay valid.
    window_offset: Not used when building the model; kept so existing calls stay valid.
    learning_rate (float): Learning rate passed to the Adam optimizer.

    Returns:
    model (tensorflow.keras.Model): The compiled seq2point model (loss "mse";
    reported metrics "mse", "msle" and "mae").

    """
    from tensorflow.keras.layers import Dense, Dropout, Reshape, Flatten, Conv2D, Input
    from tensorflow.keras.models import Sequential

    model = Sequential()
    model.add(Input(shape=(input_window_length,)))
    # Flat window -> (1, window, 1) so the 2-D convolutions below can consume it.
    model.add(Reshape((1, input_window_length, 1)))
    model.add(Dropout(0.2))
    # NOTE(review): the window lies along the second spatial axis after the reshape, while each
    # kernel_size is (k, 1), i.e. k along the size-1 axis. It looks like the convolutions never
    # mix neighbouring samples -- confirm whether (1, k) was intended. Left unchanged here.
    # The first Conv2D no longer passes `input_shape`: the Input layer above already fixes the
    # shape, and the argument is ignored on a layer that is not first in the model.
    model.add(Conv2D(30, kernel_size=(10, 1), strides=(1, 1), activation="relu", padding="same"))
    model.add(Conv2D(30, kernel_size=(8, 1), activation='relu', strides=(1, 1), padding="same"))
    model.add(Conv2D(40, kernel_size=(6, 1), activation='relu', strides=(1, 1), padding="same"))
    model.add(Conv2D(60, kernel_size=(5, 1), activation='relu', strides=(1, 1), padding="same"))
    model.add(Dropout(.5))
    model.add(Conv2D(60, kernel_size=(5, 1), activation='relu', strides=(1, 1), padding="same"))
    model.add(Dropout(.5))
    model.add(Flatten())
    model.add(Dense(1024, activation='relu'))
    model.add(Dropout(.5))
    # Single linear output: one predicted value per input window.
    model.add(Dense(1))

    # compile model
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate, beta_1=0.9, beta_2=0.999)
    model.compile(optimizer=optimizer, loss="mse", metrics=["mse", "msle", "mae"])

    return model

In [12]:
#Validation on a leave out validation set
#Prints out the best parameter set and saves the tuning results as a csv in the save_directory
import itertools
import time

t1 = time.time()

# Hyperparameter grid: every combination is trained from scratch and scored on the validation set.
batches = [500, 1000, 2000]
epochs =  [2, 5, 10]
window_length = [11, 21, 51, 99, 199, 599]
learning = [0.01, 0.001, 0.0001]
offset = [0.5]   # midpoint of the window only; add e.g. 0.1, 0.3, 0.7 to tune the offset as well

all_results = []
# itertools.product walks the grid in the same order as nested for-loops
# (batch size outermost, offset innermost).
for batch_size, input_window_length, epoch, learning_rate, window_offset in itertools.product(
        batches, window_length, epochs, learning, offset):
    # The loop's learning rate is used as-is. Previously it was overwritten with 0.001 inside the
    # loop, so the learning-rate grid was never explored and each setting was trained three times.

    # Drop the previous run's Keras state so memory does not accumulate over the whole grid.
    tf.keras.backend.clear_session()

    training_chunker, validation_chunker = generate_data(batch_size, window_offset, input_window_length)
    steps_per_training_epoch = int(training_chunker.total_num_samples / batch_size)
    model = create_model_2(input_window_length, batch_size, window_offset, learning_rate)
    training_history = model.fit(training_chunker.load_dataset(),
                                 steps_per_epoch=steps_per_training_epoch,
                                 epochs=epoch,
                                 verbose=1,
                                 validation_data=validation_chunker.load_dataset(),
                                 validation_freq=1,
                                 validation_steps=100)

    # One record per run; the validation loss of the final epoch is the score.
    all_results.append({
        "batch size": batch_size,
        "window length": input_window_length,
        "window offset": window_offset,
        "epochs": epoch,
        "validation loss": training_history.history['val_loss'][-1],
        "learning rate": learning_rate,
    })

print(all_results)
df = pd.DataFrame(all_results)
df.to_csv(save_directory) #save the tuning results to a csv file
print("\nThe best parameters are:\n")
# idxmin() returns an index label, so look the row up with .loc rather than .iloc.
print(df.loc[df["validation loss"].idxmin()])

t2 = time.time()
print("time elapsed in hours: {}".format((t2 - t1)/3600))




Importing training file...
Counting number of rows...
Done.
The dataset contains  300000  rows
Epoch 1/2
Counting number of rows...
Done.
The dataset contains  33371  rows
Epoch 2/2
Importing training file...
Counting number of rows...
Done.
The dataset contains  300000  rows
Epoch 1/2
Counting number of rows...
Done.
The dataset contains  33371  rows
Epoch 2/2
Importing training file...
Counting number of rows...
Done.
The dataset contains  300000  rows
Epoch 1/2
Counting number of rows...
Done.
The dataset contains  33371  rows
Epoch 2/2
Importing training file...
Counting number of rows...
Done.
The dataset contains  300000  rows
Epoch 1/2
Counting number of rows...
Done.
The dataset contains  33371  rows
Epoch 2/2
Importing training file...
Counting number of rows...
Done.
The dataset contains  300000  rows
Epoch 1/5
Counting number of rows...
Done.
The dataset contains  33371  rows
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Importing training file...
Counting number of rows...
Done.
