Train Stove using Window GRU model: train1_2_test4_epochs_10_batch_256

In [None]:
# Mount Google Drive at /content/drive; the data files read below live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Project directory on the mounted Drive.
# NOTE(review): read_label() and read_merge_data() spell this same directory out as a
# literal instead of reading this variable - keep them in sync if the location changes.
path = "/content/drive/MyDrive/energy_disaggregation/windowGRU"

In [None]:
import pandas as pd
import numpy as np

from keras.models import load_model
from keras.models import Sequential
from keras.layers import Dense, Conv1D, GRU, Bidirectional, Dropout
from keras.utils import plot_model

import matplotlib.pyplot as plt
import time
import matplotlib.pyplot as plt
%matplotlib inline
from IPython.display import display
import datetime
import math
import warnings
warnings.filterwarnings("ignore")
import glob
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [None]:
# Number of consecutive aggregate-mains samples in each model input window;
# also passed to _create_model() as the length of the input sequence.
window_size = 50

# Functions

In [None]:
def read_label(base_path='/content/drive/MyDrive/energy_disaggregation/windowGRU', houses=range(1, 6)):
    """Read the channel labels of each house.

    Parameters
    ----------
    base_path : str
        Directory that contains the ``house_<n>`` sub-directories. Defaults to
        the Drive location this notebook has always read from.
    houses : iterable of int
        House numbers to load. Defaults to houses 1 through 5.

    Returns
    -------
    dict
        ``{house: {channel_number: '<name>_<channel_number>'}}``, built from the
        ``labels.dat`` file of each house. Every line of that file is expected
        to hold a channel number followed by the channel name.
    """
    label = {}
    for i in houses:
        hi = '{}/house_{}/labels.dat'.format(base_path, i)
        label[i] = {}
        with open(hi) as f:
            for line in f:
                fields = line.split()
                # Skip blank or incomplete lines (e.g. an empty line at the end
                # of the file) instead of failing on int('').
                if len(fields) < 2:
                    continue
                label[i][int(fields[0])] = fields[1] + '_' + fields[0]
    return label


#-------------------------------------------------------------------------------------------------------------------

def read_merge_data(house):
    """Load every channel file of one house into a single time-indexed frame.

    Each ``channel_<n>.dat`` file is read as space-separated
    ``(unix_time, value)`` pairs. The value column is named after the entry
    for that channel in the module-level ``labels`` mapping, so ``labels``
    must be populated before this function is called. Channels are combined
    with an inner join on ``unix_time``, so only timestamps present in every
    channel are kept.

    Parameters
    ----------
    house : int
        House number, used both in the directory name and as key into ``labels``.

    Returns
    -------
    pandas.DataFrame
        One column per channel, indexed by the timestamps obtained by reading
        ``unix_time`` as seconds.
    """
    house_dir = '/content/drive/MyDrive/energy_disaggregation/windowGRU/house_{}/'.format(house)

    def _load_channel(channel):
        # One channel file -> two-column frame: unix_time plus the labelled reading.
        column = labels[house][channel]
        return pd.read_table(house_dir + 'channel_{}.dat'.format(channel),
                             sep=' ',
                             names=['unix_time', column],
                             dtype={'unix_time': 'int64', column: 'float64'})

    merged = _load_channel(1)

    # The number of channels is taken from the files actually present on disk.
    channel_count = len(glob.glob(house_dir + 'channel*'))
    for channel in range(2, channel_count + 1):
        merged = pd.merge(merged, _load_channel(channel), how='inner', on='unix_time')

    # Replace the integer time column by a datetime index.
    merged['timestamp'] = merged['unix_time'].astype("datetime64[s]")
    merged = merged.set_index(merged['timestamp'].values)
    return merged.drop(['unix_time', 'timestamp'], axis=1)

#-------------------------------------------------------------------------------------------------------------------

def uniform_label(df):
  """Return ``df`` with a house-specific stove column renamed to ``'stove'``.

  The stove column carries a different channel suffix depending on the house
  (``stove_14``, ``stove_5`` and ``stove_8`` are the names handled here), so it
  is given one common name. A frame without any of these columns is returned
  unchanged. The input frame itself is never modified, and nothing is printed.
  """
  stove_columns = ('stove_14', 'stove_5', 'stove_8')
  renames = {column: 'stove' for column in stove_columns if column in df.columns}
  if renames:
    df = df.rename(columns=renames)
  return df

#----------------------------------------------------------------------------------------------------------------------

def add_houses(df_list, appliance):
  """Stack the aggregate-mains and appliance readings of several houses.

  Parameters
  ----------
  df_list : list of pandas.DataFrame
      Per-house frames; each must contain ``mains_1``, ``mains_2`` and, after
      ``uniform_label`` has been applied, the ``appliance`` column.
  appliance : str
      Name of the appliance column to keep.

  Returns
  -------
  pandas.DataFrame
      Two columns, ``aggregate_mains`` (``mains_1 + mains_2``) and
      ``appliance``, with the houses concatenated in the order given.
      An empty frame is returned for an empty ``df_list``.
  """
  frames = []
  for df in df_list:
    df = uniform_label(df)
    # assign() builds a new frame, so the caller's per-house frame does not
    # silently gain an 'aggregate_mains' column.
    df = df.assign(aggregate_mains=df['mains_1'] + df['mains_2'])
    frames.append(df[['aggregate_mains', appliance]])

  if not frames:
    return pd.DataFrame()
  # Concatenate once instead of re-copying the accumulated frame per house.
  return pd.concat(frames)

#-----------------------------------------------------------------------------------------------------------------------

def max_value(df):
  """Return the largest value in the ``aggregate_mains`` column of ``df``.

  Uses the vectorized ``Series.max``, which ignores missing values, instead of
  the builtin ``max``, whose result depends on where a NaN happens to sit.
  """
  return df['aggregate_mains'].max()

#-----------------------------------------------------------------------------------------------------------------------

def preprocess_data(df, appliance, nmax):
  """Return a copy of ``df`` with both signal columns divided by ``nmax``.

  Parameters
  ----------
  df : pandas.DataFrame
      Frame holding an ``aggregate_mains`` column and the ``appliance`` column.
  appliance : str
      Name of the appliance column to scale.
  nmax : float
      Scale factor applied to both columns.

  Returns
  -------
  pandas.DataFrame
      A new frame; ``df`` is left untouched, so calling this twice on the same
      input gives the same result instead of scaling the data twice.
  """
  scaled = df.copy()
  scaled['aggregate_mains'] = scaled['aggregate_mains'] / nmax
  scaled[appliance] = scaled[appliance] / nmax
  return scaled

#----------------------------------------------------------------------------------------------------------------------
def create_window_chunk(df, sequence_length):
  """Return every run of ``sequence_length`` consecutive items of ``df``.

  Window ``k`` is ``df[k: k + sequence_length]``. There are
  ``len(df) - sequence_length`` windows, so the final possible window (the one
  ending on the last item) is not produced. An input no longer than
  ``sequence_length`` yields an empty list.
  """
  window_count = len(df) - sequence_length
  return [df[start: start + sequence_length] for start in range(window_count)]

#----------------------------------------------------------------------------------------------------------------------

def _create_model(window_size):
    '''Creates the GRU architecture described in the paper

    Parameters
    ----------
    window_size : int
        Number of time steps per input window; the network takes inputs of
        shape ``(window_size, 1)``.

    Returns
    -------
    keras.models.Sequential
        The model, compiled with mean-squared-error loss and the Adam
        optimizer. The layer summary is printed as a side effect.
    '''
    model = Sequential()

    # 1D Conv
    model.add(Conv1D(16, 4, activation='relu', input_shape=(window_size,1), padding="same", strides=1))

    #Bi-directional GRUs
    # The first GRU keeps the whole sequence for the next recurrent layer;
    # the second one returns only its final state.
    model.add(Bidirectional(GRU(64, activation='relu', return_sequences=True), merge_mode='concat'))
    model.add(Dropout(0.5))
    model.add(Bidirectional(GRU(128, activation='relu', return_sequences=False), merge_mode='concat'))
    model.add(Dropout(0.5))

    # Fully Connected Layers
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    # Single linear unit: one regression value per window.
    model.add(Dense(1, activation='linear'))

    model.compile(loss='mse', optimizer='adam')
    # summary() prints the table itself and returns None, so it is called
    # directly rather than wrapped in print(), which would add a stray "None".
    model.summary()

    return model

#---------------------------------------------------------------------------------------------------------------------







# Preprocess Data

In [None]:
# Per-house containers keyed by house number, filled in by the cells below:
# df_dic holds the merged readings of each house, dates the days they cover.
df_dic = {}
dates = {}

In [None]:
# Channel names per house. read_merge_data() looks them up through this
# module-level name, so this cell has to run before any data is loaded.
labels = read_label()
#labels

In [None]:
# Load and merge every channel of houses 1-5 into one DataFrame per house.
for i in range(1,6):
    df_dic[i] = read_merge_data(i)
    
#print(df_dic)

In [None]:
for i in range(1,6):
    # Distinct calendar days (YYYY-MM-DD strings) present in each house's index,
    # in ascending order. Formatting the whole index at once avoids a Python-level
    # loop over every timestamp and no longer reuses the name of the imported
    # `time` module as a loop variable.
    dates[i] = sorted(set(df_dic[i].index.strftime('%Y-%m-%d')))
    #print('House {0} data contain {1} days from {2} to {3}.'.format(i,len(dates[i]),dates[i][0], dates[i][-1]))
    #print(dates[i], '\n')

In [None]:
# Show the shape and the last three rows of each house's merged frame.
for i in range(1,6):
    print('House {} data has shape: '.format(i), df_dic[i].shape)
    display(df_dic[i].tail(3))

House 1 data has shape:  (406748, 20)


Unnamed: 0,mains_1,mains_2,oven_3,oven_4,refrigerator_5,dishwaser_6,kitchen_outlets_7,kitchen_outlets_8,lighting_9,washer_dryer_10,microwave_11,bathroom_gfi_12,electric_heat_13,stove_14,kitchen_outlets_15,kitchen_outlets_16,lighting_17,lighting_18,washer_dryer_19,washer_dryer_20
2011-05-24 19:56:27,235.46,38.61,0.0,0.0,190.0,0.0,24.0,20.0,2.0,0.0,4.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
2011-05-24 19:56:30,235.98,38.77,0.0,0.0,189.0,0.0,24.0,20.0,2.0,0.0,4.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
2011-05-24 19:56:34,235.29,38.83,0.0,0.0,186.0,0.0,26.0,20.0,2.0,0.0,4.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0


House 2 data has shape:  (316840, 11)


Unnamed: 0,mains_1,mains_2,kitchen_outlets_3,lighting_4,stove_5,microwave_6,washer_dryer_7,kitchen_outlets_8,refrigerator_9,dishwaser_10,disposal_11
2011-05-22 23:59:01,10.84,252.61,0.0,9.0,0.0,5.0,0.0,2.0,158.0,0.0,0.0
2011-05-22 23:59:04,10.88,253.02,0.0,9.0,0.0,4.0,0.0,2.0,160.0,0.0,0.0
2011-05-22 23:59:08,10.84,252.77,0.0,9.0,0.0,4.0,0.0,2.0,157.0,0.0,0.0


House 3 data has shape:  (376150, 22)


Unnamed: 0,mains_1,mains_2,outlets_unknown_3,outlets_unknown_4,lighting_5,electronics_6,refrigerator_7,disposal_8,dishwaser_9,furance_10,lighting_11,outlets_unknown_12,washer_dryer_13,washer_dryer_14,lighting_15,microwave_16,lighting_17,smoke_alarms_18,lighting_19,bathroom_gfi_20,kitchen_outlets_21,kitchen_outlets_22
2011-05-31 00:19:30,15.45,2.23,0.0,1.0,1.0,1139.0,8.0,6.0,3.0,7.0,147.0,0.0,0.0,0.0,1.0,2.0,10.0,0.0,0.0,0.0,0.0,2.5
2011-05-31 00:19:33,15.42,2.23,0.0,1.0,1.0,1138.0,8.0,6.0,3.0,7.0,147.0,0.0,0.0,0.0,1.0,2.0,10.0,0.0,0.0,0.0,0.0,2.5
2011-05-31 00:19:36,15.44,2.23,0.0,1.0,1.0,1134.0,7.0,6.0,3.0,7.0,147.0,0.0,0.0,0.0,1.0,2.0,10.0,0.0,0.0,0.0,0.0,2.5


House 4 data has shape:  (428076, 20)


Unnamed: 0,mains_1,mains_2,lighting_3,furance_4,kitchen_outlets_5,outlets_unknown_6,washer_dryer_7,stove_8,air_conditioning_9,air_conditioning_10,miscellaeneous_11,smoke_alarms_12,lighting_13,kitchen_outlets_14,dishwaser_15,bathroom_gfi_16,bathroom_gfi_17,lighting_18,lighting_19,air_conditioning_20
2011-06-04 00:45:31,403.36,94.65,173.0,27.0,38.0,3.0,1.0,5.0,0.0,0.0,1.0,0.0,25.0,135.0,0.0,0.0,1.0,0.0,0.0,0.0
2011-06-04 00:45:35,401.11,94.63,174.0,27.0,34.0,4.0,1.0,5.0,0.0,0.0,1.0,0.0,25.0,135.0,0.0,0.0,1.0,0.0,0.0,0.0
2011-06-04 00:45:38,400.24,94.43,175.0,27.0,38.0,4.0,1.0,5.0,0.0,0.0,2.0,0.0,25.0,135.0,0.0,0.0,1.0,0.0,0.0,0.0


House 5 data has shape:  (77451, 26)


Unnamed: 0,mains_1,mains_2,microwave_3,lighting_4,outlets_unknown_5,furance_6,outlets_unknown_7,washer_dryer_8,washer_dryer_9,subpanel_10,subpanel_11,electric_heat_12,electric_heat_13,lighting_14,outlets_unknown_15,bathroom_gfi_16,lighting_17,refrigerator_18,lighting_19,dishwaser_20,disposal_21,electronics_22,lighting_23,kitchen_outlets_24,kitchen_outlets_25,outdoor_outlets_26
2011-06-01 00:20:07,151.68,37.39,3.0,0.0,3.0,7.0,0.0,0.0,0.0,12.5,5.0,0.0,0.0,3.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,5.0,115.0,0.0,0.0,0.0
2011-06-01 00:20:10,150.42,37.21,3.0,0.0,3.0,7.0,0.0,0.0,0.0,12.5,5.0,0.0,0.0,3.0,0.0,1.0,0.0,1.0,2.0,0.0,0.0,5.0,100.0,0.0,0.0,0.0
2011-06-01 00:20:14,149.29,37.1,3.0,0.0,3.0,7.0,0.0,0.0,0.0,12.5,5.0,0.0,0.0,3.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,5.0,97.5,0.0,0.0,0.0


# Preparing train data - houses 1 and 2

In [None]:
# Training set: houses 1 and 2, reduced to the aggregate mains and the stove column.
df_list = [df_dic[1], df_dic[2]]
dfs = add_houses(df_list, 'stove')

In [None]:
# Preview the first rows of the combined training frame.
print(dfs.head())

                     aggregate_mains  stove
2011-04-18 13:22:13           341.03    0.0
2011-04-18 13:22:16           342.36    0.0
2011-04-18 13:22:20           342.52    0.0
2011-04-18 13:22:23           342.07    0.0
2011-04-18 13:22:26           341.77    0.0


In [None]:
# Largest aggregate-mains reading in the training data; reused below as the
# common scale factor for the training set, the test set and the denormalization.
nmax= max_value(dfs)

In [None]:
# Divide the aggregate and stove columns by nmax.
df = preprocess_data(dfs, 'stove', nmax)
print(df.head())


                     aggregate_mains  stove
2011-04-18 13:22:13         0.028594    0.0
2011-04-18 13:22:16         0.028706    0.0
2011-04-18 13:22:20         0.028719    0.0
2011-04-18 13:22:23         0.028681    0.0
2011-04-18 13:22:26         0.028656    0.0


# Create window chunks and targets

In [None]:
sequence_length = window_size

# Model inputs: every run of `sequence_length` consecutive aggregate readings.
x = create_window_chunk(df['aggregate_mains'], sequence_length)

# Targets: the stove reading that immediately follows each window
# (window k covers rows k .. k+sequence_length-1, its target is row k+sequence_length).
y = df['stove'][sequence_length:]

In [None]:
# Stack the windows and the targets into NumPy arrays.
x = np.array(x)
y = np.array(y)

In [None]:
# Expect (number of windows, window_size).
x.shape

(723538, 50)

In [None]:
# Expect one target per window.
y.shape


(723538,)

## reshape

In [None]:
# Add a trailing axis of length 1 so that each window has the (window_size, 1)
# shape the Conv1D input layer is built with.
x_train = np.reshape(x, (x.shape[0], x.shape[1], 1))

In [None]:
# Expect (number of windows, window_size, 1).
x_train.shape

(723538, 50, 1)

# Model

In [None]:
# Build and compile the network; the layer summary is printed as a side effect.
model = _create_model(window_size)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d (Conv1D)              (None, 50, 16)            80        
_________________________________________________________________
bidirectional (Bidirectional (None, 50, 128)           31488     
_________________________________________________________________
dropout (Dropout)            (None, 50, 128)           0         
_________________________________________________________________
bidirectional_1 (Bidirection (None, 256)               198144    
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense (Dense)                (None, 128)               32896     
_________________________________________________________________
dropout_2 (Dropout)          (None, 128)               0

In [None]:
# Fit on the training windows and report the elapsed wall-clock time in seconds.
start = time.time()
model.fit(x_train, y, epochs=10, batch_size=256)
print('Finish trainning. Time: ', time.time() - start)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Finish trainning. Time:  19109.954473018646


In [None]:
# Persist the trained network. The file name records the target appliance and
# the training setup (houses 1 and 2, 10 epochs, batch size 256); it previously
# said "refrigerator" although this notebook trains the stove model.
model.save("stove_train_1_2_epochs_10_batch_256_model.h5")

# Preparing test data - house 4





In [None]:
# Test set: house 4 only, reduced to the same two columns as the training data.
df_list = [df_dic[4]]
dfs_test = add_houses(df_list, 'stove')


true


In [None]:
# Scale the test data with nmax taken from the training data (not with the test
# set's own maximum), so both sets share one scale.
df_test = preprocess_data(dfs_test, "stove", nmax)

print(df_test.head())

                     aggregate_mains     stove
2011-04-17 01:16:32         0.046022  0.000419
2011-04-17 01:16:35         0.046029  0.000419
2011-04-17 01:16:38         0.046074  0.000419
2011-04-17 01:16:42         0.046055  0.000419
2011-04-17 01:16:45         0.046117  0.000419


In [None]:
sequence_length = window_size

# Test inputs: every run of `sequence_length` consecutive aggregate readings.
x_test = create_window_chunk(df_test['aggregate_mains'], sequence_length)

# Test targets: the stove reading that immediately follows each window.
y_test = df_test['stove'][sequence_length:]


In [None]:
# Stack the test windows and targets into NumPy arrays.
x_test = np.array(x_test)
y_test = np.array(y_test)

In [None]:
# Expect (number of test windows, window_size).
x_test.shape

(428026, 50)

In [None]:
# Expect one target per test window.
y_test.shape

(428026,)

In [None]:
# Add the trailing length-1 axis, matching the layout used for the training inputs.
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

In [None]:
# Predict the (still normalized) stove power for every test window.
pred_test = model.predict(x_test, batch_size=256)

In [None]:
# Inspect the raw, still normalized predictions and their shape.
# NOTE(review): the recorded output shows the same value in every displayed row,
# and it equals the maximum reported further down - check whether the model
# collapsed to a constant prediction.
print(pred_test)
print("-------------------")
pred_test.shape

[[0.00021223]
 [0.00021223]
 [0.00021223]
 ...
 [0.00021223]
 [0.00021223]
 [0.00021223]]
-------------------


(428026, 1)

In [None]:
# Flatten the single-column predictions into a 1-D array with one entry per window.
pred_test = np.reshape(pred_test, (len(pred_test)))

In [None]:
# Display the flattened predictions.
pred_test

array([0.00021223, 0.00021223, 0.00021223, ..., 0.00021223, 0.00021223,
       0.00021223], dtype=float32)

In [None]:
# Largest normalized prediction. ndarray.max() runs in NumPy rather than
# iterating over every element in Python as the builtin max() does.
pred_test.max()

0.00021223343

In [None]:
# Expect a 1-D shape with one entry per test window.
pred_test.shape

(428026,)

In [None]:
# Largest normalized ground-truth value, computed in NumPy instead of with the
# element-by-element builtin max().
y_test.max()

In [None]:
# Should match the shape of pred_test so the two can be compared element-wise.
y_test.shape

## Denormalize the data

In [None]:
# Undo the scaling: multiplying by nmax restores the units of the raw readings.
y_test_d = y_test * nmax

In [None]:
# Largest measured stove value after denormalization, computed in NumPy instead
# of with the element-by-element builtin max().
y_test_d.max()

In [None]:
# Bring the predictions back to the same units as y_test_d.
y_pred_d = pred_test* nmax

In [None]:
# Largest predicted stove value after denormalization, computed in NumPy
# instead of with the element-by-element builtin max().
y_pred_d.max()

In [None]:
# Overlay predicted and measured stove power over the whole test set.
plt.plot(y_pred_d, color='red', label="predicted stove")
plt.plot(y_test_d, color='green', label="ground stove")
# The label= arguments above are only drawn once a legend is requested.
plt.legend()
# Call show(); a bare `plt.show` merely evaluates to the function object.
plt.show()

In [None]:
# Mean absolute error between measured and predicted stove power,
# on denormalized data

mean_absolute_error(y_test_d, y_pred_d)

In [None]:
# Root-mean-square error on the same denormalized series.
math.sqrt(mean_squared_error(y_test_d, y_pred_d))

In [None]:
# Zoom in on test samples 50000-99999, using the same colours and legend labels
# as the full-range plot.
plt.plot(y_pred_d[50000:100000], color='red', label="predicted stove")
plt.plot(y_test_d[50000:100000], color='green', label="ground stove")
plt.legend()
# Call show(); a bare `plt.show` merely evaluates to the function object.
plt.show()