In [1]:
import tensorflow as tf
import os
import pandas as pd
import numpy as np

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import load_model

import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error as mse

In [2]:
# Load data: read the semicolon-separated readings, merging the Date and Time
# columns into a single DateTime index and treating 'nan' / '?' as missing.
energy_data = pd.read_csv(
    'Dataset/household_power_consumption.txt',
    sep=';',
    header=0,
    parse_dates={'DateTime' : ['Date','Time']},
    infer_datetime_format=True,
    na_values=['nan','?'],
    index_col='DateTime',
)

# Only Global_active_power is kept; every other measurement is removed in one call.
energy_data.drop(
    columns=[
        'Global_reactive_power',
        'Voltage',
        'Global_intensity',
        'Sub_metering_1',
        'Sub_metering_2',
        'Sub_metering_3',
    ],
    inplace=True,
)

# Displaying head of data
energy_data.head()

Unnamed: 0_level_0,Global_active_power
DateTime,Unnamed: 1_level_1
2006-12-16 17:24:00,4.216
2006-12-16 17:25:00,5.36
2006-12-16 17:26:00,5.374
2006-12-16 17:27:00,5.388
2006-12-16 17:28:00,3.666


In [3]:
def impute_nan_values(df):
    
    """
    Function to impute values: every NaN is replaced by the mean of its
    column over the same calendar year.
    
    Input : Dataframe with missing values, indexed by a DatetimeIndex
    Output : New dataframe with imputed values (same index and columns as
             the input; the input itself is not modified)
    """
    
    # Work on the frame that was passed in rather than on a notebook-level
    # variable. transform('mean') broadcasts each year's column means back
    # onto every row of that year, so the fill values stay aligned with the
    # original index regardless of how the rows are ordered.
    yearly_means = df.groupby(df.index.year).transform('mean')
    
    # A year whose column is entirely NaN has a NaN mean and so stays NaN.
    return df.fillna(yearly_means)

In [4]:
# Impute NaN values with the per-year mean (see impute_nan_values).
# NOTE: this rebinds `energy_data`, so the frame that still contained NaNs is
# no longer available after this cell has run.
energy_data = impute_nan_values(energy_data)

In [5]:
# Confirm the imputation left nothing behind: per-column count of missing values
energy_data.isna().sum()

Global_active_power    0
dtype: int64

In [8]:
# Collapse the readings into one row per hour by summing within each hour.
hourly_energy_data = energy_data.resample(rule='H').sum()
# Epoch seconds of every hourly timestamp; the sin/cos time features are built from this.
hourly_energy_data['Seconds'] = hourly_energy_data.index.map(lambda stamp: stamp.timestamp())
hourly_energy_data.head()

Unnamed: 0_level_0,Global_active_power,Seconds
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1
2006-12-16 17:00:00,152.024,1166288000.0
2006-12-16 18:00:00,217.932,1166292000.0
2006-12-16 19:00:00,204.014,1166296000.0
2006-12-16 20:00:00,196.114,1166299000.0
2006-12-16 21:00:00,183.388,1166303000.0


In [9]:
# Period lengths in seconds (a month is taken as one twelfth of a mean year).
day = 24*60*60
year = 365.2425*day
month = year/12

# For each period, map the timestamp onto a circle: the sin/cos pair makes the
# end of one cycle numerically adjacent to the start of the next.
for sin_column, cos_column, period in (
    ('Day Sin', 'Day Cos', day),
    ('Month Sin', 'Month Cos', month),
    ('Year Sin', 'Year Cos', year),
):
    angle = hourly_energy_data['Seconds'] * (2 * np.pi / period)
    hourly_energy_data[sin_column] = np.sin(angle)
    hourly_energy_data[cos_column] = np.cos(angle)
hourly_energy_data.head()

Unnamed: 0_level_0,Global_active_power,Seconds,Day Sin,Day Cos,Month Sin,Month Cos,Year Sin,Year Cos
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2006-12-16 17:00:00,152.024,1166288000.0,-0.965926,-0.258819,0.00944,-0.999955,-0.259579,0.965722
2006-12-16 18:00:00,217.932,1166292000.0,-1.0,-4.501728e-12,0.000839,-1.0,-0.258887,0.965908
2006-12-16 19:00:00,204.014,1166296000.0,-0.965926,0.258819,-0.007763,-0.99997,-0.258194,0.966093
2006-12-16 20:00:00,196.114,1166299000.0,-0.866025,0.5,-0.016363,-0.999866,-0.257502,0.966278
2006-12-16 21:00:00,183.388,1166303000.0,-0.707107,0.7071068,-0.024963,-0.999688,-0.256809,0.966462


In [10]:
# 'Seconds' was only an intermediate for the sin/cos features; remove it.
hourly_energy_data = hourly_energy_data.drop(columns=['Seconds'])
hourly_energy_data.head()

Unnamed: 0_level_0,Global_active_power,Day Sin,Day Cos,Month Sin,Month Cos,Year Sin,Year Cos
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2006-12-16 17:00:00,152.024,-0.965926,-0.258819,0.00944,-0.999955,-0.259579,0.965722
2006-12-16 18:00:00,217.932,-1.0,-4.501728e-12,0.000839,-1.0,-0.258887,0.965908
2006-12-16 19:00:00,204.014,-0.965926,0.258819,-0.007763,-0.99997,-0.258194,0.966093
2006-12-16 20:00:00,196.114,-0.866025,0.5,-0.016363,-0.999866,-0.257502,0.966278
2006-12-16 21:00:00,183.388,-0.707107,0.7071068,-0.024963,-0.999688,-0.256809,0.966462


In [11]:
# Split the hourly data into train, validation and test sets
# NOTE(review): `train_test_split` is neither imported nor defined in any cell
# visible here, and it is unpacked into three frames from a single input
# (sklearn's function of the same name would return two). Presumably it is a
# custom helper from a cell that is no longer shown — confirm it is defined
# above this cell so that Restart & Run All works.
X2_train, X2_validate, X2_test = train_test_split(hourly_energy_data)
X2_train.shape, X2_validate.shape, X2_test.shape

((17294, 7), (6918, 7), (10377, 7))

In [12]:
# Configuring the window size: number of consecutive hourly rows in each
# input window built by df_to_X_y2
window_size=24

In [13]:
def df_to_X_y2(df, window=None):
  """
  Slice a dataframe into overlapping windows and their next-step targets.

  Sample i consists of rows i .. i+window-1 (all columns); its label is the
  first column of row i+window.

  Input : df     - dataframe whose first column is the value to predict
          window - rows per sample; when omitted, the notebook-level
                   `window_size` is used
  Output : (X, y) where X has shape (len(df)-window, window, n_columns)
           and y has shape (len(df)-window,)
  """
  if window is None:
    window = window_size  # fall back to the notebook-wide setting

  values = df.to_numpy()
  n_samples = len(values) - window

  if n_samples <= 0:
    # Too few rows for even one window: return empty arrays that still have
    # the expected rank, so downstream indexing such as X[:, :, 0] works.
    return (np.empty((0, window) + values.shape[1:], dtype=values.dtype),
            np.empty((0,), dtype=values.dtype))

  # np.stack copies every slice, so X can safely be modified in place later
  # without touching the dataframe.
  X = np.stack([values[start:start + window] for start in range(n_samples)])
  # .copy() detaches the labels from the dataframe's underlying buffer.
  y = values[window:, 0].copy()
  return X, y

In [15]:
# Turn each split into (input windows, next-step target) arrays.
# NOTE: X2_train and X2_test are rebound here from DataFrames to NumPy arrays,
# so this cell cannot be re-run on its own (df_to_X_y2 calls .to_numpy() on its
# argument); re-run the split cell first. The validation windows go to the new
# name X2_val, leaving the X2_validate frame untouched.
X2_train, y2_train = df_to_X_y2(X2_train)
X2_val, y2_val = df_to_X_y2(X2_validate)
X2_test, y2_test = df_to_X_y2(X2_test)

In [16]:
# Sanity check: inputs are (samples, window_size, columns), targets are (samples,)
X2_train.shape, y2_train.shape, X2_val.shape, y2_val.shape, X2_test.shape, y2_test.shape

((17270, 24, 7), (17270,), (6894, 24, 7), (6894,), (10353, 24, 7), (10353,))

### Standardizing the data (z-score normalization)

In [17]:
# Mean and standard deviation of column 0 over all training windows; these
# training-only statistics are what preprocess() applies to every split.
temp_training_mean = X2_train[:, :, 0].mean()
temp_training_std = X2_train[:, :, 0].std()
                           
def preprocess(X):
  """
  Standardize column 0 of a (samples, window, columns) array in place.

  The training mean is subtracted and the result divided by the training
  standard deviation; all other columns are left as they are. The same
  array object that was passed in is returned.
  """
  first_column = X[:, :, 0]
  X[:, :, 0] = (first_column - temp_training_mean) / temp_training_std
  return X

In [18]:
# Standardize column 0 of every split with the training-set statistics.
# NOTE: preprocess() modifies each array in place (its return value is not
# stored), so running this cell a second time would standardize data that is
# already standardized. Only the last call's return value is shown as output.
preprocess(X2_train)
preprocess(X2_val)
preprocess(X2_test)

array([[[ 1.44425031e+00, -2.58819045e-01, -9.65925826e-01, ...,
         -6.19834803e-01, -9.82638691e-01, -1.85529518e-01],
        [ 7.40033353e-01, -5.00000000e-01, -8.66025404e-01, ...,
         -6.13062170e-01, -9.82771423e-01, -1.84825132e-01],
        [ 4.48835824e-01, -7.07106781e-01, -7.07106781e-01, ...,
         -6.06244181e-01, -9.82903650e-01, -1.84120651e-01],
        ...,
        [ 2.96048703e-01,  5.00000000e-01, -8.66025404e-01, ...,
         -4.68774614e-01, -9.85319937e-01, -1.70717958e-01],
        [ 3.02062040e-01,  2.58819045e-01, -9.65925826e-01, ...,
         -4.61159608e-01, -9.85442052e-01, -1.70011655e-01],
        [ 6.33486714e-01,  1.14370293e-11, -1.00000000e+00, ...,
         -4.53510483e-01, -9.85563660e-01, -1.69305264e-01]],

       [[ 7.40033353e-01, -5.00000000e-01, -8.66025404e-01, ...,
         -6.13062170e-01, -9.82771423e-01, -1.84825132e-01],
        [ 4.48835824e-01, -7.07106781e-01, -7.07106781e-01, ...,
         -6.06244181e-01, -9.82903650e