#House Hold Electric Power Consumption Dataset

ABOUT THE DATASET

Original Source:
https://archive.ics.uci.edu/ml/datasets/individual+household+electric+power+consumption

The original Individual House Hold Electric Power Consumption Dataset has measurements of electric power consumption in one household with a one-minute sampling rate over a period of almost 4 years.

In [2]:
import urllib
import zipfile
import pandas as pd
import tensorflow as tf

This function downloads and extracts the dataset to the directory that contains this file:

In [3]:
url = 'https://storage.googleapis.com/download.tensorflow.org/data/certificate/household_power.zip'
urllib.request.urlretrieve(url, 'household_power.zip')
with zipfile.ZipFile('household_power.zip', 'r') as zip_ref:
  zip_ref.extractall()

Read the dataset from the CSV, normalize the dataset using min max scaling and spit into train and validation sets:

In [4]:
df = pd.read_csv('household_power_consumption.csv', sep=',',
                     infer_datetime_format=True, index_col='datetime', header=0)

# Number of features in the dataset. We use all features as predictors to predict all features at future time steps.
N_FEATURES = len(df.columns)

# This function normalizes the dataset using min max scaling.
def normalize_series(data, min, max):
    data = data - min
    data = data / max
    return data

# Normalizes the data
data = df.values
data = normalize_series(data, data.min(axis=0), data.max(axis=0))

# Splits the data into training and validation sets.
SPLIT_TIME = int(len(data) * 0.5)
x_train = data[:SPLIT_TIME]
x_valid = data[SPLIT_TIME:]

  df = pd.read_csv('household_power_consumption.csv', sep=',',


In [5]:
def windowed_dataset(series, batch_size, n_past=24, n_future=24, shift=1):
    ds = tf.data.Dataset.from_tensor_slices(series)
    ds = ds.window(size=n_past + n_future, shift=shift, drop_remainder=True)
    ds = ds.flat_map(lambda w: w.batch(n_past + n_future))
    ds = ds.map(lambda w: (w[:n_past], w[n_past:]))
    return ds.batch(batch_size).prefetch(1)

tf.keras.backend.clear_session()
tf.random.set_seed(42)
BATCH_SIZE = 32
# Number of past time steps based on which future observations should be predicted
N_PAST = 24
# Number of future time steps which are to be predicted.
N_FUTURE = 24
SHIFT = 1

# Code to create windowed train and validation datasets.
train_set = windowed_dataset(series=x_train, batch_size=BATCH_SIZE,
                                 n_past=N_PAST, n_future=N_FUTURE,
                                 shift=SHIFT)
valid_set = windowed_dataset(series=x_valid, batch_size=BATCH_SIZE,
                                 n_past=N_PAST, n_future=N_FUTURE,
                                 shift=SHIFT)

Code to define the model:

In [6]:
model = tf.keras.models.Sequential([
        tf.keras.layers.InputLayer(input_shape=(N_PAST, N_FEATURES), batch_size=BATCH_SIZE, name='Input'),
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(N_FEATURES, return_sequences=True)),
        tf.keras.layers.Dense(N_FEATURES)])

Code to train and compile the model:


In [7]:
optimizer=tf.keras.optimizers.Adam()
model.compile(
        optimizer=optimizer,
        loss='mae')

model.fit(train_set,validation_data=valid_set, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7fe63c2b0880>

Testing for forecasting:

In [10]:
import numpy as np

def mae(y_true, y_pred):
  return np.mean(abs(y_true.ravel() - y_pred.ravel()))


def model_forecast(model, series, window_size, batch_size):
  ds = tf.data.Dataset.from_tensor_slices(series)
  ds = ds.window(window_size, shift=1, drop_remainder=True)
  ds = ds.flat_map(lambda w: w.batch(window_size))
  ds = ds.batch(batch_size, drop_remainder=True).prefetch(1)
  forecast = model.predict(ds)
  return forecast


rnn_forecast = model_forecast(model, data, N_PAST, BATCH_SIZE)
rnn_forecast = rnn_forecast[SPLIT_TIME - N_PAST:-1, 0, :]

x_valid = x_valid[:rnn_forecast.shape[0]]
result = mae(x_valid, rnn_forecast)
print("MAE: ", result)

MAE:  0.0468116675876067


Save the model:

In [11]:
model.save("Model House Hold Electric Power Consumption.h5")

  saving_api.save_model(
