In [1]:
# Make sure that the model architecture and input, output shapes match our
# requirements by printing model.summary() and reviewing its output.
#
# HINT: If you follow all the rules mentioned above and throughout this
# question while training your neural network, there is a possibility that a
# validation MAE of approximately 0.055 or less on the normalized validation
# dataset may fetch you top marks.
import urllib
import zipfile
 
import pandas as pd
import tensorflow as tf

In [2]:
import pathlib
file1 = pathlib.Path("household_power.zip")
file2 = pathlib.Path("household_power_consumption.csv")
if file1.exists ():
  print("CSV file exist")
  if file2.exists():
    print("Zip file exist")    
else:
  !wget 'https://storage.googleapis.com/download.tensorflow.org/data/certificate/household_power.zip'
  with zipfile.ZipFile('household_power.zip', 'r') as zip_ref:
    zip_ref.extractall()


--2023-01-15 00:11:57--  https://storage.googleapis.com/download.tensorflow.org/data/certificate/household_power.zip
Resolving storage.googleapis.com (storage.googleapis.com)... 142.251.12.128, 172.217.194.128, 74.125.68.128, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|142.251.12.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 859850 (840K) [application/zip]
Saving to: ‘household_power.zip’


2023-01-15 00:11:58 (642 KB/s) - ‘household_power.zip’ saved [859850/859850]



In [3]:
# This function normalizes the dataset using min max scaling.
# DO NOT CHANGE THIS CODE
def normalize_series(data, min, max):
   data = data - min
   data = data / max
   return data

In [4]:
df = pd.read_csv('household_power_consumption.csv', sep=',', infer_datetime_format=True, index_col='datetime', header=0)
print(df.columns)
data = df.values
data = normalize_series(data, data.min(axis=0), data.max(axis=0)) # the dataframe.min() function to find the minimum value over the index axis, exis=1 is column, exis=1 is row
data[:1]
print(len(df.index))

Index(['Global_active_power', 'Global_reactive_power', 'Voltage',
       'Global_intensity', 'Sub_metering_1', 'Sub_metering_2',
       'Sub_metering_3'],
      dtype='object')
86400


In [5]:
# This function is used to map the time series dataset into windows of
# features and respective targets, to prepare it for training and
# validation. First element of the first window will be the first element of
# the dataset. Consecutive windows are constructed by shifting
# the starting position of the first window forward, one at a time (indicated
# by shift=1). For a window of n_past number of observations of all the time
# indexed variables in the dataset, the target for the window
# is the next n_future number of observations of these variables, after the
# end of the window.
 
# DO NOT CHANGE THIS CODE
def windowed_dataset(series, batch_size, n_past=24, n_future=24, shift=1):
   ds = tf.data.Dataset.from_tensor_slices(series)
   ds = ds.window(size=n_past + n_future, shift=shift, drop_remainder=True)
   ds = ds.flat_map(lambda w: w.batch(n_past + n_future))
   ds = ds.map(lambda w: (w[:n_past], w[n_past:]))
   return ds.batch(batch_size).prefetch(1)
 

In [7]:
# COMPLETE THE CODE IN THIS FUNCTION
def solution_model(csv_file):
   #download_and_extract_data()
   # Reads the dataset from the CSV.
   df = pd.read_csv(csv_file, sep=',', infer_datetime_format=True, index_col='datetime', header=0)
 
   # Number of features in the dataset. We use all features as predictors to
   # predict all features at future time steps.
   N_FEATURES = len(df.columns) # DO NOT CHANGE THIS
 
   # Normalizes the data
   data = df.values
   data = normalize_series(data, data.min(axis=0), data.max(axis=0))
 
   # Splits the data into training and validation sets.
   SPLIT_TIME = int(len(data) * 0.5) # DO NOT CHANGE THIS
   x_train = data[:SPLIT_TIME]
   x_valid = data[SPLIT_TIME:]
 
   # DO NOT CHANGE THIS CODE
   tf.keras.backend.clear_session()
   tf.random.set_seed(42)
 
   # DO NOT CHANGE BATCH_SIZE IF YOU ARE USING STATEFUL LSTM/RNN/GRU.
   # THE TEST WILL FAIL TO GRADE YOUR SCORE IN SUCH CASES.
   # In other cases, it is advised not to change the batch size since it
   # might affect your final scores. While setting it to a lower size
   # might not do any harm, higher sizes might affect your scores.
   BATCH_SIZE = 32  # ADVISED NOT TO CHANGE THIS
 
   # DO NOT CHANGE N_PAST, N_FUTURE, SHIFT. The tests will fail to run
   # on the server.
   # Number of past time steps based on which future observations should be
   # predicted
   N_PAST = 24  # DO NOT CHANGE THIS
 
   # Number of future time steps which are to be predicted.
   N_FUTURE = 24  # DO NOT CHANGE THIS
 
   # By how many positions the window slides to create a new window
   # of observations.
   SHIFT = 1  # DO NOT CHANGE THIS
 
   # Code to create windowed train and validation datasets.
   train_set = windowed_dataset(series=x_train, batch_size=BATCH_SIZE,
                                n_past=N_PAST, n_future=N_FUTURE,
                                shift=SHIFT)
   valid_set = windowed_dataset(series=x_valid, batch_size=BATCH_SIZE,
                                n_past=N_PAST, n_future=N_FUTURE,
                                shift=SHIFT)
   return train_set, valid_set, N_FEATURES, BATCH_SIZE, N_PAST

train_set, valid_set, N_FEATURES, BATCH_SIZE, N_PAST = solution_model('household_power_consumption.csv')

In [8]:
#inputs = tf.keras.Input(shape=(N_PAST, N_FEATURES))
model = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(7, return_sequences=True, input_shape=(24, 24)),
#    tf.keras.layers.LSTM(7, return_sequences=True, input_shape=inputs),
    tf.keras.layers.Dense(14, activation="relu"),
    tf.keras.layers.Dense(N_FEATURES)])

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 24, 7)             896       
                                                                 
 dense (Dense)               (None, 24, 14)            112       
                                                                 
 dense_1 (Dense)             (None, 24, 7)             105       
                                                                 
Total params: 1,113
Trainable params: 1,113
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Code to define your model.
model = tf.keras.models.Sequential([
#    tf.keras.layers.LSTM(7, return_sequences=True, input_shape=(24, 24)),
    tf.keras.layers.LSTM(7, return_sequences=True, input_shape=(N_PAST, N_FEATURES)),   #batch_size = 7 days, n_past = 24 hr, n_features = total data's qty ( len(df.columns)  )
    tf.keras.layers.Dense(14, activation="relu"),
    tf.keras.layers.Dense(N_FEATURES)])

model.summary()



Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_2 (LSTM)               (None, 24, 7)             420       
                                                                 
 dense_2 (Dense)             (None, 24, 14)            112       
                                                                 
 dense_3 (Dense)             (None, 24, 7)             105       
                                                                 
Total params: 637
Trainable params: 637
Non-trainable params: 0
_________________________________________________________________


In [None]:
optimizer =  tf.keras.optimizers.SGD(lr=1e-8, momentum=0.9)
model.compile(
    loss="mse",
    optimizer=optimizer,
    metrics=["mae"]
)
model.fit(train_set, validation_data=valid_set,validation_steps=100, epochs=20)

   #### It is fail
model = tf.keras.models.Sequential([
      tf.keras.layers.Conv1D(filters=64, kernel_size=3,
                          strides=1,
                          activation="relu",
                          padding='causal',
                          input_shape=[G.WINDOW_SIZE, 1]),
      tf.keras.layers.LSTM(64, return_sequences=True),
      tf.keras.layers.LSTM(64),
      tf.keras.layers.Dense(30, activation="relu"),
      tf.keras.layers.Dense(10, activation="relu"),
      tf.keras.layers.Dense(1),
      tf.keras.layers.Lambda(lambda x: x * 400)
    ])
    

In [None]:
   #### It is fail
'''    model = tf.keras.models.Sequential([
      tf.keras.layers.Conv1D(filters=64, kernel_size=3,
                          strides=1,
                          activation="relu",
                          padding='causal',
                          input_shape=[G.WINDOW_SIZE, 1]),
      tf.keras.layers.LSTM(64, return_sequences=True),
      tf.keras.layers.LSTM(64),
      tf.keras.layers.Dense(30, activation="relu"),
      tf.keras.layers.Dense(10, activation="relu"),
      tf.keras.layers.Dense(1),
      tf.keras.layers.Lambda(lambda x: x * 400)
    ])'''
    

In [None]:
model.save("mymodelq5.h5")

In [None]:
# Note that you'll need to save your model as a .h5 like this.
# When you press the Submit and Test button, your saved .h5 model will
# be sent to the testing infrastructure for scoring
# and the score will be returned to you.
 
'''if __name__ == '__main__':
   model = solution_model()
   model.save("mymodel.h5")
 '''
 
# THIS CODE IS USED IN THE TESTER FOR FORECASTING. IF YOU WANT TO TEST YOUR MODEL
# BEFORE UPLOADING YOU CAN DO IT WITH THIS
#def mae(y_true, y_pred):
#    return np.mean(abs(y_true.ravel() - y_pred.ravel()))
#
#
#def model_forecast(model, series, window_size, batch_size):
#    ds = tf.data.Dataset.from_tensor_slices(series)
#    ds = ds.window(window_size, shift=1, drop_remainder=True)
#    ds = ds.flat_map(lambda w: w.batch(window_size))
#    ds = ds.batch(batch_size, drop_remainder=True).prefetch(1)
#    forecast = model.predict(ds)
#    return forecast
#
 
# PASS THE NORMALIZED data IN THE FOLLOWING CODE
 
#rnn_forecast = model_forecast(model, data, N_PAST, BATCH_SIZE)
#rnn_forecast = rnn_forecast[SPLIT_TIME - N_PAST:-1, 0, :]
 
#x_valid = x_valid[:rnn_forecast.shape[0]]
#result = mae(x_valid, rnn_forecast)





 
# ADD YOUR LAYERS HERE.
 
# If you don't follow the instructions in the following comments,
# tests will fail to grade your code:
# The input layer of your model must have an input shape of:
# (BATCH_SIZE, N_PAST = 24, N_FEATURES = 7)
# The model must have an output shape of:
# (BATCH_SIZE, N_FUTURE = 24, N_FEATURES = 7).
# Make sure that there are N_FEATURES = 7 neurons in the final dense
# layer since the model predicts 7 features.
 
# HINT: Bidirectional LSTMs may help boost your score. This is only a
# suggestion.
 
# WARNING: After submitting the trained model for scoring, if you are
# receiving a score of 0 or an error, please recheck the input and
# output shapes of the model to see if it exactly matches our requirements.
# The grading infrastructure is very strict about the shape requirements.
# Most common issues occur when the shapes are not matching our
# expectations.
#
# TIP: You can print the output of model.summary() to review the model
# architecture, input and output shapes of each layer.
# If you have made sure that you have matched the shape requirements
# and all the other instructions we have laid down, and still
# receive a bad score, you must work on improving your model.
 
# WARNING: If you are using the GRU layer, it is advised not to use the
# recurrent_dropout argument (you can alternatively set it to 0),
# since it has not been implemented in the cuDNN kernel and may
# result in much longer training times.