# Imports

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tqdm.notebook import tqdm as tqdm_notebook
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import load_model
from sklearn.metrics import mean_squared_error as mse



# Data Preparation

```
# Exploratory Data Analysis
```



In [None]:
df = pd.read_csv("household_power_consumption_household_power_consumption.csv")

In [None]:
df.head()
df.shape #9 columns

(260640, 9)

In [None]:
df.head(20)

Unnamed: 0,Date,Time,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3
0,1/1/07,0:00:00,2.58,0.136,241.97,10.6,0,0,0.0
1,1/1/07,0:01:00,2.552,0.1,241.75,10.4,0,0,0.0
2,1/1/07,0:02:00,2.55,0.1,241.64,10.4,0,0,0.0
3,1/1/07,0:03:00,2.55,0.1,241.71,10.4,0,0,0.0
4,1/1/07,0:04:00,2.554,0.1,241.98,10.4,0,0,0.0
5,1/1/07,0:05:00,2.55,0.1,241.83,10.4,0,0,0.0
6,1/1/07,0:06:00,2.534,0.096,241.07,10.4,0,0,0.0
7,1/1/07,0:07:00,2.484,0.0,241.29,10.2,0,0,0.0
8,1/1/07,0:08:00,2.468,0.0,241.23,10.2,0,0,0.0
9,1/1/07,0:09:00,2.486,0.0,242.18,10.2,0,0,0.0


In [None]:
# Move the two power columns to the end of the frame: pop them out,
# then re-attach them so they become the last two columns.
global_active_power, global_reactive_power = (
    df.pop(name) for name in ('Global_active_power', 'Global_reactive_power')
)
df['Global_active_power'] = global_active_power
df['Global_reactive_power'] = global_reactive_power

In [None]:
# df.dtypes
df.infer_objects().dtypes

Date                      object
Time                      object
Global_active_power       object
Global_reactive_power     object
Voltage                   object
Global_intensity          object
Sub_metering_1            object
Sub_metering_2            object
Sub_metering_3           float64
dtype: object

In [None]:
# Combine the separate Date and Time text columns into one DateTime index.
# NOTE(review): dayfirst=True assumes the Date strings are day/month/year — confirm against the CSV.
date_text = pd.to_datetime(df['Date'], dayfirst=True).dt.strftime('%Y-%m-%d')
timestamps = pd.to_datetime(date_text + ' ' + df['Time'])

# Once the index is in place the raw Date/Time columns are redundant.
df = (
    df.assign(DateTime=timestamps)
      .set_index('DateTime')
      .drop(columns=['Date', 'Time'])
)

In [None]:
# Convert every column from object to a numeric dtype.
# errors='coerce' turns entries that cannot be parsed into NaN, so ordinary bad
# values never raise. Any exception that still occurs signals a structural
# problem and is allowed to surface instead of being silently swallowed
# (which would leave the column as object without any warning).
for column in df.columns:
    df[column] = pd.to_numeric(df[column], errors='coerce')

In [None]:
df.infer_objects().dtypes

In [None]:
tempV = df['Voltage']
tempV.plot()

In [None]:
tempP = df['Global_active_power']
tempP.plot()

In [None]:
tempS = df['Sub_metering_1']
tempS.plot()

In [None]:
# Plot the Sub_metering_2 series itself (tempS holds Sub_metering_1).
tempS2 = df['Sub_metering_2']
tempS2.plot()

In [None]:
# Plot the Sub_metering_3 series itself (tempS holds Sub_metering_1).
tempS3 = df['Sub_metering_3']
tempS3.plot()

In [None]:
# One frame per sub-meter: the shared voltage/intensity columns, that sub-meter's
# reading, then the two power columns.
df_group_1, df_group_2, df_group_3 = (
    df[['Voltage', 'Global_intensity', sub_col, 'Global_active_power', 'Global_reactive_power']]
    for sub_col in ('Sub_metering_1', 'Sub_metering_2', 'Sub_metering_3')
)

In [None]:
# Summary statistics for the Sub_metering_3 group.
# (Swap in df_group_1 / df_group_2 to inspect the other groups.)
df_group_3.describe()

Unnamed: 0,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_3,dayofweek,month,dayofyear,Hour,Minute
count,86996.0,86996.0,86996.0,86996.0,86996.0,90767.0,90767.0,90767.0,90767.0,90767.0
mean,2.182119,0.137726,237.769858,9.210244,17.219355,3.007007,4.504512,120.239305,13.261626,29.494078
std,1.157982,0.123673,3.554284,4.994707,1.344115,2.038492,2.979774,86.0915,5.973373,17.355877
min,0.16,0.0,223.49,0.8,1.0,0.0,1.0,1.0,0.0,0.0
25%,1.38,0.052,235.3,5.8,17.0,1.0,2.0,50.0,9.0,14.0
50%,1.652,0.108,238.13,7.0,17.0,3.0,4.0,111.0,13.0,30.0
75%,2.696,0.208,240.33,11.4,18.0,5.0,6.0,168.0,19.0,45.0
max,10.67,1.148,249.76,46.4,20.0,6.0,12.0,340.0,23.0,59.0


df_group_3

In [None]:
# Scatter each sub-meter reading against global active power, one figure per group,
# hiding the top/right spines on each figure.
for group_frame, sub_col in (
    (df_group_3, 'Sub_metering_3'),
    (df_group_2, 'Sub_metering_2'),
    (df_group_1, 'Sub_metering_1'),
):
    group_frame.plot(kind='scatter', x=sub_col, y='Global_active_power', s=32, alpha=.8)
    plt.gca().spines[['top', 'right',]].set_visible(False)

In [None]:
df_group_3['Voltage'].plot(kind='hist', bins=20, title='Voltage')
plt.gca().spines[['top', 'right',]].set_visible(False)

In [None]:
# Histogram of global active power; the title matches the plotted column.
df_group_3['Global_active_power'].plot(kind='hist', bins=20, title='Global_active_power')
plt.gca().spines[['top', 'right',]].set_visible(False)

In [None]:
df_group_3.plot(figsize=(7,6))

In [None]:
# Report the number of missing values per column, then drop every row that
# lacks a reading in any of the three sub-metering columns.
print(df.isnull().sum())
sub_metering_cols = ['Sub_metering_1', 'Sub_metering_2', 'Sub_metering_3']
df = df.dropna(subset=sub_metering_cols)
df

In [None]:
#We will be working with the forecasting of Sub Metering 1 == temp as a measure of precision
temp = df['Sub_metering_1']
temp.plot()

In [None]:
#Defining window size of forecast
#[1, 2, 3, 4, 5] --> [6] minute
def df_to_X_y(df, window_size=5):
  """Turn a 1-D series into sliding windows and next-step labels.

  Args:
    df: object exposing ``to_numpy()`` (the notebook passes a pandas Series).
    window_size: number of consecutive values in each input window.

  Returns:
    (X, y) as numpy arrays. Each X entry holds ``window_size`` consecutive
    values, each wrapped in its own single-element list; the matching y entry
    is the value immediately after that window.
  """
  values = df.to_numpy()
  n_windows = len(values) - window_size
  # Wrap each value in its own list so every window has a trailing axis of size 1.
  X = [[[v] for v in values[start:start + window_size]] for start in range(n_windows)]
  y = [values[start + window_size] for start in range(n_windows)]
  return np.array(X), np.array(y)

In [None]:
WINDOW_SIZE = 5
X1, y1 = df_to_X_y(temp, WINDOW_SIZE)
X1.shape, y1.shape

In [None]:
#256864 total records
# Order-preserving split (no shuffling): the first 180000 windows train,
# the next 40000 validate, and the remainder is the test set.
train_part, val_part, test_part = slice(None, 180000), slice(180000, 220000), slice(220000, None)
X_train1, y_train1 = X1[train_part], y1[train_part]
X_val1, y_val1 = X1[val_part], y1[val_part]
X_test1, y_test1 = X1[test_part], y1[test_part]
X_train1.shape, y_train1.shape, X_val1.shape, y_val1.shape, X_test1.shape, y_test1.shape

In [None]:
#Preprocessing Data - scaling
# MinMaxScaler works on 2-D input, so each split is flattened to
# (rows, features), scaled, and reshaped back to its window layout.
# The scaler is fitted on the training windows only; the validation and test
# windows reuse that fitted scaler.
scaler = MinMaxScaler()
n_features = X_train1.shape[-1]
X_train1 = scaler.fit_transform(X_train1.reshape(-1, n_features)).reshape(X_train1.shape)
X_val1 = scaler.transform(X_val1.reshape(-1, n_features)).reshape(X_val1.shape)
X_test1 = scaler.transform(X_test1.reshape(-1, n_features)).reshape(X_test1.shape)

In [None]:
# Copy the DateTime index into a regular 'DateTime' column and move that column
# to the front. The index itself is left unchanged.
df['DateTime'] = df.index
df = df[['DateTime'] + [col for col in df.columns if col != 'DateTime']]

In [None]:
#MODEL 1 : LSTM with ReLU
# Input windows are 5 time steps of 1 feature; a 64-unit LSTM feeds a small
# ReLU dense layer and a single linear output.
model1 = Sequential([
    InputLayer((5, 1)),
    LSTM(64),
    Dense(8, 'relu'),
    Dense(1, 'linear'),
])

model1.summary()

In [None]:
# Checkpoint callback writing to 'model1/' with save_best_only=True.
# NOTE(review): newer Keras releases reportedly require the checkpoint path to
# end in '.keras' or '.h5' — confirm against the installed version.
cp1 = ModelCheckpoint('model1/', save_best_only=True)
# MSE loss, Adam at learning rate 1e-4, RMSE reported as a metric.
model1.compile(loss=MeanSquaredError(), optimizer=Adam(learning_rate=0.0001), metrics=[RootMeanSquaredError()])
# Train for 10 epochs, validating on the held-out validation windows.
model1.fit(X_train1, y_train1, validation_data=(X_val1, y_val1), epochs=10, callbacks=[cp1])

In [None]:
#Predictions

def plot_predictions1(model, X, y, start=0, end=100):
  """Plot a model's predictions against the actual values and score them.

  Args:
    model: object with a ``predict(X)`` method whose result can be flattened
      to one value per row of ``y``.
    X: model inputs.
    y: true target values, one per prediction.
    start: first row (positional) to include in the plot.
    end: row (positional, exclusive) at which the plot stops.

  Returns:
    (results, error): a DataFrame with 'Predictions' and 'Actuals' columns
    covering all rows, and the mean squared error over all rows.
  """
  predictions = model.predict(X).flatten()
  results = pd.DataFrame(data={'Predictions': predictions, 'Actuals': y})
  # Only the requested slice is drawn; the returned frame and the MSE use every row.
  shown = results.iloc[start:end]
  plt.plot(shown['Predictions'], label='Predictions')
  plt.plot(shown['Actuals'], label='Actuals')
  # Label the two lines so they can be told apart.
  plt.legend()
  return results, mse(y, predictions)

# Put model1's predictions next to the true targets for the training and
# validation splits; only the validation frame is displayed by this cell.
train_predictions = model1.predict(X_train1).flatten()
val_predictions = model1.predict(X_val1).flatten()
train_results = pd.DataFrame({'Train Predictions': train_predictions, 'Actuals': y_train1})
val_results = pd.DataFrame({'Val Predictions': val_predictions, 'Actuals': y_val1})
val_results

In [None]:
plot_predictions1(model1, X_test1, y_test1)

In [None]:
#MODEL 2: 1D CNN
# Same (5, 1) input windows; a 64-filter Conv1D with kernel size 2 is flattened
# into a small ReLU dense layer and a single linear output.
model2 = Sequential([
    InputLayer((5, 1)),
    Conv1D(64, kernel_size=2),
    Flatten(),
    Dense(8, 'relu'),
    Dense(1, 'linear'),
])

model2.summary()

In [None]:
# Checkpoint callback writing to 'model2/' with save_best_only=True.
# NOTE(review): newer Keras releases reportedly require the checkpoint path to
# end in '.keras' or '.h5' — confirm against the installed version.
cp2= ModelCheckpoint('model2/', save_best_only=True)
# MSE loss, Adam at learning rate 1e-4, RMSE reported as a metric.
model2.compile(loss=MeanSquaredError(), optimizer=Adam(learning_rate=0.0001), metrics=[RootMeanSquaredError()])
# Train for 10 epochs, validating on the held-out validation windows.
model2.fit(X_train1, y_train1, validation_data=(X_val1, y_val1), epochs=10, callbacks=[cp2])

In [None]:
plot_predictions1(model2, X_test1, y_test1)

In [None]:
#MODEL 3 : GRU
# Same (5, 1) input windows; a 64-unit GRU feeds a small ReLU dense layer and a
# single linear output.
model3 = Sequential([
    InputLayer((5, 1)),
    GRU(64),
    Dense(8, 'relu'),
    Dense(1, 'linear'),
])
model3.summary()

In [None]:
# Train the GRU model with the same recipe as the other models before evaluating
# it; without the fit() call the predictions would come from untrained weights.
cp3 = ModelCheckpoint('model3/', save_best_only=True)
model3.compile(loss=MeanSquaredError(), optimizer=Adam(learning_rate=0.0001), metrics=[RootMeanSquaredError()])
model3.fit(X_train1, y_train1, validation_data=(X_val1, y_val1), epochs=10, callbacks=[cp3])
# Plot and score the trained model on the test windows.
plot_predictions1(model3, X_test1, y_test1)

In [None]:
#Creating precise timestamps
# Start from the Sub_metering_1 series and add each row's timestamp (as returned
# by pd.Timestamp.timestamp) in a 'Seconds' column.
temp_df = pd.DataFrame({'Sub_1':temp})
temp_df['Seconds'] = temp_df.index.map(pd.Timestamp.timestamp)

# Period lengths in seconds.
day = 60*60*24
year = 365.2425*day

# Encode time-of-day and time-of-year as sine/cosine pairs.
for sin_col, cos_col, period in (('Day sin', 'Day cos', day), ('Year sin', 'Year cos', year)):
    phase = temp_df['Seconds'] * (2 * np.pi / period)
    temp_df[sin_col] = np.sin(phase)
    temp_df[cos_col] = np.cos(phase)
temp_df.head()

In [None]:
temp_df = temp_df.drop('Seconds', axis=1)
temp_df.head()

In [None]:
def df_to_X_y2(df, window_size=6):
  """Turn a multi-column frame into sliding windows and next-step labels.

  Args:
    df: object exposing ``to_numpy()`` that yields a 2-D array (rows x columns).
    window_size: number of consecutive rows in each input window.

  Returns:
    (X, y) as numpy arrays. Each X entry holds ``window_size`` consecutive
    rows with all their columns; the matching y entry is column 0 of the row
    immediately after that window.
  """
  rows = df.to_numpy()
  starts = range(len(rows) - window_size)
  X = [list(rows[s:s + window_size]) for s in starts]
  # The label is the first column of the row right after the window.
  y = [rows[s + window_size][0] for s in starts]
  return np.array(X), np.array(y)

In [None]:
X2, y2 = df_to_X_y2(temp_df)
X2.shape, y2.shape

In [None]:
# Order-preserving split (no shuffling) at the same boundaries as the first
# experiment: 180000 training windows, 40000 validation windows, the rest test.
train_rows, val_rows, test_rows = slice(None, 180000), slice(180000, 220000), slice(220000, None)
X_train2, y_train2 = X2[train_rows], y2[train_rows]
X_val2, y_val2 = X2[val_rows], y2[val_rows]
X_test2, y_test2 = X2[test_rows], y2[test_rows]
X_train2.shape, y_train2.shape, X_val2.shape, y_val2.shape, X_test2.shape, y_test2.shape

In [None]:
# Mean and standard deviation of feature 0, taken from the training windows only.
temp_training_mean = X_train2[:, :, 0].mean()
temp_training_std = X_train2[:, :, 0].std()

def preprocess(X):
  """Standardise feature 0 of ``X`` in place with the training mean/std.

  The array passed in is modified (only index 0 of the last axis is touched)
  and the same array is returned.
  """
  first_feature = X[:, :, 0]
  X[:, :, 0] = (first_feature - temp_training_mean) / temp_training_std
  return X

In [None]:
# Standardise the first feature of each split.
# preprocess() writes into the array it is given, so running this cell a second
# time would standardise already-standardised values — rebuild the splits first.
preprocess(X_train2)
preprocess(X_val2)
preprocess(X_test2)

In [None]:
#MODEL 4: LSTM with standardisation
# Input windows are 6 time steps of 5 features; a 64-unit LSTM feeds a small
# ReLU dense layer and a single linear output.
model4 = Sequential([
    InputLayer((6, 5)),
    LSTM(64),
    Dense(8, 'relu'),
    Dense(1, 'linear'),
])

model4.summary()

In [None]:
# Checkpoint callback writing to 'model4/' with save_best_only=True.
# NOTE(review): newer Keras releases reportedly require the checkpoint path to
# end in '.keras' or '.h5' — confirm against the installed version.
cp4 = ModelCheckpoint('model4/', save_best_only=True)
# MSE loss, Adam at learning rate 1e-4, RMSE reported as a metric.
model4.compile(loss=MeanSquaredError(), optimizer=Adam(learning_rate=0.0001), metrics=[RootMeanSquaredError()])
# Train for 10 epochs on the multi-feature windows, validating on the held-out split.
model4.fit(X_train2, y_train2, validation_data=(X_val2, y_val2), epochs=10, callbacks=[cp4])

In [None]:
plot_predictions1(model4, X_test2, y_test2)