# **Data Prep**

In [1]:
# Import libraries here
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import LSTM, Dense, Input
from keras.callbacks import EarlyStopping
from keras.optimizers import Adam
from keras.metrics import MeanAbsolutePercentageError
from statsmodels.tsa.arima.model import ARIMA



In [2]:
# Load dataset
df = pd.read_csv('/Users/payalpatel/Downloads/HomeC.csv', low_memory=False)

# Preprocessing
df['cloudCover'] = pd.to_numeric(df['cloudCover'], errors='coerce')
df['time'] = pd.to_datetime(df['time'], format='%H:%M:%S', errors='coerce').dt.time
df.loc[:, 'icon'] = df['icon'].fillna('Unknown')
df.loc[:, 'summary'] = df['summary'].fillna('Unknown')
df = pd.get_dummies(df, columns=['icon', 'summary'])

# Create variables for the columns that are relevant from the dataset
target_variable = 'House overall [kW]'
weather_variables = ['temperature', 'humidity', 'visibility', 'windSpeed', 'cloudCover',
            'dewPoint', 'precipIntensity', 'precipProbability']

# Prepare dataset
dataset = df[[target_variable] + weather_variables].copy()

# Lagged and rolling features
dataset['lag1'] = dataset[target_variable].shift(1)
dataset['lag2'] = dataset[target_variable].shift(2)
dataset['mean_roll'] = dataset[target_variable].rolling(window=7).mean()
dataset['std_roll'] = dataset[target_variable].rolling(window=7).std()

# Fill the missing values
data = dataset.bfill()

# Scale target and weather variables separately 
scaler = MinMaxScaler()
scaled_weather = scaler.fit_transform(data[weather_variables])
scaler_target = MinMaxScaler()
scaled_target = scaler_target.fit_transform(data[[target_variable]])

def create_sequences(data, time_steps):
    X, y = [], []
    for i in range(len(data) - time_steps):
        X.append(data[i:i+time_steps])  
        y.append(data[i+time_steps, 0])  
    return np.array(X), np.array(y)

# Combine variables for sequence
combine_t_w = np.hstack([scaled_weather, scaled_target])

# We are predicting for 1 year meaning 24x365 = 8760 
time_steps = 24

# Create sequences
X, y = create_sequences(combine_t_w, time_steps)

# Training and testing split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# y_test has one NaN, we can replace it with the mean value
y_test = np.nan_to_num(y_test, nan=np.nanmean(y_test)) 

X_train = np.nan_to_num(X_train)
y_train = np.nan_to_num(y_train)

# **Model Definition**

In [3]:
# Define LSTM model
model = Sequential()
model.add(Input(shape=(X_train.shape[1], X_train.shape[2])))
model.add(LSTM(units=100, return_sequences=True))
model.add(LSTM(units=50, return_sequences=False))
model.add(Dense(units=25, activation='relu'))
model.add(Dense(units=1, activation='linear'))

optimizer = Adam(learning_rate=0.001, clipnorm=1.0)
model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['mae', MeanAbsolutePercentageError()])

# **Train the Model**

In [4]:
# Early stop (7 times before stopping)
early_stop = EarlyStopping(monitor='val_loss', patience=7, restore_best_weights=True)

# Train LSTM
history = model.fit(X_train, y_train, epochs=100, batch_size=128, validation_data=(X_test, y_test), verbose=1, callbacks=[early_stop])

Epoch 1/100
[1m3150/3150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m119s[0m 37ms/step - loss: 0.0038 - mae: 0.0156 - mean_absolute_percentage_error: 2987.7820 - val_loss: 1.8136e-05 - val_mae: 0.0019 - val_mean_absolute_percentage_error: 0.4353
Epoch 2/100
[1m3150/3150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m105s[0m 33ms/step - loss: 3.2808e-05 - mae: 0.0037 - mean_absolute_percentage_error: 733.4469 - val_loss: 2.0103e-05 - val_mae: 0.0030 - val_mean_absolute_percentage_error: 0.6044
Epoch 3/100
[1m3150/3150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m104s[0m 33ms/step - loss: 2.5826e-05 - mae: 0.0033 - mean_absolute_percentage_error: 416.6070 - val_loss: 1.3553e-05 - val_mae: 0.0024 - val_mean_absolute_percentage_error: 0.5122
Epoch 4/100
[1m3150/3150[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m105s[0m 33ms/step - loss: 1.7411e-05 - mae: 0.0027 - mean_absolute_percentage_error: 406.3524 - val_loss: 2.0353e-05 - val_mae: 0.0038 - val_mean_absolute_percentage_e

# **Evaluate the Model**

In [7]:
# Evaluate the LSTM model
loss, mae, mape = model.evaluate(X_test, y_test, verbose=0)
accuracy = 100 - mape
print(f"Test accuracy: {accuracy:2f}%")

Test accuracy: 99.831879%


# **Predictions**

# **Forecasting Future Energy Consumption**