<a href="https://colab.research.google.com/github/surajdusa/Predictive-Modelling-of-Energy-Usage-in-Response-to-Dynamic-Pricing-Signals-in-London-Households/blob/main/Dynamic_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [3]:
# Load the raw half-hourly readings from a CSV expected in the working directory.
# NOTE(review): the saved run of this cell ended in FileNotFoundError, so the
# file has to be made available at this path before anything below can run.
df = pd.read_csv(filepath_or_buffer='CC_LCL-FullData.csv')

FileNotFoundError: [Errno 2] No such file or directory: 'CC_LCL-FullData.csv'

In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive.
# NOTE(review): this cell comes after the CSV load; if the dataset lives on
# Drive, mounting presumably has to happen first and the read path must point
# under /content/drive — confirm where the file is stored.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Peek at the first five raw rows to confirm the load worked.
df.head(n=5)

In [None]:
# Row count of the raw frame (len(df) is the same number as df.shape[0]).
print('The number of data points in the dataset:', len(df))

In [None]:
# shape[1] is the column count, so label it as such (shape[0] is the row count
# already reported by the previous cell).
print('The number of columns in the dataset:', df.shape[1])

In [None]:
# Keep only the first 2,000,000 rows for the rest of the notebook.
# .copy() detaches the subset from the frame it was sliced from: later cells
# add columns to `df`, which on a bare slice triggers pandas' chained-assignment
# warning, and an independent copy no longer holds a reference to the full load.
df = df.iloc[:2_000_000].copy()

In [None]:
# Missing-value count per column (isna is the canonical spelling of isnull).
print(df.isna().sum())

In [None]:
# Split the timestamp into separate calendar-date and time-of-day columns.
# The strings are parsed once and the result reused for both columns instead
# of running the datetime parser over the whole column twice.
timestamps = pd.to_datetime(df["DateTime"])
df["Date"] = timestamps.dt.date
df["Time"] = timestamps.dt.time
del timestamps  # the parsed copy is not needed once both parts are stored

# Rebind rather than drop in place so the cell yields a fresh frame.
df = df.drop(columns=["DateTime"])

In [None]:
# Row count after the date/time split (same value as df.shape[0]).
print('The number of data points in the dataset:', len(df))

In [None]:
# First five rows, now with Date and Time in place of DateTime.
df.head(n=5)

In [None]:
# Show the exact column labels, including any stray whitespace.
print(list(df.columns))

In [None]:
# Trim leading/trailing whitespace from every column label.
df.columns = df.columns.map(str.strip)

In [None]:
# Force the consumption column to numeric; values that cannot be parsed
# become NaN because of errors="coerce".
df = df.assign(
    **{
        "KWH/hh (per half hour)": pd.to_numeric(
            df["KWH/hh (per half hour)"], errors="coerce"
        )
    }
)

In [None]:
# Total consumption per calendar date, summed over every row that shares the
# date; the result has exactly two columns, "Date" and "KWH".
dx = (
    df.groupby("Date", as_index=False)["KWH/hh (per half hour)"]
    .sum()
    .rename(columns={"KWH/hh (per half hour)": "KWH"})
)

In [None]:
# First twenty daily totals.
dx.head(n=20)

In [None]:
# Convert the date objects to pandas datetimes so the .dt accessor works;
# anything unparseable becomes NaT.
dx = dx.assign(Date=pd.to_datetime(dx['Date'], errors='coerce'))

In [None]:
# Derive calendar features from the Date column in a single step.
dx = dx.assign(
    year=dx['Date'].dt.year,
    quarter=dx['Date'].dt.quarter,
    month=dx['Date'].dt.month,
    day=dx['Date'].dt.day,
)

In [None]:
# Check the new year / quarter / month / day columns.
dx.head(n=5)

In [None]:
import calendar

# Replace the numeric month with its English name, derived straight from the
# Date column. Unlike indexing calendar.month_name with the current value of
# `month`, this is vectorised and safe to re-run: a second run of the table
# lookup would index it with a name string and raise.
dx['month'] = dx['Date'].dt.month_name()

In [None]:
# Confirm that `month` now holds names rather than numbers.
dx.head(n=5)

In [None]:
# Last five daily rows.
dx.tail(n=5)

In [None]:
# Summary statistics of the daily totals.
dx['KWH'].describe()

In [None]:
# Distribution of daily totals per year, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=dx, x='year', y='KWH', palette='Set2', ax=ax)
ax.set_title("KWH Consumption Distribution by Year", fontsize=14)
ax.set_xlabel("Year", fontsize=12)
ax.set_ylabel("KWH Consumption", fontsize=12)
fig.tight_layout()
plt.show()

In [None]:
# Drop days whose total lies outside the 1.5 * IQR fences (Tukey's rule).
Q1 = dx['KWH'].quantile(0.25)
Q3 = dx['KWH'].quantile(0.75)
IQR = Q3 - Q1
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

# between() is inclusive on both ends, matching the >= / <= pair it replaces.
# .copy() makes the filtered frame independent: later cells assign new columns
# to `dx`, which on a boolean-mask slice raises pandas' chained-assignment
# warning (hidden here only because warnings are globally suppressed).
dx = dx[dx['KWH'].between(lower_bound, upper_bound)].copy()

In [None]:
# Same per-year boxplot as before, redrawn on the outlier-filtered frame.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=dx, x='year', y='KWH', palette='Set2', ax=ax)
ax.set_title("KWH Consumption Distribution by Year", fontsize=14)
ax.set_xlabel("Year", fontsize=12)
ax.set_ylabel("KWH Consumption", fontsize=12)
fig.tight_layout()
plt.show()

In [None]:
# First rows after outlier removal.
dx.head(n=5)

In [None]:
# Mean daily total per year, with the overall mean as a dashed reference line.
yearly_avg = dx.groupby('year')['KWH'].mean()
overall_avg = dx['KWH'].mean()

fig, ax = plt.subplots(figsize=(10, 6))
yearly_avg.plot(kind='bar', color='c', edgecolor='black', ax=ax)
ax.axhline(y=overall_avg, color='black', linestyle='--', label=f"Avg Consumption: {overall_avg:.2f} KWH")
# Annotate the reference line above the last bar.
ax.text(len(yearly_avg) - 1, overall_avg + 0.05, f"Avg: {overall_avg:.2f}", color='black', fontsize=12, ha='center')
ax.set_title("Average Consumption by Year", fontsize=14)
ax.set_xlabel("Year", fontsize=12)
ax.set_ylabel("Average KWH", fontsize=12)
ax.grid(visible=True, linestyle='--', alpha=0.7)
ax.legend()
fig.tight_layout()
plt.show()

In [None]:
# Order months by calendar position (not alphabetically) before grouping.
month_order = list(calendar.month_name)[1:]
dx['month'] = pd.Categorical(dx['month'], categories=month_order, ordered=True)

overall_avg = dx["KWH"].mean()
monthly_avg = dx.groupby('month')['KWH'].mean()

# Monthly means as a line, with the overall mean as a dashed reference.
fig, ax = plt.subplots(figsize=(10, 6))
monthly_avg.plot(kind='line', marker='o', color='b', label='Monthly Average', ax=ax)
ax.axhline(y=overall_avg, color='black', linestyle='--', label=f"Avg Consumption: {overall_avg:.2f} KWH")
ax.set_title("Average Consumption by Month", fontsize=14)
ax.set_xlabel("Month", fontsize=12)
ax.set_ylabel("Average KWH", fontsize=12)
plt.xticks(rotation=45)
ax.grid(visible=True, linestyle='--', alpha=0.7)
ax.text(6, overall_avg + 0.05, f"Avg: {overall_avg:.2f} KWH", color='black', fontsize=12, ha='center')
ax.legend()
fig.tight_layout()
plt.show()

In [None]:
# Date span covered by the filtered daily series.
print('Minimum date:', dx['Date'].min())
print('Maximum date:', dx['Date'].max())

In [None]:
# Daily totals over the whole observation window.
fig, ax = plt.subplots(figsize=(15, 6))
ax.plot(dx['Date'], dx['KWH'], color='blue')
ax.set_ylabel('Kilowatt Hour (KWH)', fontsize=12)
ax.set_xlabel('Date', fontsize=12)
# Title typo fixed ("oover" -> "over").
ax.set_title('Daily Energy Consumption over the period of time')
fig.tight_layout()
# Remove the left and bottom spines as well as the default top/right ones.
sns.despine(bottom=True, left=True)
plt.show()

In [None]:
# Quick look at the frame before sorting and scaling.
dx.head(n=5)

In [None]:
# Make sure Date is a datetime and put the rows in chronological order,
# which the windowing below relies on.
dx = dx.assign(Date=pd.to_datetime(dx['Date'])).sort_values(by='Date')

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Min-max scale the daily totals for the network.
# The scaler is fitted on the earliest 80% of days only (the frame is sorted by
# Date), mirroring the 80/20 chronological split used further down. Fitting on
# the full series would leak the min/max of the evaluation period into
# training. Later days are transformed with the same parameters and may
# therefore fall slightly outside [0, 1].
scaler = MinMaxScaler()
n_fit = max(1, int(0.8 * len(dx)))
scaler.fit(dx[['KWH']].iloc[:n_fit])
dx['KWH_normalized'] = scaler.transform(dx[['KWH']]).ravel()

In [None]:
# Verify the new KWH_normalized column.
dx.head(n=5)

In [None]:
def create_sequences(data, input_steps, output_steps):
    """Slice a series into overlapping (input window, target window) pairs.

    Window ``i`` uses ``data[i : i + input_steps]`` as the input and the
    ``output_steps`` values that immediately follow it as the target; the
    start index advances by one element per window.

    Parameters
    ----------
    data : array-like
        Ordered observations, indexed along the first axis.
    input_steps : int
        Length of each input window.
    output_steps : int
        Length of each target window.

    Returns
    -------
    tuple of numpy.ndarray
        ``(x, y)`` with shapes ``(n, input_steps, ...)`` and
        ``(n, output_steps, ...)``, where
        ``n = len(data) - input_steps - output_steps + 1``. When the series is
        too short for even one pair, ``n`` is 0 and both arrays are empty but
        keep their window dimension, so downstream ``shape[1]`` lookups and
        reshapes still work.
    """
    values = np.asarray(data)
    n_windows = len(values) - input_steps - output_steps + 1

    if n_windows <= 0:
        # Not enough points for a single pair: return correctly shaped empties
        # rather than shape-(0,) arrays.
        trailing = values.shape[1:]
        return (
            np.empty((0, input_steps) + trailing, dtype=values.dtype),
            np.empty((0, output_steps) + trailing, dtype=values.dtype),
        )

    x = np.stack([values[i:i + input_steps] for i in range(n_windows)])
    y = np.stack([
        values[i + input_steps:i + input_steps + output_steps]
        for i in range(n_windows)
    ])
    return x, y

In [None]:
# Use 60 consecutive rows as input and the following 60 rows as the target.
# NOTE(review): outlier days were dropped earlier, so consecutive rows are not
# guaranteed to be consecutive calendar days — confirm this is acceptable.
input_steps = output_steps = 60
x, y = create_sequences(dx['KWH_normalized'].to_numpy(), input_steps, output_steps)

In [None]:
# Chronological 80/20 split: the earliest windows train, the latest evaluate.
split_idx = int(0.8 * len(x))
x_train, x_test = np.split(x, [split_idx])
y_train, y_test = np.split(y, [split_idx])

In [None]:
# Display the training input windows (one row per window).
x_train

In [None]:
# Display the training target windows (one row per window).
y_train

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

In [None]:
# Seed Python, NumPy and TensorFlow so weight initialisation, dropout masks
# and batch shuffling are repeatable across Restart & Run All.
tf.keras.utils.set_random_seed(42)

# Two stacked LSTM layers followed by a small dense head that emits the whole
# forecast horizon in one shot (output_steps values per input window).
model = Sequential([
    # The first LSTM returns the full sequence so the second one can consume it.
    LSTM(64, return_sequences=True, input_shape=(input_steps, 1)),
    Dropout(0.2),
    # The second LSTM keeps only its final state as a summary of the window.
    LSTM(64, return_sequences=False),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dense(output_steps)])

In [None]:
# Train with mean squared error on the scaled targets, using Adam.
model.compile(loss='mse', optimizer='adam')

In [None]:
# Add the trailing feature axis the LSTM expects: (samples, steps) becomes
# (samples, steps, 1).
x_train = x_train.reshape(*x_train.shape[:2], 1)
x_test = x_test.reshape(*x_test.shape[:2], 1)

In [None]:
# Fit for 50 epochs; the held-out test windows double as the validation set.
history = model.fit(
    x_train,
    y_train,
    epochs=50,
    batch_size=32,
    validation_data=(x_test, y_test),
)

In [None]:
# Learning curves: training vs. validation loss per epoch.
# A title and axis labels are added so the figure can be read on its own.
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(history.history['loss'], label='Train Loss')
ax.plot(history.history['val_loss'], label='Validation Loss')
ax.set_title('Training and Validation Loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss (MSE)')
ax.legend()
plt.show()

In [None]:
# Forecast every test window, then map both forecasts and targets back to KWH.
y_pred = model.predict(x_test)

# The scaler was fitted on a single column, so each (samples, horizon) array is
# flattened to one column for inverse_transform and then restored to its
# original shape. Passing the 2-D array directly only works through
# unvalidated broadcasting of the one-feature parameters.
y_test_rescaled = scaler.inverse_transform(y_test.reshape(-1, 1)).reshape(y_test.shape)
y_pred_rescaled = scaler.inverse_transform(y_pred.reshape(-1, 1)).reshape(y_pred.shape)

In [None]:
# Compare forecast and ground truth for the first test window only.
horizon = range(output_steps)
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(horizon, y_test_rescaled[0], label='Actual')
ax.plot(horizon, y_pred_rescaled[0], label='Predicted')
ax.set_title('Prediction vs Actual for First Test Sequence')
ax.set_xlabel('Days')
ax.set_ylabel('Energy Consumption (KWH)')
ax.legend()
plt.show()