<a href="https://colab.research.google.com/github/mnoorchenar/SmartMeterData/blob/main/LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import plotly.graph_objs as go
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.callbacks import Callback
from plotly.subplots import make_subplots

# Load the smart-meter readings and split them into train/test sets by year
df = pd.read_csv(
    'https://raw.githubusercontent.com/mnoorchenar/data/main/Smart_Meter_Data/SmartMeterData_ID_1017024.csv'
)
df['Time'] = pd.to_datetime(df['Time'])  # parse timestamps so the year can be extracted
train_df = df.loc[df['Time'].dt.year.eq(2021)].copy()  # 2021 readings are used for training
test_df = df.loc[df['Time'].dt.year.eq(2022)].copy()  # 2022 readings are used for testing

# Build the Autoencoder model: input -> 20 -> encoding_dim -> 20 -> input
input_dim = len(train_df.columns) - 1  # every column except one (the time column) is a model input
encoding_dim = 5  # width of the bottleneck layer; adjust as needed

input_layer = Input(shape=(input_dim,))
encoder_layer1 = Dense(units=20, activation='relu')(input_layer)
encoder_layer2 = Dense(units=encoding_dim, activation='relu')(encoder_layer1)
decoder_layer1 = Dense(units=20, activation='relu')(encoder_layer2)
decoder_layer2 = Dense(units=input_dim, activation='linear')(decoder_layer1)  # linear output for reconstruction

# The model is trained to reproduce its own input, scored by mean squared error
autoencoder_model = Model(inputs=input_layer, outputs=decoder_layer2)
autoencoder_model.compile(loss='mse', optimizer='adam')

# Define a custom callback to plot the training and validation loss during training
class PlotLosses(Callback):
    """Keras callback that copies the per-epoch losses into a Plotly figure.

    After every epoch the training loss is written to ``fig.data[0]`` and the
    validation loss to ``fig.data[1]``, so the figure must already contain
    those two traces, in that order.

    Args:
        fig: Plotly figure whose first two traces receive the loss curves.
        validation_data: Stored on the instance as ``validation_data``; this
            class does not read it itself.
    """

    def __init__(self, fig, validation_data):
        super().__init__()
        self.fig = fig
        self.validation_data = validation_data
        # Initialise the history here as well so ``update`` is safe to call
        # even before training has started.
        self.losses = []
        self.val_losses = []
        self.logs = []

    def on_train_begin(self, logs=None):
        """Reset the recorded history at the start of each training run."""
        self.losses = []
        self.val_losses = []
        self.logs = []

    def on_epoch_end(self, epoch, logs=None):
        """Record this epoch's losses and refresh the figure traces."""
        # ``logs`` may be None, and 'val_loss' is absent when training runs
        # without validation data; a missing value is stored as None.
        logs = logs or {}
        self.losses.append(logs.get('loss'))
        self.val_losses.append(logs.get('val_loss'))
        self.logs.append(logs)
        self.update()

    def update(self):
        """Write the recorded losses into the first two figure traces."""
        # The x values must be set together with y: a trace whose x array is
        # empty has zero length and is not drawn, whatever y contains.
        epoch_numbers = list(range(1, len(self.losses) + 1))
        with self.fig.batch_update():
            self.fig.data[0].x = epoch_numbers
            self.fig.data[0].y = self.losses
            self.fig.data[1].x = epoch_numbers
            self.fig.data[1].y = self.val_losses

# Create the subplots: loss curves in row 1, the VAL time series in row 2.
# The rows use unrelated x units (epoch number vs. timestamp), so the x-axes
# are kept independent; sharing them would tie the epoch axis to the date axis
# and hide the tick labels of the loss plot.
fig = make_subplots(rows=2, cols=1, shared_xaxes=False)
fig.update_xaxes(title_text='Epoch', row=1, col=1)
fig.update_xaxes(title_text='Time', row=2, col=1)

# Add the training and validation loss traces to the loss plot.
# They start empty and must stay the first two traces of the figure
# (fig.data[0] and fig.data[1]) so the loss callback can fill them in.
loss_trace = go.Scatter(x=[], y=[], mode='lines', name='Training loss')
val_loss_trace = go.Scatter(x=[], y=[], mode='lines', name='Validation loss')
fig.add_trace(loss_trace, row=1, col=1)
fig.add_trace(val_loss_trace, row=1, col=1)

# Add the VAL data and (initially empty) anomaly markers to the time series plot
val_trace_train = go.Scatter(x=train_df['Time'], y=train_df['VAL'], mode='lines', name='Training data')
val_trace_test = go.Scatter(x=test_df['Time'], y=test_df['VAL'], mode='lines', name='Test data')
anomaly_trace_train = go.Scatter(x=[], y=[], mode='markers', name='Anomalies in training data', marker=dict(color='red', size=8))
anomaly_trace_test = go.Scatter(x=[], y=[], mode='markers', name='Anomalies in test data', marker=dict(color='red', size=8))
fig.add_trace(val_trace_train, row=2, col=1)
fig.add_trace(val_trace_test, row=2, col=1)
fig.add_trace(anomaly_trace_train, row=2, col=1)
fig.add_trace(anomaly_trace_test, row=2, col=1)

# Train the Autoencoder model and detect anomalies in the training and test data
epochs = 50  # You can adjust the number of epochs as needed
batch_size = 64  # You can adjust the batch size as needed

# Snapshot the feature matrices once, before any result columns are added to
# the frames. Slicing `iloc[:, 1:]` again later would also pick up 'val_pred',
# 'mse' and 'anomaly' and no longer match the model's input width.
val_position = list(train_df.columns[1:]).index('VAL')  # position of VAL among the features
train_features = train_df.iloc[:, 1:].to_numpy(dtype=float)
test_features = test_df.iloc[:, 1:].to_numpy(dtype=float)

history = autoencoder_model.fit(
    train_features,
    train_features,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(test_features, test_features),
    callbacks=[PlotLosses(fig, (test_features, test_features))],
)

# Use the Autoencoder model to detect anomalies in the training data.
# predict() returns one reconstructed row per input row, so the error is
# computed on plain arrays: subtracting a Series from a DataFrame would align
# the Series index with the column labels and yield NaN everywhere.
train_reconstruction = np.asarray(autoencoder_model.predict(train_features))
train_df['val_pred'] = train_reconstruction[:, val_position]  # reconstructed VAL only
train_df['mse'] = np.mean(np.square(train_features - train_reconstruction), axis=1)
mse_threshold = np.percentile(train_df['mse'], 95)  # You can adjust the percentile threshold as needed
train_df['anomaly'] = train_df['mse'] > mse_threshold
train_anomalies = train_df[train_df['anomaly']]

# Use the Autoencoder model to detect anomalies in the test data,
# reusing the threshold derived from the training errors
test_reconstruction = np.asarray(autoencoder_model.predict(test_features))
test_df['val_pred'] = test_reconstruction[:, val_position]
test_df['mse'] = np.mean(np.square(test_features - test_reconstruction), axis=1)
test_df['anomaly'] = test_df['mse'] > mse_threshold
test_anomalies = test_df[test_df['anomaly']]

# Add the anomaly points to the time series plot.
# The figure holds its own copies of the traces passed to add_trace, so the
# markers are written to the figure's traces (selected by name); assigning to
# the standalone trace objects alone would leave the plot unchanged.
fig.update_traces(
    x=train_anomalies['Time'],
    y=train_anomalies['VAL'],
    selector=dict(name='Anomalies in training data'),
)
fig.update_traces(
    x=test_anomalies['Time'],
    y=test_anomalies['VAL'],
    selector=dict(name='Anomalies in test data'),
)
# Keep the standalone trace objects in sync for any code that still reads them
anomaly_trace_train.x = train_anomalies['Time']
anomaly_trace_train.y = train_anomalies['VAL']
anomaly_trace_test.x = test_anomalies['Time']
anomaly_trace_test.y = test_anomalies['VAL']

# Show the plot
fig.show()
