<a href="https://colab.research.google.com/github/mnoorchenar/SmartMeterData/blob/main/Autoencoder_AnomalyDetection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
from keras.layers import Input, Dense
from keras.models import Model
import plotly.graph_objs as go

# Download the smart-meter readings, parse READTS into datetimes and order the
# rows by that column, all in one chained expression.
df = (
    pd.read_csv('https://raw.githubusercontent.com/mnoorchenar/data/main/SmartMeterData_ID_313960.csv')
    .assign(READTS=lambda frame: pd.to_datetime(frame['READTS']))
    .sort_values(by='READTS')
)

# Pull the two columns out as plain arrays: timestamps feed the plot's x-axis,
# data holds the VAL readings used both for training and for plotting.
timestamps, data = df['READTS'].values, df['VAL'].values

# Define the autoencoder model: a stack of Dense layers (32-16-8-16-32) mapping
# one scalar reading back onto itself.
input_layer = Input(shape=(1,))
encoded = Dense(32, activation='relu')(input_layer)
encoded = Dense(16, activation='relu')(encoded)
encoded = Dense(8, activation='relu')(encoded)
decoded = Dense(16, activation='relu')(encoded)
decoded = Dense(32, activation='relu')(decoded)
# The targets are z-scored below, so they take negative values and values above 1.
# A sigmoid output is confined to (0, 1) and could never reconstruct them, which
# inflates the reconstruction error for perfectly ordinary samples. Use a linear
# output so the network can cover the full range of the standardized data.
decoded = Dense(1, activation='linear')(decoded)
autoencoder = Model(inputs=input_layer, outputs=decoded)

# Shape the readings as one feature column and standardize them
# (zero mean, unit standard deviation).
x_train = data.reshape(-1, 1)
x_train = (x_train - x_train.mean()) / x_train.std()

# Train the autoencoder to reproduce its own input; validation_split holds out
# the last 10% of the rows for validation.
autoencoder.compile(optimizer='adam', loss='mse')
autoencoder.fit(x_train, x_train, epochs=50, batch_size=128, validation_split=0.1)

# Reconstruct the training samples and score each one by its squared
# reconstruction error (averaged over the single feature column).
preds = autoencoder.predict(x_train)
mse = np.power(x_train - preds, 2).mean(axis=1)

# Samples whose error exceeds the 95th percentile of all errors are reported
# as anomalies; `anomalies` holds their positional indices.
threshold = np.percentile(mse, q=95)
anomalies = np.flatnonzero(mse > threshold)

# Line trace: the complete series of readings over time.
trace1 = go.Scatter(x=timestamps, y=data, mode='lines', name='Data')

# Marker trace: only the samples flagged as anomalies, drawn as large red dots.
trace2 = go.Scatter(
    x=timestamps[anomalies],
    y=data[anomalies],
    mode='markers',
    name='Anomalies',
    marker={'color': 'red', 'size': 10},
)

# Assemble both traces into one figure and render it.
fig = go.Figure([trace1, trace2])
fig.show()


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [14]:
import numpy as np
import pandas as pd
from keras.layers import Input, Dense
from keras.models import Model
import plotly.graph_objs as go

# Load data from CSV or other source into a Pandas dataframe
df = pd.read_csv('https://raw.githubusercontent.com/mnoorchenar/data/main/SmartMeterData_ID_313960.csv')

# Convert READTS column to datetime format
df['READTS'] = pd.to_datetime(df['READTS'])

# Sort chronologically, as the first cell does. Without this the split arrays
# inherit the file's row order, which affects both the line traces drawn later
# and which rows Keras takes as the trailing validation split.
df = df.sort_values('READTS')

# Build each year mask once and reuse it for values and timestamps, so the two
# arrays of a split are guaranteed to be aligned row for row.
reading_year = df['READTS'].dt.year
is_train_year = reading_year == 2021
is_test_year = reading_year == 2022

# Split the data into training (2021) and test (2022) sets
train_data = df.loc[is_train_year, 'VAL'].values
test_data = df.loc[is_test_year, 'VAL'].values

# Extract the timestamps for the training and test sets
train_timestamps = df.loc[is_train_year, 'READTS'].values
test_timestamps = df.loc[is_test_year, 'READTS'].values

# Define the autoencoder model: a stack of Dense layers (32-16-8-16-32) mapping
# one scalar reading back onto itself.
input_layer = Input(shape=(1,))
encoded = Dense(32, activation='relu')(input_layer)
encoded = Dense(16, activation='relu')(encoded)
encoded = Dense(8, activation='relu')(encoded)
decoded = Dense(16, activation='relu')(encoded)
decoded = Dense(32, activation='relu')(decoded)
# The targets are z-scored below, so they take negative values and values above 1.
# A sigmoid output is confined to (0, 1) and could never reconstruct them; use a
# linear output so the network can cover the full range of the standardized data.
decoded = Dense(1, activation='linear')(decoded)
autoencoder = Model(inputs=input_layer, outputs=decoded)

# Shape the training readings as one feature column and standardize them.
# NOTE: x_train is overwritten with its standardized version here, so afterwards
# x_train.mean() / x_train.std() are approximately 0 / 1 and no longer describe
# the raw readings; the raw statistics remain available from train_data.
x_train = train_data.reshape(-1, 1)
x_train = (x_train - x_train.mean()) / x_train.std()

# Train the autoencoder model on the training data; validation_split holds out
# the last 10% of the rows for validation.
autoencoder.compile(optimizer='adam', loss='mse')
autoencoder.fit(x_train, x_train, epochs=10, batch_size=128, validation_split=0.1)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f270b293be0>

In [16]:
# Shape the test readings as a single feature column.
x_test = test_data.reshape(-1, 1)

# Report three shapes, one per line:
#   df      -> (rows, columns) of the full frame (it holds more than one column)
#   x_train -> (num_train_samples, 1)
#   x_test  -> (num_test_samples, 1)
print(df.shape, x_train.shape, x_test.shape, sep='\n')


(52416, 2)
(35020, 1)
(17396, 1)


In [17]:
# Use the trained autoencoder to detect anomalies in the test data.
#
# The test readings must be scaled with the statistics of the RAW training
# readings. x_train has already been standardized by the training cell, so
# x_train.mean() / x_train.std() are ~0 / ~1 and would leave the test data
# effectively unscaled. Deriving x_test from test_data (rather than rescaling
# x_test in place) also makes this cell safe to re-run.
x_test = (test_data.reshape(-1, 1) - train_data.mean()) / train_data.std()

# Per-sample squared reconstruction error (averaged over the single feature).
preds = autoencoder.predict(x_test)
mse = np.mean(np.power(x_test - preds, 2), axis=1)

# NOTE(review): the threshold is the 95th percentile of the test errors
# themselves, so roughly 5% of the test samples are always flagged regardless
# of how well the model fits; consider deriving it from training errors.
threshold = np.percentile(mse, 95)
anomalies = np.where(mse > threshold)[0]





In [18]:
# Blue line: the readings the model was trained on.
trace1 = go.Scatter(
    x=train_timestamps, y=train_data,
    mode='lines', name='Training Data',
    line={'color': 'blue'},
)

# Green line: the held-out test readings.
trace2 = go.Scatter(
    x=test_timestamps, y=test_data,
    mode='lines', name='Test Data',
    line={'color': 'green'},
)

# Red markers: the test samples flagged as anomalies.
trace3 = go.Scatter(
    x=test_timestamps[anomalies], y=test_data[anomalies],
    mode='markers', name='Anomalies',
    marker={'color': 'red', 'size': 10},
)

# Assemble the three traces into one figure and render it.
fig = go.Figure([trace1, trace2, trace3])
fig.show()
