In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from matplotlib import pyplot as plt
from scipy import stats
from tensorflow import keras
from tensorflow.keras import layers

In [None]:
# Colab only: attach Google Drive so files under it can be read by path.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

# Load the data

In [None]:
# Read the sample CSV from the mounted Drive into a DataFrame.
SAMPLE_CSV_PATH = "/content/drive/MyDrive/sariyer_sample.csv"
df_s = pd.read_csv(SAMPLE_CSV_PATH)

In [None]:
# (n_rows, n_columns) of the loaded frame; `shape` is an attribute, not a method.
df_s.shape

# Visualize the data

In [None]:
# Line plot of the numeric columns of the raw frame against the row index.
fig, ax = plt.subplots()
df_s.plot(legend=False, ax=ax)
# Title and axis labels so the figure can be read on its own.
ax.set(title="Raw series", xlabel="Row index", ylabel="Value")
plt.show()

In [None]:
# Box plot of the loaded data to eyeball spread and extreme values.
print('Box plot visualization:')
df_s.plot(kind='box', figsize=(10, 4))
plt.show()

In [None]:
# Kernel density estimate of the "value" column of the loaded frame.
# seaborn (`sns`) is imported in the notebook's top import cell.
fig, ax1 = plt.subplots(ncols=1, figsize=(8, 5))
ax1.set_title("Scaling")
sns.kdeplot(df_s["value"], ax=ax1)
plt.show()

# Prepare training data

In [None]:
# Standardize the loaded data: subtract the per-column mean and divide by the
# per-column standard deviation, both computed from df_s itself.
# NOTE(review): assumes every column of df_s is numeric — a timestamp/string
# column would need to be dropped or set as the index first; confirm against the CSV.
training_mean = df_s.mean()
training_std = df_s.std()
df_training_value = (df_s - training_mean) / training_std
print("Number of training samples:", len(df_training_value))

Create sequences

In [None]:
# helper function
def create_dataset(X, y, time_steps=1):
    """Slice a series into overlapping windows paired with the next value.

    For every start position ``s`` in ``range(len(X) - time_steps)`` the window
    is ``X.iloc[s : s + time_steps].values`` and its target is
    ``y.iloc[s + time_steps]`` (the element right after the window).

    Args:
        X: pandas object supporting ``.iloc`` slicing and ``.values``.
        y: pandas object supporting ``.iloc`` positional lookup.
        time_steps: number of consecutive rows in each window.

    Returns:
        Tuple ``(windows, targets)`` of NumPy arrays; both are empty when
        ``len(X) <= time_steps``.
    """
    n_windows = len(X) - time_steps
    windows = [X.iloc[start:start + time_steps].values for start in range(n_windows)]
    targets = [y.iloc[start + time_steps] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

# Window length: each model sample is n_steps consecutive rows.
n_steps = 96
# Fraction of the series, taken from the start, that is used for training.
# NOTE(review): the 80/20 ratio is an assumption — set it to the intended split.
TRAIN_FRACTION = 0.8

# Chronological (unshuffled) split of the normalized series so that the
# windows built below stay contiguous in time.
split_idx = int(len(df_training_value) * TRAIN_FRACTION)
train = df_training_value.iloc[:split_idx]
test = df_training_value.iloc[split_idx:]

# reshape to 3D [n_samples, n_steps, n_features]
# The column is addressed as "value" (lowercase), matching every other
# reference to it in this notebook.
# NOTE(review): confirm the spelling against the CSV header.
X_train, y_train = create_dataset(train[['value']], train['value'], n_steps)
X_test, y_test = create_dataset(test[['value']], test['value'], n_steps)
print('X_train shape:', X_train.shape)
print('X_test shape:', X_test.shape)

# Build a model

In [None]:
# Convolutional autoencoder over windows of shape (n_timesteps, n_features):
# two strided Conv1D layers, then two strided Conv1DTranspose layers, then a
# final single-filter Conv1DTranspose producing the reconstruction.
n_timesteps, n_features = X_train.shape[1], X_train.shape[2]

# Settings shared by every strided layer in the stack.
strided_relu = dict(kernel_size=7, padding="same", strides=2, activation="relu")

autoencoder_layers = [
    layers.Input(shape=(n_timesteps, n_features)),
    layers.Conv1D(filters=32, **strided_relu),
    layers.Dropout(rate=0.2),
    layers.Conv1D(filters=16, **strided_relu),
    layers.Conv1DTranspose(filters=16, **strided_relu),
    layers.Dropout(rate=0.2),
    layers.Conv1DTranspose(filters=32, **strided_relu),
    layers.Conv1DTranspose(filters=1, kernel_size=7, padding="same"),
]
model = keras.Sequential(autoencoder_layers)

adam = keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=adam, loss="mse")
model.summary()

# Train the model

In [None]:
# Fit the autoencoder with X_train as both input and target, holding out 10%
# of the samples for validation and stopping once val_loss has not improved
# for 5 epochs.
early_stopping = keras.callbacks.EarlyStopping(
    monitor="val_loss", patience=5, mode="min"
)
fit_options = dict(
    epochs=50,
    batch_size=128,
    validation_split=0.1,
    callbacks=[early_stopping],
)
history = model.fit(X_train, X_train, **fit_options)

# Detecting anomalies

We will detect anomalies by determining how well our model can reconstruct the input data.

1. Find MAE loss on training samples.
2. Find the max MAE loss value. This is the worst our model has performed when trying to reconstruct a sample. We will make this the threshold for anomaly detection.
3. If the reconstruction loss for a sample is greater than this threshold value, then we can infer that the model is seeing a pattern that it isn't familiar with. We will label this sample as an anomaly.

In [None]:
# Get train MAE loss: mean absolute reconstruction error of each training
# window, averaged over the time axis (axis=1).
x_train_pred = model.predict(X_train)
train_mae_loss = np.mean(np.abs(x_train_pred - X_train), axis=1)

plt.hist(train_mae_loss, bins=50)
plt.xlabel("Train MAE loss")
plt.ylabel("No of samples")
plt.show()

# Get reconstruction loss threshold: the largest error seen on training data.
threshold = np.max(train_mae_loss)
print("Reconstruction error threshold: ", threshold)

In [None]:
# Training vs. validation loss per epoch, as recorded by model.fit.
loss_curves = (("loss", "Training Loss"), ("val_loss", "Validation Loss"))
for metric_key, legend_label in loss_curves:
    plt.plot(history.history[metric_key], label=legend_label)
plt.legend()
plt.show()

In [None]:
# Overlay the first training window and the model's reconstruction of it.
plt.plot(X_train[0], label="Input window")
plt.plot(x_train_pred[0], label="Reconstruction")
plt.legend()
plt.show()

In [None]:
# Get test MAE loss: mean absolute reconstruction error of each test window,
# averaged over the time axis and flattened to one value per window.
x_test_pred = model.predict(X_test)
test_mae_loss = np.mean(np.abs(x_test_pred - X_test), axis=1).reshape(-1)

# Detect all the samples which are anomalies: windows whose error exceeds the
# threshold taken from the training data.
anomalies = test_mae_loss > threshold
print("Number of anomaly samples: ", np.sum(anomalies))
print("Indices of anomaly samples: ", np.where(anomalies))

In [None]:
# data i is an anomaly if samples [(i - n_steps + 1) to (i)] are all anomalies.
# Window w starts at data point w, so these are the n_steps windows that
# contain point i. The slice end is data_idx + 1 so that window i itself is
# included, and the range stops at len(anomalies) so every index looked up
# is an existing window.
anomalous_data_indices = [
    data_idx
    for data_idx in range(n_steps - 1, len(anomalies))
    if np.all(anomalies[data_idx - n_steps + 1 : data_idx + 1])
]

In [None]:
# Label every row of df_s as an outlier ("yes") or not ("no").
# The model reconstructs windows of n_steps rows, so the normalized series is
# windowed first, each window's reconstruction error is compared with
# `threshold`, and a row is flagged only when all n_steps windows that contain
# it are anomalous.
X_all, _unused_targets = create_dataset(
    df_training_value[["value"]], df_training_value["value"], n_steps
)

outlier_mask = np.zeros(len(df_training_value), dtype=bool)
if len(X_all) > 0:  # nothing to score when the series is shorter than one window
    x_all_pred = model.predict(X_all)
    all_mae_loss = np.mean(np.abs(x_all_pred - X_all), axis=1).reshape(-1)
    window_is_anomalous = all_mae_loss > threshold
    # Rows before n_steps - 1 and rows past the last window start are never
    # covered by n_steps full windows, so they stay "no".
    for row_pos in range(n_steps - 1, len(window_is_anomalous)):
        if np.all(window_is_anomalous[row_pos - n_steps + 1 : row_pos + 1]):
            outlier_mask[row_pos] = True

# Align on the index so rows of df_s absent from df_training_value get "no".
outlier_flags = pd.Series(outlier_mask, index=df_training_value.index)
df_s["Outliers"] = (
    outlier_flags.reindex(df_s.index, fill_value=False)
    .map({True: "yes", False: "no"})
)