In [1]:
import tqdm
import torch
import numpy as np
import pandas as pd
from utils import *
from torch import nn, optim
from networks import DNet, GNet
from sklearn.cluster import KMeans
from sklearn.svm import OneClassSVM
from matplotlib import pyplot as plot
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader

In [None]:
# Use the first CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if cuda else "cpu")

### Reading the Data


In [None]:
# Series kinds noted by the author: stationary, seasonal, trendy.
# Only the stationary ARMA file is loaded here.
df = pd.read_csv("data/stationary-arma-ts.csv")
ts = df["# Data"]

# Summary statistics followed by basic shape/type information.
print(ts.describe())
print("\n")
print(f"Shape of TS: {ts.shape}")
print(f"Type of TS: {type(ts)}")

In [None]:
# `scale` is a utils helper; its result exposes the scaled values under "scaled".
# The first column of that 2-D value array is kept as the working series.
ts_scaler = scale(ts)
ts_scaled = ts_scaler["scaled"].values[:, 0]

In [None]:
# Look for the strongest trend period in the scaled series.
# The arguments 1 and 25 are presumably the bounds of the candidate periods —
# confirm against utils.strongest_trend_period.
str_trd_train = strongest_trend_period(ts_scaled.reshape(ts_scaled.shape[0], ), 1, 25)
# NOTE(review): bare expression in the middle of a cell — it is not the last
# expression, so the notebook does not display it.
str_trd_train["period"]

# Split the series into trend and detrended parts using that period.
detrended_train = remove_trend(ts_scaled.reshape(ts_scaled.shape[0], ), str_trd_train["period"])
# Only when the extracted trend scores at least 0.8 on linearity_score is the
# detrended series adopted (and re-scaled) as the working series.
if(linearity_score(detrended_train["trend"].reshape(detrended_train["trend"].shape[0], )) >= 0.8):
    ts_scaled = detrended_train["detrended"]
    ts_detrended_train_scaler = scale(pd.Series(ts_scaled.reshape(ts_scaled.shape[0], )))
    # NOTE(review): unlike the first scaling cell, no `[:, 0]` is applied here, so
    # ts_scaled may stay 2-D on this path — confirm downstream slicing copes with it.
    ts_scaled = ts_detrended_train_scaler["scaled"].values

# Look for the strongest seasonal period in the (possibly detrended) series.
str_period_train = strongest_seasonal_period(ts_scaled.reshape(ts_scaled.shape[0], ), 1, 25)
if(str_period_train["seasonality_strength"] >= 0.8):
    # NOTE(review): batch_size is unconditionally set to 1 in a later cell before
    # the DataLoader is built, so this value is not used by it.
    batch_size = str_period_train["period"]

### Scaling

In [None]:
# The first 12012 points form the training span: 1000 input windows of 12
# steps, each paired with a target window that starts 12 steps later.
train = torch.Tensor(ts_scaled[:12012])
x_train = train[:12000].reshape(1000, 12)
y_train = train[12:12012].reshape(1000, 12)

In [None]:
# The 132 points that follow the training span form the test span.
test = torch.Tensor(ts_scaled[12012:12144])

In [None]:
# Ten 12-step input windows and the ten target windows shifted 12 steps ahead.
# NOTE(review): these reshapes may return views of `test`; if so, in-place edits
# to y_test also change the overlapping x_test rows — TODO confirm this is intended.
x_test = test[:120].reshape(10, 12)
y_test = test[12:].reshape(10, 12)

In [None]:
# One (input window, target window) pair per optimisation step, in series order.
batch_size = 1

train_dataset = CustomDataset(x_train, y_train)
train_dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [None]:
# Discriminator and generator, placed on the selected device.
dNet = DNet().to(device)
gNet = GNet().to(device)

# Binary cross-entropy drives both networks; each gets its own Adam optimizer.
loss = nn.BCELoss()
d_optimizer = optim.Adam(dNet.parameters(), lr=0.0002)
g_optimizer = optim.Adam(gNet.parameters(), lr=0.0002)

In [None]:
def train_d(optimizer, real_data, fake_data, device):
    """Run one discriminator update on a real batch and a generated batch.

    Uses the notebook-level `dNet` and `loss`. Gradients from the real pass
    (scored against `ones_target`) and the fake pass (scored against
    `zeros_target`) are accumulated before a single optimizer step.

    Args:
        optimizer: optimizer that is zeroed and stepped here.
        real_data: batch of real samples; its first dimension sets the target size.
        fake_data: batch of generated samples.
        device: forwarded to the target-building helpers.

    Returns:
        Tuple of (real loss + fake loss, predictions on real, predictions on fake).
    """
    batch_len = real_data.size(0)
    optimizer.zero_grad()

    # Real samples are scored against the "ones" target.
    real_scores = dNet(real_data)
    real_loss = loss(real_scores, ones_target(batch_len, device))
    real_loss.backward()

    # Generated samples are scored against the "zeros" target.
    fake_scores = dNet(fake_data)
    fake_loss = loss(fake_scores, zeros_target(batch_len, device))
    fake_loss.backward()

    optimizer.step()

    return real_loss + fake_loss, real_scores, fake_scores


def train_g(optimizer, real_data, fake_data, device):
    """Run one generator update from the discriminator's verdict on fake data.

    The generated batch is scored by the notebook-level `dNet` against the
    "ones" target, so the loss falls as the discriminator rates the samples
    as real. Only the parameters registered with `optimizer` are stepped.

    Args:
        optimizer: optimizer that is zeroed and stepped here.
        real_data: not used by this function; kept for a signature parallel to train_d.
        fake_data: generated batch; its first dimension sets the target size.
        device: forwarded to `ones_target`.

    Returns:
        The loss tensor for this step.
    """
    optimizer.zero_grad()

    g_loss = loss(dNet(fake_data), ones_target(fake_data.size(0), device))
    g_loss.backward()

    optimizer.step()
    return g_loss

### Training

In [None]:
# Adversarial training: for every batch, update the discriminator once and then
# the generator once; after each epoch plot the last batch's first sample.
num_epochs = 200

# Per-batch loss histories (one entry per optimisation step, across all epochs).
d_loss_array = []
g_loss_array = []

for epoch in tqdm.tqdm(range(num_epochs)):
    gNet.train()
    dNet.train()
    for n_batch, (inputs, targets) in enumerate(train_dataloader):
        
        inputs = inputs.to(device)
        targets = targets.to(device)
        
        # The true next window is the "real" sample shown to the discriminator.
        real_data = targets
        
        # Detached so the discriminator step does not backpropagate into gNet.
        fake_data = gNet(inputs).detach()

        
        d_error, d_pred_real, d_pred_fake = train_d(d_optimizer, real_data, fake_data, device)
        
        # Second forward pass, this time keeping the graph for the generator step.
        fake_data = gNet(inputs)
        
        g_error = train_g(g_optimizer, real_data, fake_data, device)
        
        # First sample of the batch, flattened to 12 values; overwritten every
        # batch, so only the last batch's sample reaches the plot below.
        r = targets[0, :].reshape(12, )
        f = fake_data[0, :].reshape(12, )
        
        d_loss_array.append(d_error.item())
        g_loss_array.append(g_error.item())
        
    # One figure per epoch: real vs generated window from the last batch.
    plot.plot(np.array(r.cpu()), label="real")
    plot.plot(np.array(f.cpu().detach()), label="generated")
    plot.legend(loc = "upper right")
    plot.show()

### Saving the models

In [None]:
# Persist the generator weights in the working directory.
# NOTE(review): the loading cell below reads "models/gNet_good.pth", a different
# path — confirm which file the test section is meant to use.
torch.save(gNet.state_dict(), "gNet_good.pth")

In [None]:
# Persist the discriminator weights in the working directory.
torch.save(dNet.state_dict(), "dNet_good.pth")

# Test

# Loading the Model

In [None]:
# Build a fresh generator and restore its weights for evaluation.
# NOTE(review): weights are read from "models/gNet_good.pth", whereas the save
# cell above writes "gNet_good.pth" to the working directory — confirm which
# checkpoint is intended.
gNet = GNet().to(device)
gNet.load_state_dict(
    torch.load("models/gNet_good.pth", map_location=torch.device('cpu'))
)
gNet.eval()

In [None]:
# Anomaly scenarios that can be injected into the test targets.
# Each entry is (window row, column or column slice, value) and is written into
# y_test in place. The number in each comment is the total count of injected
# points for that scenario.
ANOMALY_SCENARIOS = {
    # Point anomalies only (6).
    "point": [
        (0, 7, 1.5),
        (1, 4, 4),
        (1, 11, -2),
        (2, 9, 7),
        (5, 2, 10),
        (8, 5, 3),
    ],
    # One point plus one 7-step run (8).
    "sequential": [
        (0, 7, 10),
        (4, slice(1, 8), 5),
    ],
    # One point plus a 7-step run and a 5-step run (13).
    "sequential_and_sequential": [
        (0, 7, 25),
        (5, slice(1, 8), 15),
        (7, slice(4, 9), 10),
    ],
    # Four points plus one 7-step run (11).
    "sequential_and_point": [
        (0, 7, 25),
        (3, 5, 30),
        (5, slice(1, 8), 12),
        (7, 4, 20),
        (9, 6, 7),
    ],
}
ACTIVE_SCENARIO = "sequential_and_point"

y_dim_0, y_dim_1 = y_test.shape

# Inject the chosen scenario.
# NOTE(review): y_test is edited in place and may share storage with x_test, so
# re-run the test-split cells before switching to a different scenario.
for row, cols, value in ANOMALY_SCENARIOS[ACTIVE_SCENARIO]:
    y_test[row, cols] = value

# Predict every test window. The previous loop stopped at y_dim_0 - 1, which
# dropped the last window and with it the anomaly injected at y_test[9, 6].
real_all = []
generated_all = []
with torch.no_grad():  # inference only; no autograd graph is needed
    for i in range(y_dim_0):
        a = x_test[i, :].to(device)
        real = y_test[i, :].to(device)
        generated = gNet(a)

        real_all.append(real.cpu().numpy())
        generated_all.append(generated.cpu().numpy())

# Flatten the per-window arrays into two aligned 1-D series; the explicit size
# makes a generator output of unexpected length fail here rather than later.
real_all = np.asarray(real_all).reshape(y_dim_0 * y_dim_1)
generated_all = np.asarray(generated_all).reshape(y_dim_0 * y_dim_1)

fig, ax = plot.subplots(figsize=(10,6))
ax.plot(real_all, label="Real Data")
ax.plot(generated_all, label="Generated")
ax.legend(loc = "upper right")
plot.show()

### AD by GAN Predictions

In [None]:
# Residual between the observed series and the generator's prediction.
residual = real_all - generated_all

residual_mean = np.mean(residual)
residual_stdev = np.std(residual)
# Two-sigma bounds on the residual itself (computed but not used below).
upper_bound = residual_mean + (2*residual_stdev)
lower_bound = residual_mean - (2*residual_stdev)

# Band of +/- 1.96 residual standard deviations around the prediction.
upper_ci = generated_all + 1.96 * residual_stdev
lower_ci = generated_all - 1.96 * residual_stdev

# A point is flagged when it touches or leaves the band.
is_anomaly = (real_all >= upper_ci) | (real_all <= lower_ci)
anomaly_indices = np.flatnonzero(is_anomaly).tolist()
anomaly_values = list(real_all[is_anomaly])

fig, ax = plot.subplots(figsize=(10,6))
ax.plot(real_all, label="Real Data")
ax.plot(generated_all, label="Generated")
ax.plot(upper_ci, color="#000000")
ax.plot(lower_ci, color="#000000")
ax.scatter(x=anomaly_indices, y=anomaly_values, color="#cd8fa3", label="Anomaly")
ax.legend(loc = "upper right")
plot.show()

### K-Means (hyperparameter tuning)

In [None]:
# Fit K-Means for 1..19 clusters on the observed series and record each
# model's score, to pick the cluster count from the elbow of the curve.
data = real_all.reshape(-1, 1)
n_cluster = range(1, 20)
kmeans = [KMeans(n_clusters=k).fit(data) for k in n_cluster]
scores = [model.score(data) for model in kmeans]

fig, ax = plot.subplots(figsize=(10,6))
ax.plot(n_cluster, scores)
ax.set_xlabel('Number of Clusters')
ax.set_ylabel('Score')
ax.set_title('Elbow Curve')
plot.show()

In [None]:
# Five-cluster model fitted on the observed series.
X = real_all.reshape(-1, 1)
km = KMeans(n_clusters=5).fit(X)
labels = km.labels_

outliers_fraction = 0.01

# NOTE(review): distances are taken from `kmeans[9]` (the 10-cluster model built
# in the elbow cell) and from that cell's `data`, not from the `km` fitted
# above — confirm which model is intended.
distance = getDistanceByPoint(data, kmeans[9])
number_of_outliers = int(outliers_fraction*len(distance))
# The smallest of the `number_of_outliers` largest distances is the cut-off.
threshold = distance.nlargest(number_of_outliers).min()

real_data_pd = pd.DataFrame(real_all)
real_data_pd['anomaly'] = (distance >= threshold).astype(int)

anomaly_k_means = real_data_pd.loc[real_data_pd['anomaly'] == 1, [0]]

fig, ax = plot.subplots(figsize=(10,6))
ax.plot(real_data_pd[0], color='blue', label='Real Data')
ax.scatter(anomaly_k_means.index, anomaly_k_means[0], color='red', label='Anomaly')
ax.set_xlabel('Time')
ax.legend()
plot.show()

### Isolation Forest

In [None]:
# Standardise the observed series before fitting the Isolation Forest.
data = real_all.reshape(-1, 1)
real_data_pd = pd.DataFrame(real_all)
scaler = StandardScaler()
np_scaled = scaler.fit_transform(data)
data = pd.DataFrame(np_scaled)

# `contamination` is the fraction of points the model is told to flag.
outliers_fraction = 0.03
model = IsolationForest(contamination=outliers_fraction)
model.fit(data)
real_data_pd['anomaly'] = pd.Series(model.predict(data))

# The rows compared against -1 are the ones kept as anomalies.
anomaly_isolation_forest = real_data_pd.loc[real_data_pd['anomaly'] == -1, [0]]

fig, ax = plot.subplots(figsize=(10,6))
ax.plot(real_data_pd[0], color='blue', label='Real Data')
# Fixed: the scatter previously referenced the undefined name
# `anomaly_k_means_isolation_forest`, which raised NameError.
ax.scatter(anomaly_isolation_forest[0].index, anomaly_isolation_forest[0], color='red', label='Anomaly')
ax.set_xlabel('Time')
ax.legend()
plot.show()

### One Class SVM

In [None]:
# Standardise the observed series before fitting the One-Class SVM.
data = real_all.reshape(-1, 1)
real_data_pd = pd.DataFrame(real_all)
scaler = StandardScaler()
np_scaled = scaler.fit_transform(data)
data = pd.DataFrame(np_scaled)

# The outlier fraction is passed to the model as `nu`.
outliers_fraction = 0.03
model = OneClassSVM(nu=outliers_fraction, kernel="rbf", gamma=0.01)
model.fit(data)
real_data_pd['anomaly'] = pd.Series(model.predict(data))

# The rows compared against -1 are the ones kept as anomalies.
anomaly_svm = real_data_pd.loc[real_data_pd['anomaly'] == -1, [0]]

fig, ax = plot.subplots(figsize=(10,6))
ax.plot(real_data_pd[0], color='blue', label='Real Data')
# Fixed: the scatter previously referenced the undefined name
# `anomaly_k_means_svm`, which raised NameError.
ax.scatter(anomaly_svm[0].index, anomaly_svm[0], color='red', label='Anomaly')
ax.set_xlabel('Time')
ax.legend()
plot.show()

In [None]:
### Figure_1 only point anomalies

# Font settings are applied before the figure is built: text created earlier
# keeps the rcParams in force at creation time, so updating them afterwards
# (as before) did not reliably style this figure on a fresh run.
plot.rcParams.update({'font.size': 25, 'font.family': ['times-new-roman'],})

# One panel per detector: (title, x positions of anomalies, anomaly values).
panels = [
    ("AD by GAN-based Algorithm", anomaly_indices, anomaly_values),
    ("AD by K-Means Clustering", anomaly_k_means[0].index, anomaly_k_means[0]),
    ("AD by Isolation Forest", anomaly_isolation_forest[0].index, anomaly_isolation_forest[0]),
    ("AD by One-Class SVM", anomaly_svm[0].index, anomaly_svm[0]),
]

fig, ax = plot.subplots(2, 2, figsize=(25, 12))

# ax.flat walks the grid row by row, matching the order of `panels`.
for panel_ax, (title, xs, ys) in zip(ax.flat, panels):
    panel_ax.plot(real_data_pd[0], color='#40375c', label='Real Data')
    panel_ax.scatter(xs, ys, color='#f14235', label='Anomaly', linewidth=2)
    panel_ax.legend(loc = "upper right")
    panel_ax.set_title(title, pad=15)

for a in ax.flat:
    a.set(xlabel='Time')

# Keep axis labels only on the outer edge of the grid.
for a in ax.flat:
    a.label_outer()

fig.patch.set_facecolor("white")
plot.show()

In [None]:
# NOTE(review): this must equal the number of anomalies injected into y_test for
# the scenario being evaluated — confirm 6 matches the scenario in use.
num_of_anomalies = 6

# Each figure is (number of flagged points / expected anomalies) * 100; it does
# not check that the flagged points are the injected ones.
test_accuracy_gan = (len(anomaly_indices) / num_of_anomalies) * 100
print(f"Test Acccuracy by GAN: {round(test_accuracy_gan, 2)}%")

test_accuracy_k_means = (len(anomaly_k_means) / num_of_anomalies) * 100
print(f"Test Acccuracy by K_means: {round(test_accuracy_k_means, 2)}%")

test_accuracy_if = (len(anomaly_isolation_forest) / num_of_anomalies) * 100
print(f"Test Acccuracy by Isolation Forest: {round(test_accuracy_if, 2)}%")

test_accuracy_svm = (len(anomaly_svm) / num_of_anomalies) * 100
print(f"Test Acccuracy by SVM: {round(test_accuracy_svm, 2)}%")

In [None]:
# Fig 5: point and sequential anomalies, for demonstration.
# The first GAN-flagged point is drawn as the point anomaly; all remaining
# flagged points are joined by a line as the sequential anomalies.
point_idx, sequential_idx = anomaly_indices[:1], anomaly_indices[1:]
point_val, sequential_val = anomaly_values[:1], anomaly_values[1:]

fig, ax = plot.subplots(figsize=(25, 12))
ax.plot(real_data_pd[0], color='#40375c', label='Real Data')
ax.scatter(point_idx, point_val, color="#f14235", label="Point Anomaly", linewidth=3)
ax.plot(sequential_idx, sequential_val, color="#f5a623", label="Sequential Anomalies", linewidth=3)
ax.legend(loc = "upper right")
# ax.set_title("Point and Sequential Anomalies", pad=15)

plot.rcParams.update({'font.size': 25, 'font.family': ['times-new-roman'],})
fig.patch.set_facecolor("white")
plot.show()

In [None]:
# Plain view of the evaluated series, with no anomaly markers.
fig, ax = plot.subplots(figsize=(25, 12))
ax.plot(real_data_pd[0], color='#40375c', label='Time Series Data')
ax.legend(loc = "upper left")
ax.set_title("Time Series Example", pad=15)

plot.rcParams.update({'font.size': 25, 'font.family': ['times-new-roman'],})
fig.patch.set_facecolor("white")
plot.show()