In [3]:
# Move into the SR-CNN project folder; the data paths below are built from os.getcwd().
# NOTE(review): the path is relative, so re-running this cell without a kernel restart
# resolves it against the new working directory - confirm it is only run once.
%cd ../anomalydetection/sr-cnn

/Users/i506171/IdeaProjects/upscale-sre-aiops/anomalydetection/sr-cnn


In [21]:
from tqdm.notebook import tqdm
from torch.utils.data import Dataset
import numpy as np
import os
import json
import csv
import time
import torch
from torch import nn, optim
from torch.autograd import Variable
from torch.nn import functional as F

In [22]:
def spectral_residual(values):
    """
    Turn a time series into its spectral-residual (saliency) series.

    The amplitude spectrum is replaced by exp(log-amplitude minus its 3-point
    sliding average) while the phase is kept, and the result is transformed
    back to the time domain.

    :param values: sequence of float.
        the input time series.
    :return: numpy.ndarray.
        magnitude of the inverse transform, one value per input point.
    """
    EPS = 1e-8
    spectrum = np.fft.fft(values)
    amplitude = np.sqrt(spectrum.real ** 2 + spectrum.imag ** 2)

    # Near-zero amplitudes get a log of 0 instead of -inf.
    log_amplitude = [np.log(amp) if abs(amp) > EPS else 0 for amp in amplitude]
    residual = np.exp(log_amplitude - average_filter(log_amplitude, n=3))

    def _rescale(component):
        # Scale one spectrum component by residual / amplitude; zero it where
        # the amplitude is too small to divide by.
        return [part * res / amp if abs(amp) > EPS else 0
                for part, res, amp in zip(component, residual, amplitude)]

    spectrum.real = _rescale(spectrum.real)
    spectrum.imag = _rescale(spectrum.imag)

    saliency = np.fft.ifft(spectrum)
    return np.sqrt(saliency.real ** 2 + saliency.imag ** 2)

In [23]:
def average_filter(values, n=3):
    """
    Trailing moving average of a series.

    res[i] is the mean of the last t values ending at i, where t = min(n, i + 1),
    so the first points are averaged over the shorter prefix that exists.

    :param values: list.
        a list of float numbers
    :param n: int, default 3.
        window size; capped at len(values).
    :return res: numpy.ndarray.
        the averaged series, same length as values.
    """
    window = min(n, len(values))

    running = np.cumsum(values, dtype=float)
    # Full windows: difference of prefix sums divided by the window length.
    running[window:] = (running[window:] - running[:-window]) / window

    # Partial windows at the start: prefix sum over the number of points seen so far.
    if window > 1:
        running[1:window] /= np.arange(2, window + 1)

    return running

In [24]:
def predict_next(values):
    """
    Extrapolate one value past the end of a series.

    Each earlier point contributes the slope of the line joining it to the last
    point; the slopes are summed and added to the second element of the series.

    :param values: list.
        a list of float numbers, at least two of them.
    :return : float.
        the predicted next value.
    :raises ValueError: if fewer than two values are given.
    """
    count = len(values)
    if count <= 1:
        raise ValueError(f'data should contain at least 2 numbers')

    last_value = values[-1]

    slope_total = 0
    for position, value in enumerate(values[:-1]):
        # (count - 1 - position) is the distance in steps to the last point.
        slope_total += (last_value - value) / (count - 1 - position)

    return values[1] + slope_total

In [25]:
def extend_series(values, extend_num=5, look_ahead=5):
    """
    Pad the end of a series with copies of one extrapolated value.

    :param values: list.
        a list of float numbers.
    :param extend_num: int, default 5.
        how many copies of the predicted value to append.
    :param look_ahead: int, default 5.
        controls how much history feeds the prediction.
    :return: numpy.ndarray.
        values followed by the padding.
    :raises ValueError: if look_ahead is smaller than 1.
    """
    if look_ahead < 1:
        raise ValueError('look_ahead must be at least 1')

    # The slice takes look_ahead + 1 points and leaves out the final value.
    history = values[-look_ahead - 2:-1]
    forecast = predict_next(history)
    padding = [forecast] * extend_num

    return np.concatenate((values, padding), axis=0)

In [10]:
# Load the KPI training set and bucket its rows by KPI ID.
# kpis maps KPI ID -> [timestamps, values, labels]; the three lists stay index-aligned.
kpis = {}
DATA_PATH=os.getcwd()+'/data/KPI/train.csv'
# newline='' is the csv-module recommended way to open a file for csv.reader.
with open(DATA_PATH, newline='') as f:
    reader = csv.reader(f, delimiter=',')
    next(reader, None)  # skip the header row (no-op on an empty file)
    for row in reader:
        # Row layout: timestamp, value, label, KPI ID.
        kpi = kpis.setdefault(str(row[3]), [[], [], []])
        kpi[0].append(int(row[0]))
        kpi[1].append(float(row[1]))
        kpi[2].append(int(row[2]))

In [7]:
def read_csv_kpi(path):
    """
    Read a single-KPI CSV file into three parallel lists.

    The first row is treated as a header and skipped. Each remaining row is
    read as: column 0 -> int, column 1 -> float, column 2 -> int.

    :param path: str.
        location of the CSV file.
    :return: tuple (tm, vl, lb) of lists.
        values of columns 0, 1 and 2 respectively, in file order.
    :raises ValueError: if a field cannot be converted to int/float.
    """
    tm, vl, lb = [], [], []
    # newline='' is the csv-module recommended way to open a file for csv.reader.
    with open(path, newline='') as f:
        reader = csv.reader(f, delimiter=',')
        next(reader, None)  # skip the header row (no-op on an empty file)
        for row in reader:
            tm.append(int(row[0]))
            vl.append(float(row[1]))
            lb.append(int(row[2]))
    return tm, vl, lb

In [26]:
# Run configuration.
# NOTE(review): none of these names is referenced by the cells visible in this
# notebook; presumably they belong to SR-CNN training code elsewhere - confirm or remove.
SEED = 54321
LEARNING_RATE = 1e-6
EPOCH = 10
BATCH_SIZE = 256
NUMBER_OF_WORKERS = 0
MODEL_SAVE_PATH=os.getcwd()+'/model/'  # resolved against the current working directory

In [27]:
# Module-level settings read by predict().
WINDOW_SIZE = 1440  # predict() slices WINDOW_SIZE - backaddnum real points per window
z=21  # number of residuals just before the scored point that are averaged as its baseline
extend_num= 5  # number of extrapolated points extend_series() appends to each window
tau = 3  # flag a point when (residual - baseline) / (baseline + 0.001) > tau
backaddnum = 5  # subtracted from WINDOW_SIZE to get the count of real points per window
back = 0  # offset applied to the loop bounds and slice end in predict(); 0 here
step= 1  # stride of the sliding window in predict()

def predict(in_timestamp, in_value, in_label, threshold=0.95):
    """
    Flag anomalies in one KPI series with a sliding-window spectral residual.

    For every window position the last real point of the window is scored: its
    spectral residual is compared with the mean of the z residuals before it
    and the point is flagged when the relative excess is above tau. Points in
    the initial warm-up region (the first WINDOW_SIZE - backaddnum points) and
    points the loop never reaches are reported as 0.

    After scoring, a point that directly follows a timestamp gap (a step larger
    than the smallest step in the series) is forced to 1 when a flagged point
    lies fewer than 1000 positions before it.

    Reads the module-level settings WINDOW_SIZE, z, extend_num, tau,
    backaddnum, back and step.

    :param in_timestamp: list of timestamps.
    :param in_value: list of values, aligned with in_timestamp.
    :param in_label: list of labels; only copied into the result.
    :param threshold: float, default 0.95.
        currently unused; kept so existing callers keep working.
    :return: tuple (timestamps, labels, detections, scores).
        four lists, each with len(in_timestamp) entries.
    """
    length = len(in_timestamp)

    # Warm-up padding. Capped at the series length so that a series shorter than
    # the warm-up still yields exactly one prediction per input point.
    warmup = min(length, WINDOW_SIZE - backaddnum)
    detres = [0] * warmup
    scores = [0] * warmup

    # NOTE(review): with back = 0 and step = 1 the last pt is length - 1 and the
    # point scored is pt - 1, so the final point only ever gets the 0 padding below.
    for pt in tqdm(range(WINDOW_SIZE - backaddnum + back + step, length - back, step)):
        head = max(0, pt - (WINDOW_SIZE - backaddnum))
        tail = min(length, pt)

        wave = np.array(extend_series(in_value[head:tail + back], extend_num))
        residuals = spectral_residual(wave)

        for ipt in range(pt - step - back, pt - back):
            residual_mean = np.mean(residuals[ipt - head - z:ipt - head])
            current_residual = residuals[ipt - head]
            # 0.001 keeps the ratio finite when the local baseline is ~0.
            relative_excess = (current_residual - residual_mean) / (residual_mean + 0.001)

            detres.append(1 if relative_excess > tau else 0)
            scores.append(current_residual.item())

    # Pad whatever the loop did not reach.
    detres += [0] * (length - len(detres))
    scores += [0] * (length - len(scores))

    # Gap handling needs at least one timestamp difference; min() of an empty
    # list would raise for series with fewer than two points.
    if length >= 2:
        last = -1
        interval = min(in_timestamp[i] - in_timestamp[i - 1] for i in range(1, length))
        for i in tqdm(range(1, length)):
            if in_timestamp[i] - in_timestamp[i - 1] > interval:
                if last >= 0 and i - last < 1000:
                    detres[i] = 1
                    scores[i] = 1
            if detres[i] == 1:
                last = i

    return in_timestamp[:], in_label[:], detres[:], scores[:]

In [32]:
total_time = 0
results = []
savedscore = []
thres = 0.95

# Run the detector on every KPI series; total_time accumulates only the predict() calls.
for kpi_id, kpi in kpis.items():
    in_timestamp = kpi[0]
    in_value = kpi[1]
    in_label = kpi[2]
    time_start = time.time()
    timestamp, label, pre, scores = predict(in_timestamp, in_value, in_label, thres)
    time_end = time.time()
    total_time += time_end - time_start
    # Tag each entry with its KPI ID so per-series results stay identifiable.
    results.append([timestamp, label, pre, kpi_id])
    savedscore.append([label, scores, kpi_id, timestamp])

HBox(children=(IntProgress(value=0, max=106281), HTML(value='')))




HBox(children=(IntProgress(value=0, max=107716), HTML(value='')))




HBox(children=(IntProgress(value=0, max=7348), HTML(value='')))




HBox(children=(IntProgress(value=0, max=8783), HTML(value='')))




HBox(children=(IntProgress(value=0, max=9702), HTML(value='')))




HBox(children=(IntProgress(value=0, max=11137), HTML(value='')))




HBox(children=(IntProgress(value=0, max=7430), HTML(value='')))




HBox(children=(IntProgress(value=0, max=8865), HTML(value='')))




HBox(children=(IntProgress(value=0, max=7427), HTML(value='')))




HBox(children=(IntProgress(value=0, max=8862), HTML(value='')))




HBox(children=(IntProgress(value=0, max=7348), HTML(value='')))




HBox(children=(IntProgress(value=0, max=8783), HTML(value='')))




HBox(children=(IntProgress(value=0, max=7348), HTML(value='')))




HBox(children=(IntProgress(value=0, max=8783), HTML(value='')))




HBox(children=(IntProgress(value=0, max=7348), HTML(value='')))




HBox(children=(IntProgress(value=0, max=8783), HTML(value='')))




HBox(children=(IntProgress(value=0, max=127556), HTML(value='')))




HBox(children=(IntProgress(value=0, max=128991), HTML(value='')))




HBox(children=(IntProgress(value=0, max=127436), HTML(value='')))




HBox(children=(IntProgress(value=0, max=128871), HTML(value='')))




HBox(children=(IntProgress(value=0, max=127574), HTML(value='')))




HBox(children=(IntProgress(value=0, max=129009), HTML(value='')))




HBox(children=(IntProgress(value=0, max=127348), HTML(value='')))




HBox(children=(IntProgress(value=0, max=128783), HTML(value='')))




HBox(children=(IntProgress(value=0, max=127351), HTML(value='')))




HBox(children=(IntProgress(value=0, max=128786), HTML(value='')))




HBox(children=(IntProgress(value=0, max=127353), HTML(value='')))




HBox(children=(IntProgress(value=0, max=128788), HTML(value='')))




HBox(children=(IntProgress(value=0, max=127610), HTML(value='')))




HBox(children=(IntProgress(value=0, max=129045), HTML(value='')))




HBox(children=(IntProgress(value=0, max=144818), HTML(value='')))




HBox(children=(IntProgress(value=0, max=146253), HTML(value='')))




HBox(children=(IntProgress(value=0, max=144819), HTML(value='')))




HBox(children=(IntProgress(value=0, max=146254), HTML(value='')))




HBox(children=(IntProgress(value=0, max=144819), HTML(value='')))




HBox(children=(IntProgress(value=0, max=146254), HTML(value='')))




HBox(children=(IntProgress(value=0, max=144792), HTML(value='')))




HBox(children=(IntProgress(value=0, max=146227), HTML(value='')))




HBox(children=(IntProgress(value=0, max=143699), HTML(value='')))




HBox(children=(IntProgress(value=0, max=145134), HTML(value='')))




HBox(children=(IntProgress(value=0, max=106635), HTML(value='')))




HBox(children=(IntProgress(value=0, max=108070), HTML(value='')))




HBox(children=(IntProgress(value=0, max=106638), HTML(value='')))




HBox(children=(IntProgress(value=0, max=108073), HTML(value='')))




HBox(children=(IntProgress(value=0, max=105039), HTML(value='')))




HBox(children=(IntProgress(value=0, max=106474), HTML(value='')))




HBox(children=(IntProgress(value=0, max=144690), HTML(value='')))




HBox(children=(IntProgress(value=0, max=146125), HTML(value='')))




HBox(children=(IntProgress(value=0, max=144768), HTML(value='')))




HBox(children=(IntProgress(value=0, max=146203), HTML(value='')))




HBox(children=(IntProgress(value=0, max=144803), HTML(value='')))




HBox(children=(IntProgress(value=0, max=146238), HTML(value='')))




HBox(children=(IntProgress(value=0, max=144807), HTML(value='')))




HBox(children=(IntProgress(value=0, max=146242), HTML(value='')))




HBox(children=(IntProgress(value=0, max=144818), HTML(value='')))




HBox(children=(IntProgress(value=0, max=146253), HTML(value='')))




HBox(children=(IntProgress(value=0, max=144817), HTML(value='')))




HBox(children=(IntProgress(value=0, max=146252), HTML(value='')))




In [30]:
def reconstruct_label(timestamp, label):
    """
    Lay labels out on a regular time grid, filling missing timestamps with 0.

    The grid starts at the earliest timestamp and uses the smallest positive gap
    between consecutive sorted timestamps as its step. Each label is written to
    the slot (timestamp - first) // step.

    :param timestamp: sequence of int timestamps, in any order.
    :param label: sequence of int labels aligned with timestamp.
    :return: numpy.ndarray of int64.
        one entry per grid slot; empty when timestamp is empty.
    """
    timestamp = np.asarray(timestamp, np.int64)
    label = np.asarray(label, np.int64)

    if timestamp.size == 0:
        return np.zeros(shape=(0,), dtype=np.int64)

    index = np.argsort(timestamp)
    timestamp_sorted = timestamp[index]
    label = label[index]

    # Only positive gaps are used, so duplicate timestamps cannot give a zero step.
    # A single point (or all-identical timestamps) falls back to a step of 1.
    gaps = np.diff(timestamp_sorted)
    positive_gaps = gaps[gaps > 0]
    interval = positive_gaps.min() if positive_gaps.size else 1

    idx = (timestamp_sorted - timestamp_sorted[0]) // interval

    new_label = np.zeros(shape=((timestamp_sorted[-1] - timestamp_sorted[0]) // interval + 1,), dtype=np.int64)
    new_label[idx] = label

    return new_label

In [10]:
def get_range_proba(predict, label, delay=7):
    """
    Apply segment-level adjustment to point-wise predictions.

    The label series is cut wherever its value changes. Segments are treated as
    alternately anomalous / normal, starting from whether label[0] == 1. For
    every anomalous segment, if a 1 occurs among its first delay + 1 predictions
    the whole segment is set to 1, otherwise to 0. Other segments are untouched.

    :param predict: sequence of 0/1 predictions.
    :param label: sequence of labels aligned with predict.
    :param delay: int, default 7.
        number of extra points after the segment start in which a hit still counts.
    :return: numpy.ndarray.
        adjusted copy of predict.
    """
    predict = np.array(predict)
    label = np.array(label)

    adjusted = predict.copy()
    change_points = np.where(label[1:] != label[:-1])[0] + 1
    # Adding the series end lets one loop also handle the final segment.
    segment_ends = list(change_points) + [len(label)]

    in_anomaly = label[0] == 1
    start = 0
    for end in segment_ends:
        if in_anomaly:
            detected = 1 in predict[start:min(start + delay + 1, end)]
            adjusted[start:end] = 1 if detected else 0
        in_anomaly = not in_anomaly
        start = end

    return adjusted

In [31]:
def reconstruct_series(timestamp, label, predict, delay=7):
    """
    Put labels and predictions on a regular time grid and adjust the predictions.

    :param timestamp: sequence of timestamps shared by label and predict.
    :param label: sequence of labels.
    :param predict: sequence of point-wise predictions.
    :param delay: int, default 7.
        passed through to get_range_proba.
    :return: tuple (labels, predictions) of plain Python lists.
    """
    dense_label = reconstruct_label(timestamp, label)
    dense_predict = reconstruct_label(timestamp, predict)
    adjusted_predict = get_range_proba(dense_predict, dense_label, delay)
    return dense_label.tolist(), adjusted_predict.tolist()

In [41]:
# Pool the reconstructed labels and adjusted predictions of all KPIs into two flat lists.
labels, predicts = [], []
delay=7
# The per-KPI prediction is bound to `pred`, not `predict`, so the predict()
# function stays callable for cells that run after this one.
for timestamp, label, pred, _kpi_id in results:
    if len(timestamp) == 0:
        continue
    lbl, pdt = reconstruct_series(timestamp, label, pred, delay)
    labels.extend(lbl)
    predicts.extend(pdt)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  new_label = np.zeros(shape=((timestamp_sorted[-1] - timestamp_sorted[0]) // interval + 1,), dtype=np.int)


In [29]:
def calc(pred, true):
    """
    Count confusion-matrix entries for binary predictions.

    A prediction counts as positive only when it equals 1. Pairs whose ground
    truth is neither 1 nor 0 are ignored.

    :param pred: iterable of predictions.
    :param true: iterable of ground-truth labels, aligned with pred.
    :return: tuple (TP, FP, TN, FN).
    """
    counts = {'TP': 0, 'FP': 0, 'TN': 0, 'FN': 0}
    for pre, gt in zip(pred, true):
        predicted_positive = pre == 1
        if gt == 1:
            counts['TP' if predicted_positive else 'FN'] += 1
        elif gt == 0:
            counts['FP' if predicted_positive else 'TN'] += 1
    return counts['TP'], counts['FP'], counts['TN'], counts['FN']

In [45]:
from sklearn.metrics import f1_score, precision_score, recall_score

# Metrics over the pooled labels/predictions of all KPIs.
pre = precision_score(labels, predicts)
rec = recall_score(labels, predicts)
f1 = f1_score(labels, predicts)
# Raw confusion counts, kept for the manual cross-checks in the following cells.
TP, FP, TN, FN = calc(predicts, labels)

for metric_name, metric_value in (('precision', pre), ('recall', rec), ('f1', f1)):
    print(metric_name, metric_value)

precision 0.48814189493836835
recall 0.8845815420971919
f1 0.6291162495474192


In [46]:
# Raw confusion-matrix counts returned by calc(), in the order TP, FP, TN, FN.
print(TP, FP, TN, FN)

70372 73791 2920222 9182


In [50]:
# Cross-check: precision recomputed by hand from the confusion counts.
precision = TP / (TP + FP) # true positives over all predicted positives
precision

0.48814189493836835

In [52]:
# Cross-check: recall recomputed by hand from the confusion counts.
recall = TP/(TP+FN) # true positives over all actual positives
recall

0.8845815420971919

In [55]:
# Cross-check: F1 as the harmonic mean of the hand-computed precision and recall.
# Stored as `f1_manual` so the sklearn function `f1_score` is not overwritten by a float.
f1_manual = 2*(recall * precision) / (recall + precision)
f1_manual

0.6291162495474192

In [None]:
# Now repeat the evaluation on the labelled ground-truth test set (data/KPI/test.hdf).

In [13]:
import pandas as pd
# Test split in HDF format, located relative to the current working directory.
TEST_DATA_PATH=os.getcwd()+'/data/KPI/test.hdf'
test_data = pd.read_hdf(TEST_DATA_PATH)  # loaded via pandas.read_hdf

In [14]:
# Preview of the loaded frame (columns shown below: timestamp, value, label, KPI ID).
test_data

Unnamed: 0,timestamp,value,label,KPI ID
107717,1482940800,0.048066,0,da10a69f-d836-3baa-ad40-3e548ecf1fbd
107718,1482940860,0.010651,0,da10a69f-d836-3baa-ad40-3e548ecf1fbd
107719,1482940920,0.014996,0,da10a69f-d836-3baa-ad40-3e548ecf1fbd
107720,1482940980,0.039722,0,da10a69f-d836-3baa-ad40-3e548ecf1fbd
107721,1482941040,0.022452,0,da10a69f-d836-3baa-ad40-3e548ecf1fbd
...,...,...,...,...
295409,1508984160,34.940000,0,42d6616d-c9c5-370a-a8ba-17ead74f3114
295410,1508984220,35.200000,0,42d6616d-c9c5-370a-a8ba-17ead74f3114
295411,1508984280,33.020000,0,42d6616d-c9c5-370a-a8ba-17ead74f3114
295412,1508984340,35.150000,0,42d6616d-c9c5-370a-a8ba-17ead74f3114


In [15]:
# One group per KPI ID. NOTE: this rebinds `kpis`, which earlier held the dict
# built from train.csv, to a pandas GroupBy object.
kpis = test_data.groupby(test_data["KPI ID"])

In [28]:
total_time = 0
results, savedscore = [], []
thres = 0.95

# Run the detector on every KPI group of the test frame; only predict() is timed.
for name, kpi in kpis:
    in_timestamp, in_value, in_label = (
        kpi[column].tolist() for column in ('timestamp', 'value', 'label')
    )

    time_start = time.time()
    timestamp, label, pre, scores = predict(in_timestamp, in_value, in_label, thres)
    time_end = time.time()
    total_time += time_end - time_start

    results.append([timestamp, label, pre, name])
    savedscore.append([label, scores, name, timestamp])

HBox(children=(IntProgress(value=0, max=147694), HTML(value='')))




HBox(children=(IntProgress(value=0, max=149129), HTML(value='')))




HBox(children=(IntProgress(value=0, max=7348), HTML(value='')))




HBox(children=(IntProgress(value=0, max=8783), HTML(value='')))




HBox(children=(IntProgress(value=0, max=147720), HTML(value='')))




HBox(children=(IntProgress(value=0, max=149155), HTML(value='')))




HBox(children=(IntProgress(value=0, max=7348), HTML(value='')))




HBox(children=(IntProgress(value=0, max=8783), HTML(value='')))




HBox(children=(IntProgress(value=0, max=147725), HTML(value='')))




HBox(children=(IntProgress(value=0, max=149160), HTML(value='')))




HBox(children=(IntProgress(value=0, max=109193), HTML(value='')))




HBox(children=(IntProgress(value=0, max=110628), HTML(value='')))




HBox(children=(IntProgress(value=0, max=110130), HTML(value='')))




HBox(children=(IntProgress(value=0, max=111565), HTML(value='')))




HBox(children=(IntProgress(value=0, max=109934), HTML(value='')))




HBox(children=(IntProgress(value=0, max=111369), HTML(value='')))




HBox(children=(IntProgress(value=0, max=6180), HTML(value='')))




HBox(children=(IntProgress(value=0, max=7615), HTML(value='')))




HBox(children=(IntProgress(value=0, max=147697), HTML(value='')))




HBox(children=(IntProgress(value=0, max=149132), HTML(value='')))




HBox(children=(IntProgress(value=0, max=110243), HTML(value='')))




HBox(children=(IntProgress(value=0, max=111678), HTML(value='')))




HBox(children=(IntProgress(value=0, max=109440), HTML(value='')))




HBox(children=(IntProgress(value=0, max=110875), HTML(value='')))




HBox(children=(IntProgress(value=0, max=147686), HTML(value='')))




HBox(children=(IntProgress(value=0, max=149121), HTML(value='')))




HBox(children=(IntProgress(value=0, max=147712), HTML(value='')))




HBox(children=(IntProgress(value=0, max=149147), HTML(value='')))




HBox(children=(IntProgress(value=0, max=106989), HTML(value='')))




HBox(children=(IntProgress(value=0, max=108424), HTML(value='')))




HBox(children=(IntProgress(value=0, max=147687), HTML(value='')))




HBox(children=(IntProgress(value=0, max=149122), HTML(value='')))




HBox(children=(IntProgress(value=0, max=147682), HTML(value='')))




HBox(children=(IntProgress(value=0, max=149117), HTML(value='')))




HBox(children=(IntProgress(value=0, max=109200), HTML(value='')))




HBox(children=(IntProgress(value=0, max=110635), HTML(value='')))




HBox(children=(IntProgress(value=0, max=109871), HTML(value='')))




HBox(children=(IntProgress(value=0, max=111306), HTML(value='')))




HBox(children=(IntProgress(value=0, max=6142), HTML(value='')))




HBox(children=(IntProgress(value=0, max=7577), HTML(value='')))




HBox(children=(IntProgress(value=0, max=9344), HTML(value='')))




HBox(children=(IntProgress(value=0, max=10779), HTML(value='')))




HBox(children=(IntProgress(value=0, max=147719), HTML(value='')))




HBox(children=(IntProgress(value=0, max=149154), HTML(value='')))




HBox(children=(IntProgress(value=0, max=147696), HTML(value='')))




HBox(children=(IntProgress(value=0, max=149131), HTML(value='')))




HBox(children=(IntProgress(value=0, max=7348), HTML(value='')))




HBox(children=(IntProgress(value=0, max=8783), HTML(value='')))




HBox(children=(IntProgress(value=0, max=147723), HTML(value='')))




HBox(children=(IntProgress(value=0, max=149158), HTML(value='')))




HBox(children=(IntProgress(value=0, max=105731), HTML(value='')))




HBox(children=(IntProgress(value=0, max=107166), HTML(value='')))




HBox(children=(IntProgress(value=0, max=7348), HTML(value='')))




HBox(children=(IntProgress(value=0, max=8783), HTML(value='')))




HBox(children=(IntProgress(value=0, max=110713), HTML(value='')))




HBox(children=(IntProgress(value=0, max=112148), HTML(value='')))




HBox(children=(IntProgress(value=0, max=109960), HTML(value='')))




HBox(children=(IntProgress(value=0, max=111395), HTML(value='')))




In [32]:
# Pool the reconstructed labels and adjusted predictions of all test KPIs into two flat lists.
labels, predicts = [], []
delay=7
# The per-KPI prediction is bound to `pred`, not `predict`, so the predict()
# function is not overwritten by a list.
for timestamp, label, pred, _kpi_id in results:
    if len(timestamp) == 0:
        continue
    lbl, pdt = reconstruct_series(timestamp, label, pred, delay)
    labels.extend(lbl)
    predicts.extend(pdt)

In [34]:
from sklearn.metrics import f1_score, precision_score, recall_score

# Metrics over the pooled labels/predictions of all test KPIs.
pre = precision_score(labels, predicts)
rec = recall_score(labels, predicts)
f1 = f1_score(labels, predicts)
# Raw confusion counts from the same pooled lists.
TP, FP, TN, FN = calc(predicts, labels)

for metric_name, metric_value in (('precision', pre), ('recall', rec), ('f1', f1)):
    print(metric_name, metric_value)

precision 0.5259406851099702
recall 0.880516862170088
f1 0.6585334091828132
