In [None]:
from utils.cluster import *
from utils.geo import *
from utils.loaddata import *
from utils.utils import *

In [None]:
from numpy import *
from tqdm import *
from sklearn.ensemble import IsolationForest

def get_centers(labels, flow):
    """Average the rows of `flow` per distinct label.

    Args:
        labels: array of labels, compared element-wise against each distinct value.
        flow: array indexed with the positions where `labels` equals that value.

    Returns:
        Array with one entry per distinct label (in the sorted order produced
        by `unique`), each being the mean over axis 0 of the selected `flow` entries.
    """
    return array([
        mean(flow[where(labels == group)], axis=0)
        for group in unique(labels)
    ])

def cal_param(pred, real):
    """Compute precision and recall of binary anomaly flags.

    An entry counts as positive when it equals 1. Callers in this notebook
    store the first returned value under the name ``acc``; it is the
    precision (true positives / predicted positives), not accuracy.

    Args:
        pred: 1-D sequence of predicted flags.
        real: 1-D sequence of ground-truth flags, compared position by
            position against `pred`; expected to have the same length
            (entries beyond ``len(pred)`` are not matched against any
            prediction but still count as real positives).

    Returns:
        Tuple ``(precision, recall)`` of Python floats. A ratio whose
        denominator is zero (no predicted positives / no real positives)
        is reported as 0.0 instead of raising ZeroDivisionError.
    """
    pred_pos = asarray(pred) == 1
    real_pos = asarray(real) == 1

    # True positives: flagged by the detector and present in the ground truth.
    hits = float(count_nonzero(pred_pos & real_pos[:len(pred_pos)]))
    n_pred = count_nonzero(pred_pos)
    n_real = count_nonzero(real_pos)

    precision = hits / n_pred if n_pred else 0.0
    recall = hits / n_real if n_real else 0.0
    return precision, recall

def iforest_ad(data, thresh = None, random_state = None):
    """Flag the most anomalous rows of `data` with an Isolation Forest.

    The forest is fitted on `data` and scored on the same rows. Scores from
    ``score_samples`` (lower = more abnormal) are min-max scaled and inverted
    so that larger values mean "more anomalous"; rows at or above the
    `thresh`-th percentile of that inverted score are returned.

    Args:
        data: 2-D array of samples (``max_features=2`` is hard-coded, so it
            needs at least two columns).
        thresh: percentile in [0, 100] used as the cut-off; defaults to 90
            when None.
        random_state: forwarded to ``IsolationForest`` so a run can be made
            reproducible; None keeps the previous unseeded behaviour.

    Returns:
        1-D integer array of row indices flagged as anomalous. Empty when
        every row receives the same score (the scaling is undefined then).
    """
    if thresh is None:
        thresh = 90

    # `contamination` only shifts the decision offset used by predict();
    # the ranking below relies solely on score_samples().
    clf = IsolationForest(contamination=0.02, max_features=2, random_state=random_state)
    clf.fit(data)
    raw_score = clf.score_samples(data)

    lowest = raw_score.min()
    span = raw_score.max() - lowest
    if span == 0:
        # Constant scores would divide by zero and yield NaNs, which never
        # satisfy the comparison below; return that empty result directly.
        return array([], dtype=int)

    anomaly_score = 1 - (raw_score - lowest) / span
    return where(anomaly_score >= percentile(anomaly_score, thresh))[0]

# Load the generated inflow / outflow data.
inflow = load('./v2/gen_inflows.npy')
outflow = load('./v2/gen_outflows.npy')

# norm_by_row is provided by the utils star-imports (definition not shown here).
inorm = norm_by_row(inflow)
onorm = norm_by_row(outflow)

# Cluster label per row of inorm/onorm, and the ground-truth anomaly flags.
aggl = load('./v2/aggl.npy')
ad_real = load('./v2/gen_anomalies.npy')

accs = []      # precision per threshold (first value returned by cal_param)
recalls = []
f1s = []

tn = 6*7*24    # number of time steps evaluated (presumably 6 weeks of hourly slots; confirm)
rn = 256       # length of the per-time-step flag vector (presumably the region count; confirm)

# Loop-invariant work hoisted out of the sweep: member rows of every cluster
# (labels are assumed to be 0..k-1, as in the original range-based loop) and
# the flattened ground truth, time step by time step.
cluster_members = [where(aggl == l)[0] for l in range(0, len(unique(aggl)))]
real_flat = ad_real.T.flatten()

for thresh in trange(79, 99):
    # One row of flags per time step; preallocated instead of growing an
    # array with concatenate() on every step. float64 matches the dtype
    # the concatenation with an empty array used to produce.
    flags = zeros((tn, rn))
    for t in range(0, tn):
        for label_index in cluster_members:
            # Two features per member: normalised inflow and outflow at time t.
            io_norm = stack([inorm[label_index, t], onorm[label_index, t]], axis=1)
            ad_index = iforest_ad(io_norm, thresh)
            flags[t, label_index[ad_index]] = 1
    iforest_ads = flags.flatten()
    save('./iforest/' + str(thresh) + '.npy', iforest_ads)

    acc, recall = cal_param(iforest_ads, real_flat)
    accs.append(acc)
    recalls.append(recall)
    # F1 is the harmonic mean 2PR/(P+R); the factor 2 was missing before.
    # Defined as 0 when precision and recall are both 0.
    f1s.append(2*acc*recall/(acc + recall) if (acc + recall) > 0 else 0.0)

In [None]:
# Precision ('acc', left axis) and recall (right axis) for each swept threshold.
# The figure comes from a single subplots() call: a separate
# plt.figure(figsize=(20, 20)) only added an empty second figure to the output.
fig, ax = plt.subplots()
acc_line, = ax.plot(range(0, 20), accs, c='r', ls=':', marker='s', label='acc')
ax.set_xlabel('k')
# Tick positions are the sweep indices 0..19; labels are the thresholds 79..98.
ax.set_xticks(range(0, 20), tuple(range(79, 99)))

ax1 = ax.twinx()
recall_line, = ax1.plot(range(0, 20), recalls, c='b', ls=':', marker='.', label='recall')
# One legend covering both axes; independent legend() calls on twin axes
# can end up placed on top of each other.
ax1.legend(handles=[acc_line, recall_line])

plt.show()

In [None]:
# Persist the Isolation Forest sweep metrics (one value per threshold).
save('./index/iforest_acc.npy', accs)
save('./index/iforest_recall.npy', recalls)
# The F1 file must hold f1s; it was previously written from `recalls`.
save('./index/iforest_f1.npy', f1s)

In [None]:
from utils.BaseSVDD import *

def svdd_ad(data, thresh = None):
    """Flag rows of `data` that an SVDD model fitted on `data` leaves outside its boundary.

    Args:
        data: samples to fit and score.
        thresh: passed to BaseSVDD as its `C` argument.
            NOTE(review): the default None is forwarded unchanged; whether
            BaseSVDD accepts C=None cannot be told from here - confirm.

    Returns:
        Indices (along the first axis of the squeezed score) whose value of
        ``get_distance(data) - radius`` is strictly positive.
    """
    model = BaseSVDD(C=thresh, gamma=0.3, kernel='rbf', display='off')
    model.fit(data)
    # Positive margin = farther than the fitted radius (presumably the
    # distance to the hypersphere centre; verify against utils.BaseSVDD).
    margin = array(model.get_distance(data) - model.radius).squeeze()
    is_outside = margin > 0
    return where(is_outside)[0]

# Load the generated inflow / outflow data.
inflow = load('./v2/gen_inflows.npy')
outflow = load('./v2/gen_outflows.npy')

# norm_by_row is provided by the utils star-imports (definition not shown here).
inorm = norm_by_row(inflow)
onorm = norm_by_row(outflow)

# Cluster label per row of inorm/onorm, and the ground-truth anomaly flags.
aggl = load('./v2/aggl.npy')
ad_real = load('./v2/gen_anomalies.npy')

accs = []      # precision per threshold (first value returned by cal_param)
recalls = []
f1s = []

# tn (time steps) and rn (flag-vector length) are reused from the cell that
# defines them earlier in the notebook.

# Loop-invariant work hoisted out of the sweep: member rows of every cluster
# (labels are assumed to be 0..k-1, as in the original range-based loop) and
# the flattened ground truth, time step by time step.
cluster_members = [where(aggl == l)[0] for l in range(0, len(unique(aggl)))]
real_flat = ad_real.T.flatten()

# NOTE(review): thresh is handed to svdd_ad, which forwards it as BaseSVDD's C.
# Sweeping C over 79..98 looks like a leftover from a percentile sweep -
# confirm that this range is meaningful for C.
for thresh in trange(79, 99):
    # One row of flags per time step; preallocated instead of growing an
    # array with concatenate() on every step. float64 matches the dtype
    # the concatenation with an empty array used to produce.
    flags = zeros((tn, rn))
    for t in range(0, tn):
        for label_index in cluster_members:
            # Two features per member: normalised inflow and outflow at time t.
            io_norm = stack([inorm[label_index, t], onorm[label_index, t]], axis=1)
            ad_index = svdd_ad(io_norm, thresh)
            flags[t, label_index[ad_index]] = 1
    # Variable name kept as-is (it holds the SVDD flags here).
    iforest_ads = flags.flatten()
    save('./svdd/' + str(thresh) + '.npy', iforest_ads)

    acc, recall = cal_param(iforest_ads, real_flat)
    accs.append(acc)
    recalls.append(recall)
    # F1 is the harmonic mean 2PR/(P+R); the factor 2 was missing before.
    # Defined as 0 when precision and recall are both 0.
    f1s.append(2*acc*recall/(acc + recall) if (acc + recall) > 0 else 0.0)

In [None]:
# Precision ('acc', left axis) and recall (right axis) for each swept value.
# The figure comes from a single subplots() call: a separate
# plt.figure(figsize=(20, 20)) only added an empty second figure to the output.
fig, ax = plt.subplots()
acc_line, = ax.plot(range(0, 20), accs, c='r', ls=':', marker='s', label='acc')
ax.set_xlabel('k')
# Tick positions are the sweep indices 0..19; labels are the swept values 79..98.
ax.set_xticks(range(0, 20), tuple(range(79, 99)))

ax1 = ax.twinx()
recall_line, = ax1.plot(range(0, 20), recalls, c='b', ls=':', marker='.', label='recall')
# One legend covering both axes; independent legend() calls on twin axes
# can end up placed on top of each other.
ax1.legend(handles=[acc_line, recall_line])

plt.show()

In [None]:
# Persist the SVDD sweep metrics (one value per swept setting).
for metric_path, metric_values in (
    ('./index/svdd_acc.npy', accs),
    ('./index/svdd_recall.npy', recalls),
    ('./index/svdd_f1.npy', f1s),
):
    save(metric_path, metric_values)

In [None]:
def chisquare_ad(data, k = 3):
    """Flag rows whose chi-square statistic exceeds the average by at least `k`.

    For every row the statistic ``sum((row - col_mean)**2 / col_mean)`` is
    computed against the column means of `data`. The anomaly score is the
    excess of that statistic over its mean across rows, with negative
    excesses clipped to 0.

    Args:
        data: 2-D array, one sample per row. A column whose mean is 0 makes
            the statistic undefined (NaN/inf), in which case affected rows
            are not flagged by the comparison.
        k: minimum score for a row to be flagged. Works with plain Python
            numbers as well as NumPy scalars (the former list-based score
            raised TypeError for a plain number such as the default).

    Returns:
        1-D integer array with the indices of the flagged rows.
    """
    data = asarray(data, dtype=float)
    means = data.mean(axis=0)
    chi = ((data - means) ** 2 / means).sum(axis=1)
    # Only rows above the average statistic get a non-zero score.
    anomaly_score = maximum(chi - chi.mean(), 0)
    return where(anomaly_score >= k)[0]

accs = []      # precision per threshold (first value returned by cal_param)
recalls = []
f1s = []

# inorm, onorm, aggl, ad_real, tn and rn are reused from the earlier cells.

# Loop-invariant work hoisted out of the sweep: member rows of every cluster
# (labels are assumed to be 0..k-1, as in the original range-based loop) and
# the flattened ground truth, time step by time step.
cluster_members = [where(aggl == l)[0] for l in range(0, len(unique(aggl)))]
real_flat = ad_real.T.flatten()

# tqdm already reports progress, so the threshold is no longer printed
# on every iteration.
for thresh in tqdm(linspace(0, 1.25, 20)):
    # One row of flags per time step; preallocated instead of growing an
    # array with concatenate() on every step. float64 matches the dtype
    # the concatenation with an empty array used to produce.
    flags = zeros((tn, rn))
    for t in range(0, tn):
        for label_index in cluster_members:
            # Two features per member: normalised inflow and outflow at time t.
            io_norm = stack([inorm[label_index, t], onorm[label_index, t]], axis=1)
            ad_index = chisquare_ad(io_norm, thresh)
            flags[t, label_index[ad_index]] = 1
    # Variable name kept as-is (it holds the chi-square flags here).
    iforest_ads = flags.flatten()
    save('./chis/' + str(thresh) + '.npy', iforest_ads)

    acc, recall = cal_param(iforest_ads, real_flat)
    accs.append(acc)
    recalls.append(recall)
    # F1 is the harmonic mean 2PR/(P+R); the factor 2 was missing before.
    # Defined as 0 when precision and recall are both 0.
    f1s.append(2*acc*recall/(acc + recall) if (acc + recall) > 0 else 0.0)

In [None]:
# Precision ('acc', left axis) and recall (right axis) for each swept k.
# The figure comes from a single subplots() call: a separate
# plt.figure(figsize=(20, 20)) only added an empty second figure to the output.
fig, ax = plt.subplots()
acc_line, = ax.plot(range(0, 20), accs, c='r', ls=':', marker='s', label='acc')
ax.set_xlabel('k')
# Label the ticks with the k values this sweep actually used,
# linspace(0, 1.25, 20), rather than the 79..98 range of the other sweeps.
ax.set_xticks(range(0, 20), ['%.2f' % k for k in linspace(0, 1.25, 20)])
ax.tick_params(axis='x', labelrotation=45)

ax1 = ax.twinx()
recall_line, = ax1.plot(range(0, 20), recalls, c='b', ls=':', marker='.', label='recall')
# One legend covering both axes; independent legend() calls on twin axes
# can end up placed on top of each other.
ax1.legend(handles=[acc_line, recall_line])

plt.show()

In [None]:
# Persist the chi-square sweep metrics (one value per swept k).
for metric_path, metric_values in (
    ('./index/chis_acc.npy', accs),
    ('./index/chis_recall.npy', recalls),
    ('./index/chis_f1.npy', f1s),
):
    save(metric_path, metric_values)