In [None]:
import sys

# Make the training package under src/ importable from this notebook.
# NOTE(review): this is an environment-specific absolute path; it has to be
# adapted when the notebook is run from a different checkout location.
SRC_DIR = '/home/onyxia/work/tfl-training-practical-anomaly-detection/src'

# Guard the append so that re-running this cell does not pile up duplicate
# entries on sys.path.
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

In [None]:
%%capture

# Reload edited modules automatically before each cell is executed.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load the training package's IPython extension; presumably this registers the
# custom magics used in the following cells — TODO confirm.
%load_ext tfl_training_anomaly_detection

In [None]:
# Custom line magic (not a built-in IPython magic); presumably provided by the
# tfl_training_anomaly_detection extension to apply the slide styling — TODO confirm.
%presentation_style

In [None]:
%%capture

# Custom line magic; presumably seeds the random number generators with 12 so
# that the sampled data below is reproducible — TODO confirm which generators
# it covers (the cells below draw from the global np.random state).
%set_random_seed 12

In [None]:
# Custom line magic; presumably defines LaTeX macros for use in the markdown
# cells of this notebook — TODO confirm.
%load_latex_macros


# Introduction to Anomaly Detection
<img src="_static/images/aai-institute-cover.png" alt="appliedAI Institute cover image" style="width:100%;">

In [None]:
import numpy as np


import matplotlib
from matplotlib import pyplot as plt
from matplotlib.patches import Ellipse

from tfl_training_anomaly_detection.exercise_tools import evaluate, visualize_mahalanobis

from ipywidgets import interact

from sklearn.metrics import f1_score, precision_score, recall_score

%matplotlib inline
matplotlib.rcParams['figure.figsize'] = (5, 5)


## Exercise
Try the outlier scores for yourself in a simple synthetic scenario. We have prepared the `evaluate` function for you. Try to find the optimal threshold for the dataset.

In [None]:
# Two populations in 2-D: 300 nominal points centred at the origin with
# per-axis standard deviations (1, 1.5), and 10 anomalies centred at (5, 5)
# with standard deviation 2. Nominal points are drawn first so the sequence of
# random draws stays the same.
nominal = np.random.normal(loc=0, scale=[1, 1.5], size=(300, 2))
anomaly = np.random.normal(loc=5, scale=2, size=(10, 2))

# Stack nominal rows first, anomalies last; label 0 = nominal, 1 = anomaly.
data = np.concatenate((nominal, anomaly), axis=0)
y = np.zeros(len(data))
y[len(nominal):] = 1

# Scatter plot coloured by label, with equal axis scaling so distances are
# not visually distorted.
ax = plt.gca()
ax.scatter(data[:, 0], data[:, 1], c=y)
ax.set_aspect('equal')
plt.show()

**Fit a Gaussian**

In [None]:
# Fit an axis-aligned Gaussian N(mu, Sigma) to the (contaminated) data.
mu = data.mean(axis=0)
# Sigma_diag is the diagonal of the covariance matrix, i.e. the per-feature
# *variances* (assumes independent components). The score formula divides the
# squared deviations by this value and the decision ellipse takes its square
# root, so it must hold variances, not standard deviations.
Sigma_diag = data.var(axis=0)
# Report the standard deviation, which is on the same scale as the data.
print('Mean: {}\nStd: {}'.format(mu, np.sqrt(Sigma_diag)))

## Question
How did the contamination influence the parameter estimation?

**Compute scores and evaluate**  

In [None]:
# Mahalanobis distance of every sample from the mean of N(mu, Sigma) with a
# diagonal Sigma: the squared deviations are weighted by 1 / Sigma_diag and
# summed over the feature axis before taking the square root.
centered = data - mu
scores = np.sqrt((centered * (1 / Sigma_diag) * centered).sum(axis=1))

# `evaluate` is the helper imported from exercise_tools; it receives the
# labels and the scores (presumably producing evaluation curves — TODO confirm).
curves = evaluate(y, scores)

**Choose a threshold**

In [None]:
def visualize_mahalanotis(data, y, scores, mu, sigma_diag, thr):
    """Plot labelled 2-D data with the decision ellipse for a score threshold.

    The points are coloured by ``y``, the mean ``mu`` is marked in red, and an
    axis-aligned ellipse centred at ``mu`` with semi-axes
    ``sqrt(sigma_diag[i]) * thr`` is drawn as the decision border. Samples with
    ``scores > thr`` are predicted as anomalies; precision, recall and F1 of
    that prediction against ``y`` are shown in the plot title.

    Parameters
    ----------
    data : array indexed as ``data[:, 0]`` / ``data[:, 1]``
        The samples; only the first two columns are plotted.
    y : array-like
        Ground-truth labels used for colouring and for the metrics. The legend
        labels the two classes 'Nominal' and 'Anomaly' (presumably 0 and 1).
    scores : array-like
        Outlier score per sample, compared against ``thr``.
    mu : sequence of two numbers
        Centre of the ellipse (the fitted mean).
    sigma_diag : sequence of two numbers
        Per-axis values whose square root scales the ellipse, i.e. variances.
    thr : float
        Decision threshold on ``scores``.
    """
    _, axes = plt.subplots(figsize=(6, 6))

    # Visualize the data and the fitted mean on the same, explicit axes.
    scatter_gt = axes.scatter(data[:, 0], data[:, 1], c=y)
    axes.scatter(mu[0], mu[1], color='red')
    handles, _ = scatter_gt.legend_elements()
    axes.legend(handles, ['Nominal', 'Anomaly'])
    axes.set_aspect('equal')

    # Draw the decision contour. Ellipse takes full width/height, hence the
    # factor 2 on the semi-axes.
    decision_border = Ellipse(
        mu,
        width=2 * np.sqrt(sigma_diag[0]) * thr,
        height=2 * np.sqrt(sigma_diag[1]) * thr,
        color='red',
        fill=False
    )
    axes.add_patch(decision_border)

    # Evaluate the threshold: everything scoring above it counts as anomaly.
    y_pred = scores > thr

    precision = precision_score(y, y_pred)
    recall = recall_score(y, y_pred)
    f1 = f1_score(y, y_pred)

    # The metrics are the only title; an earlier 'Ground Truth' title was
    # always overwritten by this call and has been dropped.
    axes.set_title("Precision: {}\nRecall: {}\nF1: {}".format(precision, recall, f1))

    plt.tight_layout()
    plt.show()


In [None]:
# Threshold shared with the following cells; it stays None until the widget
# callback below has run.
thr = None

# Bind an interactive control for `threshold` over the range 0.0 to 6.0.
@interact(threshold=(0., 6.))
def set_threshold(threshold):
    """Store the value selected in the widget in the module-level ``thr``."""
    global thr
    thr = threshold
    # No figure is created inside this callback; the visualisation cell that
    # reads `thr` has to be re-run to see the effect of a new threshold.
    plt.show()

In [None]:
# Uses the `visualize_mahalanobis` helper imported from exercise_tools, not the
# similarly named `visualize_mahalanotis` defined in this notebook.
# NOTE(review): confirm which of the two is intended here.
# Reads the current value of `thr`, so re-run this cell after changing it.
visualize_mahalanobis(data, y, scores, mu, Sigma_diag, thr)

## Task: Find the optimal threshold and evaluate it on the test set.
Choose a good threshold. You may write additional code to determine the threshold.

In [None]:
thr_opt = 3.2 # hand-picked decision threshold on the outlier score; used for the test-set evaluation below

In [None]:
# Fresh test sample: 300 nominal points from the same distribution as the
# training data, followed by 10 anomalies centred at (3, 3) with standard
# deviation 1.5. Nominal points are drawn first so the sequence of random
# draws stays the same.
nominal_test = np.random.normal(0, [1, 1.5], size=(300, 2))
anomaly_test = np.random.normal(3, 1.5, size=(10, 2))
data_test = np.concatenate((nominal_test, anomaly_test))

# Labels: 0 for the leading nominal rows, 1 for the trailing anomalies.
y_test = np.zeros(len(data_test))
y_test[len(nominal_test):] = 1

# Score the test points with the parameters fitted on the training data.
centered_test = data_test - mu
scores_test = np.sqrt((centered_test * (1 / Sigma_diag) * centered_test).sum(axis=1))

visualize_mahalanotis(data_test, y_test, scores_test, mu, Sigma_diag, thr_opt)

<img src="_static/images/aai-institute-cover.png" alt="appliedAI Institute cover image" style="width:100%;">