In [1]:
import argparse
import torch
import os, glob

from preprocess import get_data
from models.main import build_network
from utils.utils import save_metrics

import numpy as np
from sklearn.metrics import auc

In [2]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

from tensorboard.backend.event_processing import event_accumulator

import os, glob

def plot_event(args):
    """Plot train/validation learning curves from a TensorBoard event file.

    Loads the scalars of the first file matching ``event*`` inside
    ``args.directory`` and draws one panel per metric. A metric name is the
    part of a scalar tag before its first underscore; the curves are read from
    the tags ``<metric>_train`` and ``<metric>_val``. The figure is saved as
    ``<args.directory>/learning_curves.png``.

    Args:
        args: object exposing a ``directory`` attribute (path of the run).

    Raises:
        FileNotFoundError: if ``args.directory`` contains no ``event*`` file.
    """
    event_files = glob.glob(os.path.join(args.directory, 'event*'))
    if not event_files:
        raise FileNotFoundError(
            "No TensorBoard event file matching 'event*' in {}".format(args.directory))
    # NOTE(review): glob order is unspecified; with several event files the
    # one picked here is arbitrary -- confirm runs only ever write one.
    ea = event_accumulator.EventAccumulator(event_files[0])
    ea.Reload()
    metrics = np.unique([tag.split('_')[0] for tag in ea.Tags()['scalars']])
    # squeeze=False keeps `axs` two-dimensional even when there is a single
    # metric; by default plt.subplots would return a bare Axes that cannot be
    # indexed.
    fig, axs = plt.subplots(nrows=1, ncols=len(metrics), figsize=(15, 8), squeeze=False)
    for ax, metric in zip(axs[0], metrics):
        train = pd.DataFrame(ea.Scalars('{}_train'.format(metric))).value.values
        val = pd.DataFrame(ea.Scalars('{}_val'.format(metric))).value.values
        ax.plot(np.arange(len(train)), train, c='k', label='train')
        ax.plot(np.arange(len(val)), val, c='b', label='val')
        ax.set_title('Learning curve {}'.format(metric), fontsize=25)
        ax.set_xlabel('Epoch', fontsize=20)
        ax.set_ylabel(metric, fontsize=20)
        ax.grid(True)
        ax.legend(loc='best', fontsize=20)
    fig.tight_layout()
    fig.subplots_adjust(top=0.8)
    fig.savefig('{}/learning_curves.png'.format(args.directory))
    plt.close(fig)
    
def plot_histogram(args, scores_in, scores_out):
    """Save overlaid, density-normalised histograms of inlier and outlier scores.

    Args:
        args: object exposing ``model`` (shown in the title) and ``directory``
            (where ``histogram.png`` is written).
        scores_in: anomaly scores of the inliers (drawn in blue).
        scores_out: anomaly scores of the outliers (drawn in red).
    """
    fig, ax = plt.subplots(figsize=(8, 4))
    ax.set_title('Inliers vs Outliers {}'.format(args.model), fontsize=16)
    # Both populations share the same binning and normalisation.
    hist_style = dict(bins=25, density=True, histtype='step')
    ax.hist(scores_in, label='Inliers', color='b', **hist_style)
    ax.hist(scores_out, label='Outliers', color='r', **hist_style)
    ax.legend(fontsize=14)
    fig.savefig('{}/histogram.png'.format(args.directory))
    plt.close(fig)
    
def plot_metrics(args, metric_name, score, m1, m2):
    """Save the ROC or precision-recall curve as ``<directory>/<metric_name>.png``.

    Args:
        args: object exposing a ``directory`` attribute (output folder).
        metric_name: ``'AU ROC'`` (x = FPR, y = TPR, red reference line from
            (0, 0) to (1, 1)) or ``'AU PR'`` (x = Recall, y = Precision, red
            reference line from (1, 0) to (0, 1)).
        score: area under the curve, shown in the title with two decimals.
        m1: x coordinates of the curve.
        m2: y coordinates of the curve.

    Raises:
        ValueError: if ``metric_name`` is not one of the two supported names.
    """
    # metric name -> (x label, y label, reference-line xs, reference-line ys)
    layouts = {
        'AU ROC': ('FPR', 'TPR', [0.0, 1.0], [0.0, 1.0]),
        'AU PR': ('Recall', 'Precision', [1.0, 0.0], [0.0, 1.0]),
    }
    # Validate before touching matplotlib so an unsupported name cannot leave
    # a half-drawn figure behind.
    if metric_name not in layouts:
        raise ValueError(
            "Unsupported metric_name {!r}; expected one of {}".format(
                metric_name, sorted(layouts)))
    m1_name, m2_name, ref_x, ref_y = layouts[metric_name]

    # A dedicated figure keeps the curve off whatever figure happens to be
    # current in the notebook session.
    fig, ax = plt.subplots()
    ax.set_title('{} curve. Score: {:.2f}'.format(metric_name, score), fontsize=20)
    ax.plot(m1, m2)
    ax.plot(ref_x, ref_y, c='r')
    ax.set_xlabel(m1_name, fontsize=18)
    ax.set_ylabel(m2_name, fontsize=18)
    ax.grid(True)
    fig.savefig('{}/{}.png'.format(args.directory, metric_name))
    plt.close(fig)

In [3]:
def compute_metrics(args, scores, labels):
    """Compute the areas under the ROC and precision-recall curves.

    Scores whose label is 0 are treated as inliers and scores whose label is 1
    as outliers; both groups are handed to ``compute_roc_pr``, which also
    saves the curve plots.

    Args:
        args: forwarded unchanged to ``compute_roc_pr``.
        scores: array of anomaly scores, indexable with a boolean mask.
        labels: array aligned with ``scores`` holding 0 (inlier) / 1 (outlier).

    Returns:
        dict with the keys ``'AU ROC'`` and ``'AU PR'``.
    """
    inlier_mask = labels == 0
    outlier_mask = labels == 1
    roc_area, pr_area = compute_roc_pr(args, scores[inlier_mask], scores[outlier_mask])
    return {'AU ROC': roc_area, 'AU PR': pr_area}

def compute_roc_pr(args, inliers_scores, outlier_scores):
    """Compute and plot the ROC curve, then the precision-recall curve.

    Each curve is computed (``auroc`` / ``aupr``) and immediately plotted with
    ``plot_metrics`` under the names ``'AU ROC'`` and ``'AU PR'``.

    Args:
        args: forwarded to ``plot_metrics``.
        inliers_scores: anomaly scores of the inliers.
        outlier_scores: anomaly scores of the outliers.

    Returns:
        tuple ``(auroc_score, aupr_score)``.
    """
    areas = []
    # ROC first, PR second: same order in which the plots are written.
    for curve_name, curve_fn in (('AU ROC', auroc), ('AU PR', aupr)):
        area, xs, ys = curve_fn(inliers_scores, outlier_scores)
        plot_metrics(args, curve_name, area, xs, ys)
        areas.append(area)
    return tuple(areas)

def auroc(in_scores, out_scores, n_thresholds=100000):
    """Area under the ROC curve of anomaly scores, via a threshold sweep.

    A sample is flagged as an outlier when its score is ``>=`` the threshold.
    The sweep uses ``n_thresholds + 1`` evenly spaced thresholds running from
    the largest score down to the smallest (both included), preceded by
    ``+inf``. The curve therefore starts at (0, 0) and ends at (1, 1); leaving
    out the smallest score would drop the last step of the curve and bias the
    area low.

    Args:
        in_scores: array of inlier (negative class) scores.
        out_scores: array of outlier (positive class) scores.
        n_thresholds: number of equal steps between the largest and the
            smallest score.

    Returns:
        tuple ``(area, fprs, tprs)`` where ``fprs`` and ``tprs`` are lists of
        floats ordered from the strictest to the loosest threshold.
    """
    in_scores = np.asarray(in_scores)
    out_scores = np.asarray(out_scores)
    scores = np.concatenate((in_scores, out_scores), axis=0)
    start = np.min(scores)
    end = np.max(scores)

    # linspace (unlike arange with a computed step) includes both end points
    # and stays well defined when every score is identical.
    thresholds = np.concatenate(([np.inf], np.linspace(end, start, num=n_thresholds + 1)))

    # Count "score >= threshold" for all thresholds at once: in a sorted array
    # searchsorted(side='left') gives the number of elements strictly below
    # the threshold, so the remainder are at or above it.
    sorted_in = np.sort(in_scores, axis=None)
    sorted_out = np.sort(out_scores, axis=None)
    flagged_in = sorted_in.size - np.searchsorted(sorted_in, thresholds, side='left')
    flagged_out = sorted_out.size - np.searchsorted(sorted_out, thresholds, side='left')

    # Built-in float replaces the np.float alias, which no longer exists in
    # current NumPy releases.
    tprs = (flagged_out / float(len(out_scores))).tolist()
    fprs = (flagged_in / float(len(in_scores))).tolist()
    return auc(fprs, tprs), fprs, tprs

def aupr(in_scores, out_scores, n_thresholds=100000):
    """Area under the precision-recall curve of anomaly scores.

    A sample is flagged as an outlier when its score is ``>=`` the threshold.
    The sweep uses ``n_thresholds + 1`` evenly spaced thresholds running from
    the largest score down to the smallest (both included). Thresholds that
    flag no sample are skipped because precision is undefined there. The
    curve is anchored at (recall 0, precision 1) so that it starts on the
    precision axis.

    Args:
        in_scores: array of inlier (negative class) scores.
        out_scores: array of outlier (positive class) scores.
        n_thresholds: number of equal steps between the largest and the
            smallest score.

    Returns:
        tuple ``(area, recalls, precisions)`` where both curves are lists of
        floats ordered from the strictest to the loosest threshold.
    """
    in_scores = np.asarray(in_scores)
    out_scores = np.asarray(out_scores)
    scores = np.concatenate((in_scores, out_scores), axis=0)
    start = np.min(scores)
    end = np.max(scores)

    # linspace (unlike arange with a computed step) includes the smallest
    # score and stays well defined when every score is identical.
    thresholds = np.linspace(end, start, num=n_thresholds + 1)

    # Count "score >= threshold" for all thresholds at once: in a sorted array
    # searchsorted(side='left') gives the number of elements strictly below
    # the threshold, so the remainder are at or above it.
    sorted_in = np.sort(in_scores, axis=None)
    sorted_out = np.sort(out_scores, axis=None)
    tp = sorted_out.size - np.searchsorted(sorted_out, thresholds, side='left')
    fp = sorted_in.size - np.searchsorted(sorted_in, thresholds, side='left')

    flagged = tp + fp
    defined = flagged > 0  # precision is undefined when nothing is flagged
    precisions = tp[defined] / flagged[defined]
    # Built-in float replaces the np.float alias, which no longer exists in
    # current NumPy releases.
    recalls = tp[defined] / float(len(out_scores))

    recalls = np.concatenate(([0.0], recalls)).tolist()
    precisions = np.concatenate(([1.0], precisions)).tolist()
    return auc(recalls, precisions), recalls, precisions

def print_metrics(metrics):
    """Print every metric as ``name: value`` (three decimals), then a ruler line.

    Args:
        metrics: mapping from metric name to a numeric value.
    """
    row_template = "{}: {:.3f}"
    for name in metrics:
        print(row_template.format(name, metrics[name]))
    print("##########################################")

In [4]:
def test(args, dataloader):
    """Evaluate the trained anomaly-detection model on ``dataloader``.

    Builds the network, restores ``<args.directory>/trained_parameters.pth``,
    scores every batch with ``model.compute_anomaly_score`` and then computes,
    plots, prints and saves the AU ROC / AU PR metrics.

    Args:
        args: configuration object; this function reads ``device`` and
            ``directory`` and forwards the object to ``build_network``,
            ``compute_metrics`` and ``plot_histogram``.
        dataloader: iterable yielding 4-tuples whose second item is the input
            batch and whose fourth item is the outlier label (0 = inlier,
            1 = outlier); the other two items are ignored.

    Returns:
        dict with the keys ``'AU ROC'`` and ``'AU PR'``.
    """
    model = build_network(args).to(args.device)
    # map_location moves the stored tensors onto the evaluation device, so a
    # checkpoint written on a GPU machine still loads on a CPU-only one.
    checkpoint_path = '{}/trained_parameters.pth'.format(args.directory)
    state_dict = torch.load(checkpoint_path, map_location=args.device)
    model.load_state_dict(state_dict)

    scores = []
    out_labels = []

    model.eval()
    with torch.no_grad():
        for _, x, _, y_out in dataloader:
            x = x.float().to(args.device)

            score = model.compute_anomaly_score(x)
            scores.append(score.detach().cpu())
            out_labels.append(y_out.cpu())

    scores = torch.cat(scores).numpy()
    out_labels = torch.cat(out_labels).numpy()

    metrics = compute_metrics(args, scores, out_labels)
    plot_histogram(args, scores[out_labels == 0], scores[out_labels == 1])
    print_metrics(metrics)
    save_metrics(metrics, args.directory, 'test')
    return metrics

In [5]:
class Args:
    """Static defaults standing in for the command-line arguments."""

    # Root folder of the experiments; each run directory is created by
    # joining this with the job name.
    r: str = './experiments'
    # Appears as the ``ld`` field of the job name -- presumably the latent
    # dimension of the model (TODO confirm).
    z_dim: int = 128
    # Presumably the input feature dimension expected by the network
    # (TODO confirm against build_network).
    in_dim: int = 152
    # Presumably the dataset location read by get_data (TODO confirm).
    data_pth: str = '../data'
    # Presumably the batch size used by the data loaders (TODO confirm).
    batch_size: int = 128

args = Args()

# Run on the GPU when torch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    args.device = torch.device('cuda')
else:
    args.device = torch.device('cpu')

In [6]:
# Select which experiment family is summarised below. All three values are
# embedded in the run-directory name, so they must match the training runs.
args.model = 'ae'  # model identifier (presumably an autoencoder -- TODO confirm)
args.hierClass = 'Periodic'  # hierarchy branch; decides the list of outlier classes
args.lr = 0.001  # learning rate, as it appears in the run-directory name

In [8]:
import json

# Classes that can be held out as the outlier, per hierarchy branch.
OUTLIER_CLASSES = {
    'Transient': ['SLSN', 'SNII', 'SNIa', 'SNIbc'],
    'Stochastic': ['AGN', 'Blazar', 'CV/Nova', 'QSO', 'YSO'],
    'Periodic': ['CEP', 'DSCT', 'E', 'RRL', 'LPV'],
}
# Number of cross-validation folds stored per outlier class.
N_FOLDS = 5

if args.hierClass not in OUTLIER_CLASSES:
    raise ValueError(
        "Unknown hierClass {!r}; expected one of {}".format(
            args.hierClass, sorted(OUTLIER_CLASSES)))
possible_outliers = OUTLIER_CLASSES[args.hierClass]

# Mean and standard deviation over the folds, one entry per outlier class.
auprs = []
auprs_devs = []
aurocs = []
aurocs_devs = []

for outlier in possible_outliers:
    # Deliberately not named `auroc` / `aupr`: those names belong to the
    # metric functions defined earlier in the notebook, and rebinding them to
    # lists would break every later call to test().
    fold_aurocs = []
    fold_auprs = []
    for fold in range(N_FOLDS):
        args.fold = fold
        args.outlier = outlier

        # NOTE(review): 'CV/Nova' contains a path separator, so its run
        # directory is nested one level deeper -- confirm this matches how the
        # training jobs wrote their output.
        job_name = '{}_{}_{}_lr{}_ld{}_fold{}'.format(
                   args.model, args.hierClass, args.outlier, args.lr, args.z_dim, args.fold)

        args.directory = os.path.join(args.r, job_name)

        # The metrics are read from disk instead of being recomputed.
        # Presumably metrics_test.json is what test() stores through
        # save_metrics; to regenerate it for a run:
        #     _, _, dataloader_test = get_data(args)
        #     test(args, dataloader_test)
        with open('{}/metrics_test.json'.format(args.directory)) as metrics_file:
            data = json.load(metrics_file)
        fold_aurocs.append(data['AU ROC'])
        fold_auprs.append(data['AU PR'])
    aurocs.append(np.mean(fold_aurocs))
    aurocs_devs.append(np.std(fold_aurocs))
    auprs.append(np.mean(fold_auprs))
    auprs_devs.append(np.std(fold_auprs))
    print(outlier)
    print('AU ROC {:.3f} +- {:.3f}'.format(aurocs[-1], aurocs_devs[-1]))
    print('AU PR {:.3f} +- {:.3f}'.format(auprs[-1], auprs_devs[-1]))

CEP
AU ROC 0.564 +- 0.024
AU PR 0.133 +- 0.015
DSCT
AU ROC 0.367 +- 0.015
AU PR 0.073 +- 0.002
E
AU ROC 0.864 +- 0.009
AU PR 0.650 +- 0.015
RRL
AU ROC 0.907 +- 0.015
AU PR 0.815 +- 0.035
LPV
AU ROC 0.996 +- 0.000
AU PR 0.987 +- 0.001
