In [1]:
import os
import sys

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Add the parent dir of notebooks to the import path so that the project-local
# imports in later cells resolve. The membership check keeps the cell
# idempotent: re-running it does not pile up duplicate sys.path entries.
PROJECT_ROOT = os.path.dirname(os.path.abspath(''))
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)


In [2]:
from evalutation.evaluate import evaluate

In [3]:
# Run the evaluate() imported from the project package and display the
# resulting metrics table as the cell output.
resulting_df = evaluate()
resulting_df

Training NullPredictor on dataset NAB Tweets with contamination 0%
Training NullPredictor on dataset NAB Tweets with contamination 1%
Training NullPredictor on dataset NAB Tweets with contamination 2%
Training NullPredictor on dataset NAB Tweets with contamination 3%
Training NullPredictor on dataset NAB Tweets with contamination 4%
Training NullPredictor on dataset NAB Tweets with contamination 5%
Training NullPredictor on dataset Yahoo A1 with contamination 0%
Training NullPredictor on dataset Yahoo A1 with contamination 1%
Training NullPredictor on dataset Yahoo A1 with contamination 2%
Training NullPredictor on dataset Yahoo A1 with contamination 3%
Training NullPredictor on dataset Yahoo A1 with contamination 4%
Training NullPredictor on dataset Yahoo A1 with contamination 5%


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,precision,recall,f1
dataset,model,contamination,threshold,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
NAB Tweets,NullPredictor,0.00,0.50,0.449336,0.515484,0.480142
NAB Tweets,NullPredictor,0.00,0.60,0.459947,0.413054,0.435241
NAB Tweets,NullPredictor,0.00,0.70,0.449213,0.299190,0.359165
NAB Tweets,NullPredictor,0.00,0.80,0.444444,0.207718,0.283117
NAB Tweets,NullPredictor,0.00,0.90,0.448980,0.104812,0.169950
...,...,...,...,...,...,...
Yahoo A1,NullPredictor,0.05,0.60,0.852459,0.418231,0.561151
Yahoo A1,NullPredictor,0.05,0.70,0.834586,0.297587,0.438735
Yahoo A1,NullPredictor,0.05,0.80,0.813187,0.198391,0.318966
Yahoo A1,NullPredictor,0.05,0.90,0.979592,0.128686,0.227488


In [2]:
from data.nab.real_tweets import nab_multivariate_tweet_volume, STOCK_NAMES
# Load the NAB tweet-volume frame that the exploratory cells below operate on.
tweet_df = nab_multivariate_tweet_volume()

# Evaluation framework




In [None]:
def cross_validation_split(df, frac_cv=0.3):
    """
    Split ``df`` by position into a leading training part and a trailing
    held-out (cross-validation) part. Rows are not shuffled.

    The last ``floor(len(df) * frac_cv)`` rows form the held-out part and all
    rows before them form the training part.

    :param df: object supporting ``len()`` and ``.iloc`` slicing (e.g. a DataFrame)
    :param frac_cv: fraction of rows to hold out; must lie in [0, 1]
    :return: (train_df, cv_df)
    :raises ValueError: if ``frac_cv`` is outside [0, 1]
    """
    # Without this check a fraction > 1 produces a negative split position,
    # which ``iloc`` interprets as "count from the end" and silently returns
    # a wrong split instead of failing.
    if not 0 <= frac_cv <= 1:
        raise ValueError("frac_cv must be between 0 and 1, got {!r}".format(frac_cv))

    num_cv_rows = int(np.floor(len(df) * frac_cv))
    split_iloc = len(df) - num_cv_rows
    return df.iloc[:split_iloc], df.iloc[split_iloc:]

# Split the tweet data using the default hold-out fraction of cross_validation_split.
train_df, cv_df = cross_validation_split(tweet_df)

In [None]:
# Number of rows in the training part of the split.
len(train_df)

In [None]:
# Number of rows in the held-out part of the split.
len(cv_df)

In [None]:
def contaminate(df, contamination_factor=0.05):
    """
    Return a copy of ``df`` in which a random subset of rows is turned into
    synthetic anomalies. ``df`` itself is not modified.

    For every selected row:

    * each column whose name contains ``"anomaly"`` is set to ``True``;
    * each column whose name contains ``"value"`` is overwritten with a random
      number drawn as ``random_sample() * max(column) * 2``, where the maximum
      is taken from the original (uncontaminated) column.

    Exactly ``floor(len(df) * contamination_factor)`` distinct rows are
    selected (capped at ``len(df)``).

    :param df: DataFrame to contaminate; column names are assumed to be strings
    :param contamination_factor: fraction of rows to contaminate (>= 0)
    :return: the contaminated copy
    :raises ValueError: if ``contamination_factor`` is negative
    """
    num_indices_to_contaminate = int(np.floor(len(df) * contamination_factor))
    if num_indices_to_contaminate < 0:
        raise ValueError(
            "contamination_factor must be non-negative, got {!r}".format(contamination_factor)
        )

    # Sample row positions WITHOUT replacement. Drawing with randint() could
    # pick the same row several times, so fewer rows than requested ended up
    # contaminated.
    contaminated_indices = np.random.permutation(len(df))[:num_indices_to_contaminate]

    contaminated_df = df.copy()

    # Resolve the affected column positions (and per-column maxima) once
    # instead of re-deriving them for every contaminated row.
    anomaly_col_positions = [
        colidx for colidx, col in enumerate(df.columns) if "anomaly" in col
    ]
    value_col_maxima = [
        (colidx, np.max(df[col])) for colidx, col in enumerate(df.columns) if "value" in col
    ]

    for index in contaminated_indices:
        index = int(index)
        for colidx in anomaly_col_positions:
            contaminated_df.iat[index, colidx] = True
        # Value columns are written after the label columns, so a column
        # matching both substrings ends up holding the random value.
        for colidx, col_max in value_col_maxima:
            contaminated_df.iat[index, colidx] = np.random.random_sample() * col_max * 2
    return contaminated_df


In [None]:
# Sanity check: flag, per column, whether contaminate() changed any cell
# relative to the untouched training frame.
comp_df = contaminate(train_df) != train_df
comp_df.any()

---

We want our evaluation framework to produce a table of results

* For each dataset (NAB Tweets, Yahoo A1)
    * For varying contamination levels from 0% to 5%
        * For each model to be tested
            * For various anomaly thresholds between 0 and 1 (e.g. 0.5, 0.6, 0.7, 0.8, 0.9)
                * run the model, output the F1-score, Precision and Recall


In [None]:
from abc import ABCMeta, abstractmethod
from data.nab.real_tweets import nab_multivariate_tweet_volume, STOCK_NAMES
from data.yahoo.a1_benchmark import yahoo_a1_benchmark
from sklearn.metrics import precision_recall_fscore_support

# Load both benchmark datasets once; evaluate() iterates over DATASETS.
tweet_df = nab_multivariate_tweet_volume()
yahoo_df = yahoo_a1_benchmark()

# (display name, DataFrame) pairs; the name ends up in the "dataset" index level.
DATASETS = [
    ("NAB Tweets", tweet_df),
    ("Yahoo A1", yahoo_df),
]
# Values passed to contaminate() as contamination_factor (fraction of training rows).
CONTAMINATIONS = [0, 0.01, 0.02, 0.03, 0.04, 0.05]
# Values passed to Model.predict() as anomaly_threshold.
ANOMALY_THRESHOLDS = [0.5, 0.6, 0.7, 0.8, 0.9, 0.95]


class Model(metaclass=ABCMeta):
    """
    Abstract interface for the models run by the evaluation loop.

    Concrete subclasses supply ``reset``, ``fit`` and ``predict_proba``;
    ``predict`` is derived from ``predict_proba`` by thresholding.
    """

    @abstractmethod
    def reset(self):
        """Called before each ``fit``; subclasses must override."""
        raise NotImplementedError

    @abstractmethod
    def fit(self, df):
        """Train on ``df``; subclasses must override."""
        raise NotImplementedError

    @abstractmethod
    def predict_proba(self, df):
        """Return one anomaly score per row of ``df``; subclasses must override."""
        raise NotImplementedError

    def predict(self, df, anomaly_threshold=0.5):
        """
        Turn the scores from ``predict_proba`` into 0/1 labels.

        :param df: data forwarded unchanged to ``predict_proba``
        :param anomaly_threshold: scores strictly greater than this map to 1
        :return: array holding 1 where the score exceeds the threshold, else 0
        """
        scores = self.predict_proba(df)
        is_anomaly = scores > anomaly_threshold
        return np.where(is_anomaly, 1, 0)


class NullPredictor(Model):
    """Baseline model that ignores its input and emits random scores."""

    def reset(self):
        """Nothing to reset; this model keeps no state."""
        return None

    def fit(self, df):
        """Training is a no-op; ``df`` is ignored."""
        return None

    def predict_proba(self, df):
        """Return ``len(df)`` random scores drawn with ``np.random.rand``."""
        num_rows = len(df)
        return np.random.rand(num_rows)

# (display name, model instance) pairs; used as the default `models` argument of evaluate().
MODELS = [
    ("NullPredictor", NullPredictor())
]


def remove_anomaly_labels(df):
    """
    Return an independent copy of ``df`` without its label columns.

    A column counts as a label column when its name contains ``"anomaly"``.
    The input frame is left untouched.
    """
    keep_column = ["anomaly" not in name for name in df.columns]
    return df.loc[:, keep_column].copy()


def extract_anomaly_labels_to_row(df):
    """
    Collapse all label columns of ``df`` into a single 0/1 label per row.

    A row is labelled 1 when any column whose name contains ``"anomaly"`` is
    truthy in that row, and 0 otherwise.

    :return: 1-D array with one entry per row of ``df``
    """
    label_columns = [name for name in df.columns if "anomaly" in name]
    row_has_anomaly = df[label_columns].any(axis=1)
    return np.where(row_has_anomaly, 1, 0)


def evaluate(models=MODELS):
    """
    Run every model over the full evaluation grid and tabulate the scores.

    For each dataset in ``DATASETS`` the frame is split with
    ``cross_validation_split``. For each level in ``CONTAMINATIONS`` the
    training part is contaminated, each model is reset and fitted on it, and
    for each value in ``ANOMALY_THRESHOLDS`` the model's predictions on the
    held-out part (with label columns removed) are scored against the held-out
    labels.

    :param models: iterable of ``(label, model)`` pairs; each model must offer
        ``reset()``, ``fit(df)`` and ``predict(df, anomaly_threshold=...)``
    :return: DataFrame indexed by (dataset, model, contamination, threshold)
        with columns precision, recall and f1 for the anomaly class (label 1)
    """
    # dataset, contamination, threshold, model, precision, recall, f1
    results = []

    for dataset_name, df in DATASETS:
        train_df, cv_df = cross_validation_split(df)
        cv_labels = extract_anomaly_labels_to_row(cv_df)
        # Loop-invariant: the unlabeled hold-out frame is identical for every
        # contamination level, model and threshold, so build it once.
        cv_features = remove_anomaly_labels(cv_df)

        for contamination_level in CONTAMINATIONS:
            contaminated_train_df = contaminate(train_df, contamination_factor=contamination_level)

            for model_label, model in models:
                model.reset()
                model.fit(contaminated_train_df)

                for anomaly_threshold in ANOMALY_THRESHOLDS:
                    outputs = model.predict(cv_features, anomaly_threshold=anomaly_threshold)

                    # labels=[0, 1] forces one entry per class. Without it the
                    # returned arrays only cover classes that actually occur,
                    # so indexing [1] raises IndexError when neither the
                    # labels nor the predictions contain an anomaly.
                    precision, recall, f1, _ = precision_recall_fscore_support(
                        cv_labels, outputs, labels=[0, 1]
                    )

                    results.append(
                        # dataset, contamination, threshold, model, precision, recall, f1
                        [dataset_name, contamination_level, anomaly_threshold, model_label, precision[1], recall[1], f1[1]]
                    )

    result_df = pd.DataFrame(
        results,
        columns=["dataset", "contamination", "threshold", "model", "precision", "recall", "f1"],
    )
    return result_df.set_index(["dataset", "model", "contamination", "threshold"])


In [None]:
# Run the full evaluation grid with the default MODELS.
resulting_df = evaluate()

In [None]:
# Drill down level by level: dataset "NAB Tweets", model "NullPredictor", contamination 0.
resulting_df.loc["NAB Tweets"].loc["NullPredictor"].loc[0]

In [3]:
# Exploratory: create a rolling window of size 2 over the tweet data; no aggregation is applied here.
tweet_df.rolling(2)

Rolling [window=2,center=False,axis=0]