# SIGKDD21 multi-dataset time series anomaly detection (TSAD) competition

- Source and description: https://compete.hexagon-ml.com/practice/competition/39

In [1]:
from typing import List
import matplotlib
import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
from config import data_raw_folder, data_processed_folder
from timeeval import Datasets
from timeeval.datasets import DatasetAnalyzer, DatasetRecord

In [2]:
# Default size (width, height) applied to every figure created in this notebook.
plt.rcParams["figure.figsize"] = (20, 10)

In [3]:
def find_datasets(folder):
    """Return the regular ``*.txt`` files located directly in ``folder``, sorted by path.

    ``folder`` may be a ``Path`` or anything accepted by the ``Path`` constructor.
    Directories whose name ends in ``.txt`` are excluded; sub-folders are not searched.
    """
    root = folder if isinstance(folder, Path) else Path(folder)
    txt_files = [entry for entry in root.glob("*.txt") if entry.is_file()]
    txt_files.sort()
    return txt_files

def plot_dataset(f, with_split=True):
    """Plot the series stored in ``f`` and highlight its labelled anomaly window.

    The file stem must end in ``_<split>_<begin>_<end>``: ``<split>`` is the index
    separating the train part from the test part, ``<begin>``/``<end>`` delimit the
    anomaly window that is drawn as a yellow rectangle.

    Parameters
    ----------
    f : Path
        Text file readable by ``np.genfromtxt``.
    with_split : bool
        If true, draw the part before the split in red ("train") and the rest in
        green ("test"); otherwise draw the whole series in black.

    Returns
    -------
    The array loaded from ``f``.
    """
    # Import the class explicitly instead of relying on `matplotlib.patches` having
    # been loaded as a side effect of importing pyplot.
    from matplotlib.patches import Rectangle

    name = f.stem
    name_parts = name.split("_")
    split_at = int(name_parts[-3])
    anomaly_begin, anomaly_end = (int(part) for part in name_parts[-2:])
    data = np.genfromtxt(f)
    if with_split:
        # NaN-padded copies keep both parts on the original x-axis positions.
        train = np.full_like(data, fill_value=np.nan)
        train[:split_at] = data[:split_at]
        test = np.full_like(data, fill_value=np.nan)
        test[split_at:] = data[split_at:]
        plt.plot(train, label="train", color="red")
        plt.plot(test, label="test", color="green")
    else:
        plt.plot(data, label=name, color="black")
    # add anomaly label
    plt.gca().add_patch(Rectangle(
        (anomaly_begin, data.min()),
        anomaly_end - anomaly_begin,
        data.max() - data.min(),
        color="yellow", alpha=0.75, label="anomaly"
    ))
    # Without a legend the labels given above are never shown. A fixed location
    # avoids the slow "best" placement search on long series.
    plt.legend(loc="upper left")
    plt.title(name)
    return data

In [4]:
# Name under which all datasets of this notebook are registered.
dataset_collection_name = "KDD-TSAD"
# Raw files live three levels below the raw-data root.
source_folder = Path(data_raw_folder).joinpath(
    "UCR_TimeSeriesAnomalyDatasets2021",
    "FilesAreInHere",
    "UCR_Anomaly_FullData",
)
target_folder = Path(data_processed_folder)

print(f"Looking for source datasets in {Path(source_folder).absolute()} and\nsaving processed datasets in {Path(target_folder).absolute()}")

Looking for source datasets in /home/sebastian/Documents/Projects/akita/data/benchmark-data/data-raw/UCR_TimeSeriesAnomalyDatasets2021/FilesAreInHere/UCR_Anomaly_FullData and
saving processed datasets in /home/sebastian/Documents/Projects/akita/data/benchmark-data/data-processed


In [5]:
# Properties recorded identically for every dataset of this collection.
dataset_type = "synthetic"
input_type = "univariate"
datetime_index = False
train_type = "semi-supervised"
train_is_normal = True
# Placeholder only: process_dataset reads the split index of each dataset from its file name.
split_at = None

# Processed files are stored in <target_folder>/<input_type>/<collection name>.
dataset_subfolder = Path(input_type, dataset_collection_name)
target_subfolder = target_folder.joinpath(dataset_subfolder)
target_subfolder.mkdir(parents=True, exist_ok=True)
print(f"Created directories {target_subfolder}")

dm = Datasets(target_folder)

Created directories /home/sebastian/Documents/Projects/akita/data/benchmark-data/data-processed/univariate/KDD-TSAD


In [6]:
def process_dataset(dm: Datasets, idx: int, f: Path) -> None:
    """Convert one raw source file into train/test CSVs plus metadata and register it in ``dm``.

    The file stem is expected to end in ``_<split>_<begin>_<end>``; everything before
    these three tokens becomes the dataset name. The test CSV holds the full series
    with ``timestamp``, ``value`` and ``is_anomaly`` columns (rows ``begin`` up to, but
    excluding, ``end`` are labelled 1); the train CSV holds its first ``split`` rows.

    Writing is skipped when test CSV, train CSV and metadata file all exist already.
    Metadata is only (re-)computed for the parts that cannot be loaded from the
    existing metadata file.

    Relies on the notebook-level names ``dataset_collection_name``, ``dataset_subfolder``,
    ``target_subfolder`` and the shared dataset properties.

    Parameters
    ----------
    dm : Datasets
        Dataset manager that receives the new record.
    idx : int
        Position of the file in the processing order; used for log messages and to
        decide whether stationarity analysis is skipped.
    f : Path
        Raw source file.
    """
    print(f"> Processing source dataset {idx}")
    tokens = f.stem.split("_")
    dataset_name = "_".join(tokens[:-3])
    split_at = int(tokens[-3])
    anomaly_begin, anomaly_end = (int(token) for token in tokens[-2:])
    dataset_id = (dataset_collection_name, dataset_name)

    test_filename = f"{dataset_name}.test.csv"
    train_filename = f"{dataset_name}.train.csv"
    target_test_filepath = target_subfolder / test_filename
    target_train_filepath = target_subfolder / train_filename
    target_meta_filepath = target_test_filepath.parent / f"{dataset_name}.{Datasets.METADATA_FILENAME_PREFIX}"

    # Prepare datasets
    outputs_present = (
        target_test_filepath.exists()
        and target_train_filepath.exists()
        and target_meta_filepath.exists()
    )
    if outputs_present:
        df_test = df_train = None
        print(f"  skipped writing dataset {idx} to disk, because it already exists.")
    else:
        values = np.genfromtxt(f)
        df_test = pd.DataFrame(values, columns=["value"])
        df_test.insert(0, "timestamp", df_test.index.values)
        df_test["is_anomaly"] = 0
        df_test.loc[range(anomaly_begin, anomaly_end), "is_anomaly"] = 1
        df_test.to_csv(target_test_filepath, index=False)

        df_train = df_test[:split_at].copy()
        df_train.to_csv(target_train_filepath, index=False)
        print(f"  written dataset {idx}")

    # NOTE(review): the threshold 237 is hard-coded; its rationale is not visible here.
    ignore_stationarity = idx > 237

    # Prepare metadata
    def analyze_train(train_frame):
        # Adds the training metadata to the metadata file without overwriting it.
        DatasetAnalyzer(dataset_id, is_train=True, df=train_frame, ignore_stationarity=ignore_stationarity)\
            .save_to_json(target_meta_filepath, overwrite=False)
        print(f"  analyzed training dataset {idx}")

    def analyze(test_frame, train_frame):
        # Test metadata goes first and replaces any existing file; train metadata follows.
        analyzer = DatasetAnalyzer(dataset_id, is_train=False, df=test_frame, ignore_stationarity=ignore_stationarity)
        analyzer.save_to_json(target_meta_filepath, overwrite=True)
        test_meta = analyzer.metadata
        print(f"  analyzed test dataset {idx}")

        analyze_train(train_frame)
        return test_meta

    if not target_meta_filepath.exists():
        meta = analyze(df_test, df_train)
    else:
        try:
            meta = DatasetAnalyzer.load_from_json(target_meta_filepath, train=False)
        except ValueError:
            # Test metadata could not be loaded: recompute both parts from the CSVs.
            if df_test is None:
                df_test = pd.read_csv(target_test_filepath)
            if df_train is None:
                df_train = pd.read_csv(target_train_filepath)
            meta = analyze(df_test, df_train)
        else:
            # check if train metadata is also present
            try:
                DatasetAnalyzer.load_from_json(target_meta_filepath, train=True)
                print(f"  skipped analyzing dataset {idx}, because metadata already exists.")
            except ValueError:
                if df_train is None:
                    df_train = pd.read_csv(target_train_filepath)
                analyze_train(df_train)

    anomaly_length = meta.anomaly_length
    record = DatasetRecord(
        collection_name=dataset_collection_name,
        dataset_name=dataset_name,
        train_path=dataset_subfolder / train_filename,
        test_path=dataset_subfolder / test_filename,
        dataset_type=dataset_type,
        datetime_index=datetime_index,
        split_at=split_at,
        train_type=train_type,
        train_is_normal=train_is_normal,
        input_type=input_type,
        length=meta.length,
        dimensions=meta.dimensions,
        contamination=meta.contamination,
        num_anomalies=meta.num_anomalies,
        min_anomaly_length=anomaly_length.min,
        median_anomaly_length=anomaly_length.median,
        max_anomaly_length=anomaly_length.max,
        mean=meta.mean,
        stddev=meta.stddev,
        trend=meta.trend,
        stationarity=meta.get_stationarity_name(),
        period_size=np.nan
    )
    dm.add_dataset(record)
    print(f"... processed source dataset {idx}: {f.name} -> {target_test_filepath}")

# Process every source file; the position in the sorted file list serves as dataset index.
for i, file in enumerate(find_datasets(source_folder)):
    process_dataset(dm, i, file)
# NOTE(review): presumably writes the collected dataset records to disk - confirm against the timeeval docs.
dm.save()

> Processing source dataset 0
  skipped writing dataset 0 to disk, because it already exists.
  skipped analyzing dataset 0, because metadata already exists.
... processed source dataset 0: 001_UCR_Anomaly_DISTORTED1sddb40_35000_52000_52620.txt -> /home/sebastian/Documents/Projects/akita/data/benchmark-data/data-processed/univariate/KDD-TSAD/001_UCR_Anomaly_DISTORTED1sddb40.test.csv
> Processing source dataset 1
  skipped writing dataset 1 to disk, because it already exists.
  skipped analyzing dataset 1, because metadata already exists.
... processed source dataset 1: 002_UCR_Anomaly_DISTORTED2sddb40_35000_56600_56900.txt -> /home/sebastian/Documents/Projects/akita/data/benchmark-data/data-processed/univariate/KDD-TSAD/002_UCR_Anomaly_DISTORTED2sddb40.test.csv
> Processing source dataset 2
  skipped writing dataset 2 to disk, because it already exists.
  skipped analyzing dataset 2, because metadata already exists.
... processed source dataset 2: 003_UCR_Anomaly_DISTORTED3sddb40_35000

... processed source dataset 27: 028_UCR_Anomaly_DISTORTEDInternalBleeding17_1600_3198_3309.txt -> /home/sebastian/Documents/Projects/akita/data/benchmark-data/data-processed/univariate/KDD-TSAD/028_UCR_Anomaly_DISTORTEDInternalBleeding17.test.csv
> Processing source dataset 28
  skipped writing dataset 28 to disk, because it already exists.
  skipped analyzing dataset 28, because metadata already exists.
... processed source dataset 28: 029_UCR_Anomaly_DISTORTEDInternalBleeding18_2300_4485_4587.txt -> /home/sebastian/Documents/Projects/akita/data/benchmark-data/data-processed/univariate/KDD-TSAD/029_UCR_Anomaly_DISTORTEDInternalBleeding18.test.csv
> Processing source dataset 29
  skipped writing dataset 29 to disk, because it already exists.
  skipped analyzing dataset 29, because metadata already exists.
... processed source dataset 29: 030_UCR_Anomaly_DISTORTEDInternalBleeding19_3000_4187_4197.txt -> /home/sebastian/Documents/Projects/akita/data/benchmark-data/data-processed/univari

... processed source dataset 55: 056_UCR_Anomaly_DISTORTEDapneaecg3_5000_11111_11211.txt -> /home/sebastian/Documents/Projects/akita/data/benchmark-data/data-processed/univariate/KDD-TSAD/056_UCR_Anomaly_DISTORTEDapneaecg3.test.csv
> Processing source dataset 56
  skipped writing dataset 56 to disk, because it already exists.
  skipped analyzing dataset 56, because metadata already exists.
... processed source dataset 56: 057_UCR_Anomaly_DISTORTEDapneaecg4_6000_16000_16100.txt -> /home/sebastian/Documents/Projects/akita/data/benchmark-data/data-processed/univariate/KDD-TSAD/057_UCR_Anomaly_DISTORTEDapneaecg4.test.csv
> Processing source dataset 57
  skipped writing dataset 57 to disk, because it already exists.
  skipped analyzing dataset 57, because metadata already exists.
... processed source dataset 57: 058_UCR_Anomaly_DISTORTEDapneaecg_10000_12240_12308.txt -> /home/sebastian/Documents/Projects/akita/data/benchmark-data/data-processed/univariate/KDD-TSAD/058_UCR_Anomaly_DISTORTEDa

... processed source dataset 84: 085_UCR_Anomaly_DISTORTEDs20101m_10000_35774_35874.txt -> /home/sebastian/Documents/Projects/akita/data/benchmark-data/data-processed/univariate/KDD-TSAD/085_UCR_Anomaly_DISTORTEDs20101m.test.csv
> Processing source dataset 85
  skipped writing dataset 85 to disk, because it already exists.
  skipped analyzing dataset 85, because metadata already exists.
... processed source dataset 85: 086_UCR_Anomaly_DISTORTEDsddb49_20000_67950_68200.txt -> /home/sebastian/Documents/Projects/akita/data/benchmark-data/data-processed/univariate/KDD-TSAD/086_UCR_Anomaly_DISTORTEDsddb49.test.csv
> Processing source dataset 86
  skipped writing dataset 86 to disk, because it already exists.
  skipped analyzing dataset 86, because metadata already exists.
... processed source dataset 86: 087_UCR_Anomaly_DISTORTEDsel840mECG1_17000_51370_51740.txt -> /home/sebastian/Documents/Projects/akita/data/benchmark-data/data-processed/univariate/KDD-TSAD/087_UCR_Anomaly_DISTORTEDsel840

... processed source dataset 113: 114_UCR_Anomaly_CIMIS44AirTemperature2_4000_5703_5727.txt -> /home/sebastian/Documents/Projects/akita/data/benchmark-data/data-processed/univariate/KDD-TSAD/114_UCR_Anomaly_CIMIS44AirTemperature2.test.csv
> Processing source dataset 114
  skipped writing dataset 114 to disk, because it already exists.
  skipped analyzing dataset 114, because metadata already exists.
... processed source dataset 114: 115_UCR_Anomaly_CIMIS44AirTemperature3_4000_6520_6544.txt -> /home/sebastian/Documents/Projects/akita/data/benchmark-data/data-processed/univariate/KDD-TSAD/115_UCR_Anomaly_CIMIS44AirTemperature3.test.csv
> Processing source dataset 115
  skipped writing dataset 115 to disk, because it already exists.
  skipped analyzing dataset 115, because metadata already exists.
... processed source dataset 115: 116_UCR_Anomaly_CIMIS44AirTemperature4_4000_5549_5597.txt -> /home/sebastian/Documents/Projects/akita/data/benchmark-data/data-processed/univariate/KDD-TSAD/116

... processed source dataset 142: 143_UCR_Anomaly_InternalBleeding8_2500_5865_5974.txt -> /home/sebastian/Documents/Projects/akita/data/benchmark-data/data-processed/univariate/KDD-TSAD/143_UCR_Anomaly_InternalBleeding8.test.csv
> Processing source dataset 143
  skipped writing dataset 143 to disk, because it already exists.
  skipped analyzing dataset 143, because metadata already exists.
... processed source dataset 143: 144_UCR_Anomaly_InternalBleeding9_4200_6599_6681.txt -> /home/sebastian/Documents/Projects/akita/data/benchmark-data/data-processed/univariate/KDD-TSAD/144_UCR_Anomaly_InternalBleeding9.test.csv
> Processing source dataset 144
  skipped writing dataset 144 to disk, because it already exists.
  skipped analyzing dataset 144, because metadata already exists.
... processed source dataset 144: 145_UCR_Anomaly_Lab2Cmac011215EPG1_5000_17210_17260.txt -> /home/sebastian/Documents/Projects/akita/data/benchmark-data/data-processed/univariate/KDD-TSAD/145_UCR_Anomaly_Lab2Cmac0

... processed source dataset 172: 173_UCR_Anomaly_insectEPG1_3000_7000_7030.txt -> /home/sebastian/Documents/Projects/akita/data/benchmark-data/data-processed/univariate/KDD-TSAD/173_UCR_Anomaly_insectEPG1.test.csv
> Processing source dataset 173
  skipped writing dataset 173 to disk, because it already exists.
  skipped analyzing dataset 173, because metadata already exists.
... processed source dataset 173: 174_UCR_Anomaly_insectEPG2_3700_8000_8025.txt -> /home/sebastian/Documents/Projects/akita/data/benchmark-data/data-processed/univariate/KDD-TSAD/174_UCR_Anomaly_insectEPG2.test.csv
> Processing source dataset 174
  skipped writing dataset 174 to disk, because it already exists.
  skipped analyzing dataset 174, because metadata already exists.
... processed source dataset 174: 175_UCR_Anomaly_insectEPG3_5200_7000_7050.txt -> /home/sebastian/Documents/Projects/akita/data/benchmark-data/data-processed/univariate/KDD-TSAD/175_UCR_Anomaly_insectEPG3.test.csv
> Processing source dataset

... processed source dataset 202: 203_UCR_Anomaly_CHARISfive_11812_10995_11028.txt -> /home/sebastian/Documents/Projects/akita/data/benchmark-data/data-processed/univariate/KDD-TSAD/203_UCR_Anomaly_CHARISfive.test.csv
> Processing source dataset 203
  skipped writing dataset 203 to disk, because it already exists.
  skipped analyzing dataset 203, because metadata already exists.
... processed source dataset 203: 204_UCR_Anomaly_CHARISfive_12412_15000_15070.txt -> /home/sebastian/Documents/Projects/akita/data/benchmark-data/data-processed/univariate/KDD-TSAD/204_UCR_Anomaly_CHARISfive.test.csv
> Processing source dataset 204
  skipped writing dataset 204 to disk, because it already exists.
  skipped analyzing dataset 204, because metadata already exists.
... processed source dataset 204: 205_UCR_Anomaly_CHARISfive_9812_28995_29085.txt -> /home/sebastian/Documents/Projects/akita/data/benchmark-data/data-processed/univariate/KDD-TSAD/205_UCR_Anomaly_CHARISfive.test.csv
> Processing source

... processed source dataset 231: 232_UCR_Anomaly_mit14134longtermecg_8763_57530_57790.txt -> /home/sebastian/Documents/Projects/akita/data/benchmark-data/data-processed/univariate/KDD-TSAD/232_UCR_Anomaly_mit14134longtermecg.test.csv
> Processing source dataset 232
  skipped writing dataset 232 to disk, because it already exists.
  skipped analyzing dataset 232, because metadata already exists.
... processed source dataset 232: 233_UCR_Anomaly_mit14157longtermecg_18913_24500_24501.txt -> /home/sebastian/Documents/Projects/akita/data/benchmark-data/data-processed/univariate/KDD-TSAD/233_UCR_Anomaly_mit14157longtermecg.test.csv
> Processing source dataset 233
  skipped writing dataset 233 to disk, because it already exists.
  skipped analyzing dataset 233, because metadata already exists.
... processed source dataset 233: 234_UCR_Anomaly_mit14157longtermecg_18913_24600_24601.txt -> /home/sebastian/Documents/Projects/akita/data/benchmark-data/data-processed/univariate/KDD-TSAD/234_UCR_An

In [7]:
dm.refresh()
# Select all rows of this collection: first index level pinned to the collection name,
# second level unrestricted.
collection_rows = pd.IndexSlice[dataset_collection_name:dataset_collection_name, :]
dm.df().loc[collection_rows]

Unnamed: 0_level_0,Unnamed: 1_level_0,train_path,test_path,dataset_type,datetime_index,split_at,train_type,train_is_normal,input_type,length,dimensions,contamination,num_anomalies,min_anomaly_length,median_anomaly_length,max_anomaly_length,mean,stddev,trend,stationarity,period_size
collection_name,dataset_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
KDD-TSAD,001_UCR_Anomaly_DISTORTED1sddb40,univariate/KDD-TSAD/001_UCR_Anomaly_DISTORTED1...,univariate/KDD-TSAD/001_UCR_Anomaly_DISTORTED1...,synthetic,False,35000.0,semi-supervised,True,univariate,79795,1,0.007770,1,620,620,620,-27.144590,175.478213,no trend,difference_stationary,
KDD-TSAD,002_UCR_Anomaly_DISTORTED2sddb40,univariate/KDD-TSAD/002_UCR_Anomaly_DISTORTED2...,univariate/KDD-TSAD/002_UCR_Anomaly_DISTORTED2...,synthetic,False,35000.0,semi-supervised,True,univariate,80001,1,0.003750,1,300,300,300,-25.141412,173.018208,no trend,difference_stationary,
KDD-TSAD,003_UCR_Anomaly_DISTORTED3sddb40,univariate/KDD-TSAD/003_UCR_Anomaly_DISTORTED3...,univariate/KDD-TSAD/003_UCR_Anomaly_DISTORTED3...,synthetic,False,35000.0,semi-supervised,True,univariate,80000,1,0.003750,1,300,300,300,-24.828451,172.602470,no trend,difference_stationary,
KDD-TSAD,004_UCR_Anomaly,univariate/KDD-TSAD/004_UCR_Anomaly.train.csv,univariate/KDD-TSAD/004_UCR_Anomaly.test.csv,real,False,2500.0,semi-supervised,True,univariate,11000,1,0.018182,1,200,200,200,1130.949099,9315.219071,no trend,difference_stationary,
KDD-TSAD,004_UCR_Anomaly_DISTORTEDBIDMC1,univariate/KDD-TSAD/004_UCR_Anomaly_DISTORTEDB...,univariate/KDD-TSAD/004_UCR_Anomaly_DISTORTEDB...,synthetic,False,2500.0,semi-supervised,True,univariate,11000,1,0.018182,1,200,200,200,1130.949099,9315.219071,no trend,difference_stationary,
KDD-TSAD,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
KDD-TSAD,246_UCR_Anomaly_tilt12755mtable,univariate/KDD-TSAD/246_UCR_Anomaly_tilt12755m...,univariate/KDD-TSAD/246_UCR_Anomaly_tilt12755m...,synthetic,False,100211.0,semi-supervised,True,univariate,299867,1,0.000900,1,270,270,270,5041.727730,889.323740,no trend,not_stationary,
KDD-TSAD,247_UCR_Anomaly_tilt12755mtable,univariate/KDD-TSAD/247_UCR_Anomaly_tilt12755m...,univariate/KDD-TSAD/247_UCR_Anomaly_tilt12755m...,synthetic,False,50211.0,semi-supervised,True,univariate,200000,1,0.000400,1,80,80,80,4978.535763,876.777608,no trend,not_stationary,
KDD-TSAD,248_UCR_Anomaly_weallwalk,univariate/KDD-TSAD/248_UCR_Anomaly_weallwalk....,univariate/KDD-TSAD/248_UCR_Anomaly_weallwalk....,synthetic,False,2000.0,semi-supervised,True,univariate,8432,1,0.000593,1,5,5,5,-1.185222,0.398741,no trend,not_stationary,
KDD-TSAD,249_UCR_Anomaly_weallwalk,univariate/KDD-TSAD/249_UCR_Anomaly_weallwalk....,univariate/KDD-TSAD/249_UCR_Anomaly_weallwalk....,synthetic,False,2753.0,semi-supervised,True,univariate,10524,1,0.002851,1,30,30,30,-1.185866,0.392685,no trend,not_stationary,


## Exploration

In [None]:
# Collect the source files once and show their names.
datasets = find_datasets(source_folder)
[source_file.name for source_file in datasets]

In [None]:
# Build the labelled frame for the fourth source file by hand.
f = datasets[3]
data = np.genfromtxt(f)
# The file stem ends in "_<split>_<begin>_<end>".
anomaly = tuple(map(int, f.stem.split("_")[-2:]))
split_at = int(f.stem.split("_")[-3])
df = pd.DataFrame(data, columns=["value"]).assign(is_anomaly=0)
# Rows from <begin> up to (excluding) <end> are labelled anomalous.
df.loc[range(anomaly[0], anomaly[1]), "is_anomaly"] = 1
df

In [None]:
# Plot value and label columns, shade the anomaly window, and zoom in on it.
df.plot()
anomaly_patch = matplotlib.patches.Rectangle(
    xy=(anomaly[0], data.min()),
    width=anomaly[1]-anomaly[0],
    height=data.max()-data.min(),
    color="yellow", alpha=0.75
)
plt.gca().add_patch(anomaly_patch)
# Show 1500 points of context on either side of the anomaly.
plt.xlim(anomaly[0]-1500, anomaly[1]+1500)
plt.show()

In [None]:
# Plot the second source file with its train/test split and anomaly window.
plot_dataset(datasets[1])
plt.show()

In [None]:
from timeeval.utils.metrics import Metric

def print_metrics(labels, scores):
    """Display ROC_AUC, PR_AUC and AVERAGE_PRECISION of ``scores`` against ``labels``.

    A metric that raises is reported on stdout and left out of the displayed table,
    so one failing metric does not hide the others.
    """
    rows = []
    for metric in (Metric.ROC_AUC, Metric.PR_AUC, Metric.AVERAGE_PRECISION):
        try:
            value = metric(labels, scores)
        except Exception as e:
            print(f"Error in calculation of {metric.name}: {repr(e)}")
        else:
            rows.append((metric.name, value))
    table = pd.DataFrame(rows, columns=["Name", "Value"])
    # One column per metric, single "Value" row.
    display(table.set_index("Name").T)

# load datasets and ground truth
f = datasets[1]
data = np.genfromtxt(f)
# The file stem ends in "_<split>_<begin>_<end>".
anomaly = tuple(map(int, f.stem.split("_")[-2:]))
split_at = int(f.stem.split("_")[-3])
df = pd.DataFrame(data, columns=["value"]).assign(is_anomaly=0)
# Rows from <begin> up to (excluding) <end> are labelled anomalous.
df.loc[range(anomaly[0], anomaly[1]), "is_anomaly"] = 1
df

In [None]:
# Baseline: scores rise linearly from 0 to 1 over the whole series, regardless of the labels.
# np.float64 is used because the np.float_ alias was removed in NumPy 2.0.
scores = np.linspace(0, 1, num=df.shape[0], dtype=np.float64)
print_metrics(df["is_anomaly"], scores)

plt.plot(scores, label="scores")
plt.plot(df["is_anomaly"], label="labels")
# The labels above only show up once a legend is drawn; a fixed location avoids the
# slow "best" placement search on long series.
plt.legend(loc="upper left")
plt.show()

In [None]:
# Scores are zero before the split and rise linearly from 0 to 1 afterwards.
# np.float64 is used because the np.float_ alias was removed in NumPy 2.0.
scores = np.zeros(df.shape[0])
scores[split_at:] = np.linspace(0, 1, num=df.shape[0]-split_at, dtype=np.float64)

print_metrics(df["is_anomaly"], scores)
plt.plot(scores, label="scores")
plt.plot(df["is_anomaly"], label="labels")
# The labels above only show up once a legend is drawn; a fixed location avoids the
# slow "best" placement search on long series.
plt.legend(loc="upper left")
plt.show()

In [None]:
# Linear ramp capped at 0.9 over the whole series, with the anomaly window
# (widened by 5 points on each side) scored highest.
# np.float64 is used because the np.float_ alias was removed in NumPy 2.0.
scores = np.linspace(0, 0.9, num=df.shape[0], dtype=np.float64)
scores[range(anomaly[0]-5, anomaly[1]+5)] = 1

print_metrics(df["is_anomaly"], scores)
plt.plot(scores, label="scores")
plt.plot(df["is_anomaly"], label="labels")
# The labels above only show up once a legend is drawn; a fixed location avoids the
# slow "best" placement search on long series.
plt.legend(loc="upper left")
plt.show()

In [None]:
# Zero before the split, linear ramp capped at 0.9 afterwards, and the anomaly
# window (widened by 5 points on each side) scored highest.
# np.float64 is used because the np.float_ alias was removed in NumPy 2.0.
scores = np.zeros(df.shape[0])
scores[split_at:] = np.linspace(0, 0.9, num=df.shape[0]-split_at, dtype=np.float64)
scores[range(anomaly[0]-5, anomaly[1]+5)] = 1

print_metrics(df["is_anomaly"], scores)
plt.plot(scores, label="scores")
plt.plot(df["is_anomaly"], label="labels")
# The labels above only show up once a legend is drawn; a fixed location avoids the
# slow "best" placement search on long series.
plt.legend(loc="upper left")
plt.show()

In [None]:
# Same as the previous experiment plus a second maximal-score window at a
# hard-coded position.
# NOTE(review): 10100:10400 presumably acts as a false positive outside the labelled
# anomaly - confirm it does not overlap the anomaly of the dataset loaded above.
# np.float64 is used because the np.float_ alias was removed in NumPy 2.0.
scores = np.zeros(df.shape[0])
scores[split_at:] = np.linspace(0, 0.9, num=df.shape[0]-split_at, dtype=np.float64)
scores[10100:10400] = 1
scores[range(anomaly[0]-5, anomaly[1]+5)] = 1

print_metrics(df["is_anomaly"], scores)
plt.plot(scores, label="scores")
plt.plot(df["is_anomaly"], label="labels")
# The labels above only show up once a legend is drawn; a fixed location avoids the
# slow "best" placement search on long series.
plt.legend(loc="upper left")
plt.show()