# Prepare Anomaly Labels

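In this notebook we take the `labeled_anomalies.csv` file from the telemanom project
and convert it into the `labels.csv` file that we will later use in Orion. The anomaly
intervals, which telemanom gives as row indices, are translated into the timestamps of each signal.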

In [1]:
import pandas as pd

In [2]:
from orion.data import load_signal

In [3]:
# Location of the raw `labeled_anomalies.csv` in the telemanom GitHub repository (master branch).
CSV_URL = 'https://github.com/khundman/telemanom/raw/master/labeled_anomalies.csv'

In [4]:
# Download the label table straight from CSV_URL and parse it into a DataFrame.
df = pd.read_csv(CSV_URL)

In [5]:
# Preview the first rows of the raw label table.
df.head()

Unnamed: 0,chan_id,spacecraft,anomaly_sequences,class,num_values
0,P-1,SMAP,"[[1899, 2099], [4286, 4594], [3289, 3529]]","[contextual, contextual, contextual]",8505
1,S-1,SMAP,"[[5050, 5497]]",[point],7331
2,E-1,SMAP,"[[4750, 4780], [5360, 5836]]","[contextual, contextual]",8516
3,E-2,SMAP,"[[5348, 6745]]",[point],8532
4,E-3,SMAP,"[[4844, 8124]]",[point],8307


In [6]:
import json

# Build one record per row of the telemanom table: the signal name plus its
# anomalous intervals expressed as [start_timestamp, end_timestamp] pairs.
labels_data = []

for _, row in df.iterrows():
    signal = row.chan_id
    data = load_signal('csv/' + signal + '.csv')

    # The anomaly indices are applied to the last `num_values` rows of the
    # loaded signal, so only the timestamps of that tail are needed.
    test_timestamps = data['timestamp'].iloc[-row.num_values:]

    # `anomaly_sequences` is a string such as "[[1899, 2099], [4286, 4594]]".
    # Each positional index is mapped to its timestamp and cast to a built-in
    # int: NumPy scalars left inside the lists would be written by `to_csv`
    # through their repr, which is `np.int64(...)` under NumPy >= 2 and would
    # corrupt the `events` column of the CSV.
    events = [
        [int(test_timestamps.iloc[start]), int(test_timestamps.iloc[end])]
        for start, end in json.loads(row.anomaly_sequences)
    ]

    labels_data.append({
        'signal': signal,
        'events': events
    })

# Passing `columns` fixes the column order and still yields an (empty) frame
# with the right columns when there are no records.
labels = pd.DataFrame(labels_data, columns=['signal', 'events'])

In [7]:
# Preview the converted labels: one row per signal with its list of [start, end] events.
labels.head()

Unnamed: 0,signal,events
0,P-1,"[[1325872800, 1330192800], [1377432000, 138408..."
1,S-1,"[[1392768000, 1402423200]]"
2,E-1,"[[1387627200, 1388275200], [1400803200, 141108..."
3,E-2,"[[1400544000, 1430719200]]"
4,E-3,"[[1389657600, 1460505600]]"


In [8]:
# Save the labels table as `labels.csv` in the working directory, without the DataFrame index.
labels.to_csv('labels.csv', index=False)