In [1]:
from typing import Optional, Iterable

import pandas as pd
import numpy as np

from time_series_anomaly_detection.abstractions import (
    TimeSeriesAnomalyDetector
)

import tensorflow as tf
from tensorflow.keras.layers import *

# this is xavier initializer as described in the paper
from tensorflow.keras.initializers import glorot_normal
from tensorflow.keras.utils import Sequence
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator 
from sklearn.preprocessing import StandardScaler

In [2]:
class SCVAEDetector(TimeSeriesAnomalyDetector):
    """
    Anomaly detector implemented as described in https://arxiv.org/pdf/1712.06343.pdf

    Parameters
    ----------
    id_columns: Iterable[str], optional
        ID columns used to identify individual time series.

        Should be specified in case the detector is provided with
        time series during training or inference with ID columns
        included. Using these columns the detector can separate individual
        time series and not use ID columns as feature columns.
        In case they are not specified, all columns are regarded as feature
        columns and the provided data is regarded as a single time series.
    """

    def __init__(
        self,
        id_columns: Optional[Iterable[str]] = None,
        latent_dim: Optional[int] = 50
    ):
        super().__init__()
        self._id_columns = id_columns
        self.latent_dim = latent_dim

    def predict_anomaly_scores(
        self, X: pd.DataFrame, *args, **kwargs
    ) -> pd.Series:
        # TODO: return predicted anomaly scores for the given samples
        pass

    def fit(self, X: pd.DataFrame, *args, **kwargs) -> None:
        # TODO: perform training
        pass


In [3]:
def Encoder(x):    
    
    # Fire module 
    
    # squeeze convolution
    x = Conv1D(16, kernel_size=1, stride=1, kernel_initializer=glorot_normal, padding='same')(x)
    x = Activation('relu')(x)
    x = BatchNormalization(momentum=0.9)(x)

    # extend (expand) convolutions
    extend1 = Conv1D(16, kernel_size=1, stride=1, kernel_initializer=glorot_normal, padding='same')(x)
    extend1 = Activation('relu')(extend1)
    extend1 = BatchNormalization(momentum=0.9)(extend1)

    extend2 = Conv1D(32, kernel_size=3, stride=1, kernel_initializer=glorot_normal, padding='same')(x)
    extend2 = Activation('relu')(extend2)
    extend2 = BatchNormalization(momentum=0.9)(extend2)
    x = Concatenate()([extend1, extend1])
    
    # Fully connected layers
    
    mean = Dense(50)(x)
    stdev = Dense(50)(x)
    
    return x

def Decoder(x):    
    # squeeze convolution
    x = Conv1DTranspose(16, kernel_size=1, stride=1, kernel_initializer=glorot_normal, padding='same')(x)
    x = Activation('relu')(x)
    x = BatchNormalization(momentum=0.9)(x)

    # extend (expand) convolutions
    extend1 = Conv1DTranspose(16, kernel_size=1, stride=1, kernel_initializer=glorot_normal, padding='same')(x)
    extend1 = Activation('relu')(extend1)
    extend1 = BatchNormalization(momentum=0.9)(extend1)

    extend2 = Conv1DTranspose(1, kernel_size=3, stride=1, kernel_initializer=glorot_normal, padding='same')(x)
    extend2 = Activation('relu')(extend2)
    extend2 = BatchNormalization(momentum=0.9)(extend2)

    x = Concatenate()([extend1, extend1])
    return x

In [4]:
# dummy layers for now
def build_scvae():
    inputs = keras.Input(shape=(mnist.shape[1],mnist.shape[2],mnist.shape[3]), name="damaged_input")    
    # Encoder transforming image data to "encoder features", or a bottleneck layer
    x = layers.Conv2D(32,4)(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)
    output = layers.Conv2DTranspose(1, 3, activation='tanh')(x)
    autoencoder = keras.Model(inputs, output, name='autoencoder')
    display(autoencoder.summary())
    return autoencoder

In [7]:
def load_skab_valve1():
    all_series = []

    for file_num in range(16):
        df = pd.read_csv(f'../datasets/skab/valve1/{file_num}.csv',sep=';').drop(columns=['datetime', 'changepoint'])
        df['id_1'] = file_num
        df.pop('anomaly')
        all_series.append(df)
    
    id_cols = ['id_1']
    
    return pd.concat(all_series), id_cols

def split_multiple_timeseries_by_id(df_series,id_cols):
    all_series = [pd.DataFrame(y).drop(id_cols,axis=1) for x, y in df_series.groupby(id_cols, as_index=False)]
    anomalies = [all_series[i] for i in range(len(all_series))]
    return all_series, anomalies

data, indicators = load_skab_valve1()

In [31]:
class MultipleTimeseriesGenerator(Sequence):    
    def __init__(self, df_list, anomaly_list=None, time_window=8, shuffle=False, batch_size=128):
        if anomaly_list is None:
            self.anomaly_list = [np.zeros(len(series)) for series in df_list]
        else:
            self.anomaly_list = anomaly_list
            
        self.batch_size = batch_size
        self.generators = [TimeseriesGenerator(np.array(df_list[i]),np.array(anomaly_list[i]),length=time_window, 
                                               batch_size=batch_size, shuffle=shuffle) for i in range(len(df_list))]
        
        self.generator_lengths = [len(g) for g in self.generators]
        self.generator_indexes = np.cumsum(self.generator_lengths)
        self.len = np.sum(self.generator_lengths)

    def __len__(self):
        return self.len

    def __getitem__(self, index):
        # which series contains this index
        time_series_index = np.where(self.generator_indexes>index)[0][0]
        
        # get generator for the series, calculate position within than series and get its element
        element = self.generators[time_series_index][index % self.generator_indexes[max(0,time_series_index-1)]]
        return element

In [33]:
scaler = StandardScaler()
df[df.columns] = scaler.fit_transform(df[df.columns])
mtg = MultipleTimeseriesGenerator(data, indicators, time_window=8, shuffle=True, batch_size=16)

NameError: name 'df' is not defined