In [117]:
import pandas as pd
import sys
sys.path.append("../fraud_detection/src/")
from util import s_to_time_format, string_to_datetime,hour_to_range

df_train = pd.read_csv("/data/yunrui_li/fraud/dataset/train.csv")
df_test = pd.read_csv("/data/yunrui_li/fraud/dataset/test.csv")


for df in [df_train, df_test]:
    # pre-processing
    df["loctm_"] = df.loctm.astype(int).astype(str)
    df.loctm_ = df.loctm_.apply(s_to_time_format).apply(string_to_datetime)
    # time-related feature
    df["loctm_hour_of_day"] = df.loctm_.apply(lambda x: x.hour)
    df["loctm_minute_of_hour"] = df.loctm_.apply(lambda x: x.minute)
    df["loctm_second_of_min"] = df.loctm_.apply(lambda x: x.second)

    # removed the columns no need
    df.drop(columns = ["loctm_", "loctm","txkey"], axis = 1, inplace = True)


In [118]:
from tqdm import tqdm

def value_to_count(df_train, df_test):

    # continuous_feats = ["locdt","conam","loctm_hour_of_day",
    #                 "loctm_minute_of_hour","loctm_second_of_min"]

    # feats = [f for f in df_test.columns.tolist() if f not in continuous_feats]
    feats = ['acqic', 'bacno', 'cano', 'conam', 'contp', 'csmcu', 'ecfg', 'etymd',
       'flbmk', 'flg_3dsmk', 'hcefg', 'insfg', 'iterm', 'mcc',
       'mchno', 'ovrlt', 'scity', 'stocn', 'stscd']

    df = pd.concat([df_train[feats], df_test[feats]], axis = 0)
    for f in tqdm(feats):
        count_dict = df[f].value_counts(dropna = False).to_dict() 
        df_train[f] = df_train[f].apply(lambda v: count_dict[v])
        df_test[f] = df_test[f].apply(lambda v: count_dict[v])
        
#     continuous_feats = ['locdt', 'loctm_hour_of_day', 'loctm_minute_of_hour', 'loctm_second_of_min']
#     for f in tqdm(continuous_feats):
#         df_train_[f] = df_train[f]
#         df_test_[f] = df_test[f]
        
#     if mode == 'train':
#         df_train_["fraud_ind"] = df_train["fraud_ind"]

    return df_train, df_test

def feature_normalization_auto(df_train, df_test):
    """
    return two inputs of autoencoder, one is for train and another one is for test
    """
    from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler
    feats = ['acqic', 'bacno', 'cano', 'conam', 'contp', 'csmcu', 'ecfg', 'etymd',
       'flbmk', 'flg_3dsmk', 'hcefg', 'insfg', 'iterm', 'locdt', 'mcc',
       'mchno', 'ovrlt', 'scity', 'stocn', 'stscd', 'loctm_hour_of_day',
       'loctm_minute_of_hour', 'loctm_second_of_min']
    df = pd.concat([df_train[feats], df_test[feats]], axis = 0)


    for f in tqdm(feats):
        try:
            #scaler = MinMaxScaler()
            max_ = df[f].max()
            min_ = df[f].min()
            df_train[f] = df_train[f].apply(lambda x: (x-min_)/(max_-min_))
            df_test[f] = df_test[f].apply(lambda x: (x-min_)/(max_-min_))
        except:
            print(f)
    return df_train, df_test



In [119]:
pd.options.display.max_columns = 100
df_train["cano_locdt_index"] = ["{}_{}".format(str(i),str(j)) for i,j in zip(df_train.cano,df_train.locdt)]
df_test["cano_locdt_index"] = ["{}_{}".format(str(i),str(j)) for i,j in zip(df_test.cano,df_test.locdt)]

In [120]:
#from autoencoder import value_to_count,feature_normalization_auto
df_train, df_test = value_to_count(df_train, df_test)
df_train, df_test = feature_normalization_auto(df_train, df_test)


100%|██████████| 19/19 [00:13<00:00,  1.36it/s]
100%|██████████| 23/23 [00:31<00:00,  1.37s/it]


In [121]:
df_train

Unnamed: 0,acqic,bacno,cano,conam,contp,csmcu,ecfg,etymd,flbmk,flg_3dsmk,fraud_ind,hcefg,insfg,iterm,locdt,mcc,mchno,ovrlt,scity,stocn,stscd,loctm_hour_of_day,loctm_minute_of_hour,loctm_second_of_min,cano_locdt_index
0,0.393835,0.070789,0.070789,1.000000,1.0,0.142137,1.0,0.324602,1.000000,1.0,0,1.000000,1.0,1.0,0.268908,0.538547,0.646630,1.0,0.196151,1.0,1.0,0.739130,0.440678,0.881356,38038_33
1,0.191005,0.019713,0.019713,0.363024,1.0,0.142137,1.0,0.955971,1.000000,1.0,0,0.038308,1.0,1.0,0.067227,0.233053,1.000000,1.0,1.000000,1.0,1.0,0.434783,0.864407,0.237288,45725_9
2,0.393835,0.015233,0.015233,1.000000,1.0,0.142137,1.0,0.324602,1.000000,1.0,0,1.000000,1.0,1.0,0.042017,0.538547,0.646630,1.0,0.196151,1.0,1.0,0.652174,0.406780,0.983051,188328_6
3,0.794559,0.065412,0.050179,0.001142,1.0,1.000000,1.0,1.000000,1.000000,1.0,0,1.000000,1.0,1.0,0.033613,0.719283,0.015788,1.0,0.018220,1.0,1.0,0.739130,0.491525,0.779661,29967_5
4,0.497653,0.068996,0.055556,0.002165,1.0,1.000000,1.0,0.783407,1.000000,1.0,0,1.000000,1.0,1.0,0.042017,0.568704,0.000738,1.0,1.000000,1.0,1.0,0.782609,0.355932,0.491525,81305_6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1521782,0.042507,0.008961,0.008961,0.002075,1.0,0.010258,0.0,0.612633,0.005172,0.0,0,0.009036,1.0,1.0,0.025210,0.088207,0.026539,1.0,1.000000,1.0,1.0,0.826087,0.271186,0.711864,15189_4
1521783,0.036421,0.051971,0.049283,0.004056,1.0,0.010258,0.0,0.612633,0.005172,0.0,0,0.009036,1.0,1.0,0.100840,0.159732,0.095499,1.0,0.015205,1.0,1.0,0.434783,0.389831,0.644068,116252_13
1521784,1.000000,0.011649,0.011649,0.076722,1.0,0.010258,0.0,0.612633,0.005172,0.0,0,0.009036,1.0,1.0,0.235294,0.118164,0.083791,1.0,1.000000,1.0,1.0,1.000000,0.779661,0.305085,93598_29
1521785,0.245115,0.023297,0.023297,0.076722,1.0,0.010258,0.0,0.955971,0.005172,0.0,0,0.009036,1.0,1.0,0.193277,0.118164,0.083791,1.0,1.000000,1.0,1.0,0.913043,0.881356,0.305085,197460_24


# prepare training data

In [122]:
# 把卡號中有fraud的拿掉
fraud_cano_id = [int(i) for i in df_train[df_train.fraud_ind == 1].cano.unique().tolist()]
fraud_cano_id = df_train[df_train.fraud_ind == 1].cano.unique().tolist()

df_train_normal_cano_id = df_train[~df_train.cano.isin(fraud_cano_id)]

In [123]:
df_train_normal_cano_id.shape

(34956, 25)

In [124]:
len(fraud_cano_id)

141

In [125]:
df_train_normal_cano_id.fraud_ind.value_counts()

0    34956
Name: fraud_ind, dtype: int64

In [126]:
df_train_normal_cano_id.cano.isnull().value_counts()

False    34956
Name: cano, dtype: int64

In [47]:
df_train_normal_cano_id.sort_values(by = ["cano", "locdt"], inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [127]:
df_train_normal_cano_id

Unnamed: 0,acqic,bacno,cano,conam,contp,csmcu,ecfg,etymd,flbmk,flg_3dsmk,fraud_ind,hcefg,insfg,iterm,locdt,mcc,mchno,ovrlt,scity,stocn,stscd,loctm_hour_of_day,loctm_minute_of_hour,loctm_second_of_min,cano_locdt_index
53,0.191005,0.149642,0.149642,0.363024,1.0,0.142137,1.0,0.955971,1.000000,1.0,0,0.038308,1.0,1.0,0.050420,0.233053,1.000000,1.0,1.000000,1.000000,1.0,0.434783,0.745763,0.000000,185426_7
57,0.191005,0.249104,0.248208,0.363024,1.0,0.142137,1.0,0.955971,1.000000,1.0,0,0.038308,1.0,1.0,0.042017,0.233053,1.000000,1.0,1.000000,1.000000,1.0,0.434783,0.847458,0.152542,91061_6
62,0.191005,1.000000,1.000000,0.363024,1.0,0.142137,1.0,0.955971,1.000000,1.0,0,0.038308,1.0,1.0,0.075630,0.233053,1.000000,1.0,1.000000,1.000000,1.0,0.434783,0.779661,0.440678,71502_10
73,0.191005,0.668459,0.668459,0.363024,1.0,0.142137,1.0,0.955971,1.000000,1.0,0,0.038308,1.0,1.0,0.126050,0.233053,1.000000,1.0,1.000000,1.000000,1.0,0.434783,0.864407,0.220339,188447_16
124,0.393835,0.115591,0.106631,1.000000,1.0,0.142137,1.0,0.324602,1.000000,1.0,0,1.000000,1.0,1.0,0.100840,0.538547,0.037760,1.0,0.196151,1.000000,1.0,0.652174,0.355932,0.050847,145699_13
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1521651,0.794559,0.120072,0.120072,0.005750,1.0,0.010258,1.0,0.783407,0.005172,0.0,0,0.009036,1.0,1.0,0.193277,0.135549,0.000063,1.0,0.009243,1.000000,1.0,0.608696,1.000000,0.813559,161412_24
1521691,1.000000,0.111111,0.111111,0.076722,1.0,0.010258,0.0,0.612633,0.005172,0.0,0,0.009036,1.0,1.0,0.067227,0.118164,0.083791,1.0,1.000000,1.000000,1.0,1.000000,0.983051,0.762712,156143_9
1521700,1.000000,0.407706,0.132616,0.076722,1.0,0.010258,0.0,0.612633,0.005172,0.0,0,0.009036,1.0,1.0,0.117647,0.118164,0.083791,1.0,1.000000,1.000000,1.0,0.521739,0.118644,0.254237,116703_15
1521764,0.321774,0.175627,0.175627,0.007861,1.0,0.010258,0.0,0.612633,0.005172,0.0,0,0.009036,1.0,1.0,0.201681,0.198603,0.051728,1.0,1.000000,1.000000,1.0,0.565217,0.067797,0.898305,10882_25


In [112]:
import numpy as np

def partition_(df, num_features):
    data = []
    for i in range(len(df)):
        out = None
        if i == 0:
            out = np.concatenate(((np.zeros((2,num_features))),df.iloc[:1].values))
        elif i== 1:
            out = np.concatenate(((np.zeros((1,num_features))),df.iloc[:i+1].values))
        else:
            out = df.iloc[i+1-3:i+1].values
        data.append(out)
    return data

def partition(df_, sequence_length = 3):
    feats = [f for f in df_.columns if f not in {"fraud_ind"}]
    sequences = []
    for _, df in df_[feats].groupby(by = "cano"):
        data = partition_(df[feats], num_features = len(feats))
        for d in data:
            sequences.append(d)
    return sequences

df_train_sequences = partition(df_train_normal_cano_id.iloc[:100])

In [114]:
X_train = np.concatenate(df_train_sequences)
X_train = pd.DataFrame(X_train)

In [49]:


# sequence_length = 3
# feats = [f for f in df_train_normal_cano_id.columns if f not in {"fraud_ind"}]
# sequences = []
# for _, df in df_train_normal_cano_id[feats].groupby(by = "cano"):
#     print (_)
#     data = partition_(df[feats], num_features = len(feats))
    
#     for d in data:
#         sequences.append(d)
#     break

0.08064516129032258


In [50]:
len(sequences)

1606

In [51]:
X_train = np.concatenate(sequences)
X_train.shape

(4818, 24)

In [115]:
X_train = pd.DataFrame(X_train)

In [116]:
pd.options.display.max_rows = 100

X_train.head(100)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23
0,0.0,0.0,0.0,0.0,0,0.0,0,0.0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0
1,0.0,0.0,0.0,0.0,0,0.0,0,0.0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0
2,0.218003,0.0806452,0.0806452,0.00778997,1,1.0,0,0.955971,1,1,1,1,1,0.0,0.376934,0.0447597,1,1.0,1.0,1,0.869565,0.508475,0.423729,3749_1
3,0.0,0.0,0.0,0.0,0,0.0,0,0.0,0,0,0,0,0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0
4,0.218003,0.0806452,0.0806452,0.00778997,1,1.0,0,0.955971,1,1,1,1,1,0.0,0.376934,0.0447597,1,1.0,1.0,1,0.869565,0.508475,0.423729,3749_1
5,0.497653,0.0806452,0.0806452,0.0263015,1,1.0,1,0.783407,1,1,1,1,1,0.0,0.568704,0.000345271,1,1.0,1.0,1,0.304348,0.338983,0.677966,210775_1
6,0.218003,0.0806452,0.0806452,0.00778997,1,1.0,0,0.955971,1,1,1,1,1,0.0,0.376934,0.0447597,1,1.0,1.0,1,0.869565,0.508475,0.423729,3749_1
7,0.497653,0.0806452,0.0806452,0.0263015,1,1.0,1,0.783407,1,1,1,1,1,0.0,0.568704,0.000345271,1,1.0,1.0,1,0.304348,0.338983,0.677966,210775_1
8,0.393835,0.0842294,0.0806452,0.152505,1,0.142137,1,0.324602,1,1,1,1,1,0.0,0.538547,0.0111428,1,0.196151,1.0,1,0.434783,0.79661,0.423729,75818_1
9,0.497653,0.0806452,0.0806452,0.0263015,1,1.0,1,0.783407,1,1,1,1,1,0.0,0.568704,0.000345271,1,1.0,1.0,1,0.304348,0.338983,0.677966,210775_1


# modeling

In [66]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import DataLoader
from tqdm import trange
import sys
sys.path.append("../DeepADoTS/src/algorithms/")
# from .algorithm_utils import Algorithm, PyTorchUtils
# from .autoencoder import AutoEncoderModule
#from lstm_enc_dec_axl import LSTMEDModule
import abc
import logging
import random

import numpy as np
import torch
import tensorflow as tf
from tensorflow.python.client import device_lib
from torch.autograd import Variable


class Algorithm(metaclass=abc.ABCMeta):
    def __init__(self, module_name, name, seed, details=False):
        self.logger = logging.getLogger(module_name)
        self.name = name
        self.seed = seed
        self.details = details
        self.prediction_details = {}

        if self.seed is not None:
            random.seed(seed)
            np.random.seed(seed)

    def __str__(self):
        return self.name

    @abc.abstractmethod
    def fit(self, X):
        """
        Train the algorithm on the given dataset
        """

    @abc.abstractmethod
    def predict(self, X):
        """
        :return anomaly score
        """


class PyTorchUtils(metaclass=abc.ABCMeta):
    def __init__(self, seed, gpu):
        self.gpu = gpu
        self.seed = seed
        if self.seed is not None:
            torch.manual_seed(self.seed)
            torch.cuda.manual_seed(self.seed)
        self.framework = 0

    @property
    def device(self):
        return torch.device(f'cuda:{self.gpu}' if torch.cuda.is_available() and self.gpu is not None else 'cpu')

    def to_var(self, t, **kwargs):
        # ToDo: check whether cuda Variable.
        t = t.to(self.device)
        return Variable(t, **kwargs)

    def to_device(self, model):
        model.to(self.device)


class TensorflowUtils(metaclass=abc.ABCMeta):
    def __init__(self, seed, gpu):
        self.gpu = gpu
        self.seed = seed
        if self.seed is not None:
            tf.set_random_seed(seed)
        self.framework = 1

    @property
    def device(self):
        local_device_protos = device_lib.list_local_devices()
        gpus = [x.name for x in local_device_protos if x.device_type == 'GPU']
        return tf.device(gpus[self.gpu] if gpus and self.gpu is not None else '/cpu:0')
    
import logging

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from scipy.stats import multivariate_normal
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from tqdm import trange


class AutoEncoder(Algorithm, PyTorchUtils):
    def __init__(self, name: str='AutoEncoder', num_epochs: int=10, batch_size: int=20, lr: float=1e-3,
                 hidden_size: int=5, sequence_length: int=30, train_gaussian_percentage: float=0.25,
                 seed: int=None, gpu: int=None, details=True):
        Algorithm.__init__(self, __name__, name, seed, details=details)
        PyTorchUtils.__init__(self, seed, gpu)
        self.num_epochs = num_epochs
        self.batch_size = batch_size
        self.lr = lr

        self.hidden_size = hidden_size
        self.sequence_length = sequence_length
        self.train_gaussian_percentage = train_gaussian_percentage

        self.aed = None
        self.mean, self.cov = None, None

    def fit(self, X: pd.DataFrame):
        X.interpolate(inplace=True)
        X.bfill(inplace=True)
        data = X.values
        sequences = [data[i:i + self.sequence_length] for i in range(data.shape[0] - self.sequence_length + 1)]
        indices = np.random.permutation(len(sequences))
        split_point = int(self.train_gaussian_percentage * len(sequences))
        train_loader = DataLoader(dataset=sequences, batch_size=self.batch_size, drop_last=True,
                                  sampler=SubsetRandomSampler(indices[:-split_point]), pin_memory=True)
        train_gaussian_loader = DataLoader(dataset=sequences, batch_size=self.batch_size, drop_last=True,
                                           sampler=SubsetRandomSampler(indices[-split_point:]), pin_memory=True)

        self.aed = AutoEncoderModule(X.shape[1], self.sequence_length, self.hidden_size, seed=self.seed, gpu=self.gpu)
        self.to_device(self.aed)  # .double()
        optimizer = torch.optim.Adam(self.aed.parameters(), lr=self.lr)

        self.aed.train()
        for epoch in trange(self.num_epochs):
            logging.debug(f'Epoch {epoch+1}/{self.num_epochs}.')
            for ts_batch in train_loader:
                output = self.aed(self.to_var(ts_batch))
                loss = nn.MSELoss(size_average=False)(output, self.to_var(ts_batch.float()))
                self.aed.zero_grad()
                loss.backward()
                optimizer.step()

        self.aed.eval()
        error_vectors = []
        for ts_batch in train_gaussian_loader:
            output = self.aed(self.to_var(ts_batch))
            error = nn.L1Loss(reduce=False)(output, self.to_var(ts_batch.float()))
            error_vectors += list(error.view(-1, X.shape[1]).data.cpu().numpy())

        self.mean = np.mean(error_vectors, axis=0)
        self.cov = np.cov(error_vectors, rowvar=False)

    def predict(self, X: pd.DataFrame) -> np.array:
        X.interpolate(inplace=True)
        X.bfill(inplace=True)
        data = X.values
        sequences = [data[i:i + self.sequence_length] for i in range(data.shape[0] - self.sequence_length + 1)]
        data_loader = DataLoader(dataset=sequences, batch_size=self.batch_size, shuffle=False, drop_last=False)

        self.aed.eval()
        mvnormal = multivariate_normal(self.mean, self.cov, allow_singular=True)
        scores = []
        outputs = []
        errors = []
        for idx, ts in enumerate(data_loader):
            output = self.aed(self.to_var(ts))
            error = nn.L1Loss(reduce=False)(output, self.to_var(ts.float()))
            score = -mvnormal.logpdf(error.view(-1, X.shape[1]).data.cpu().numpy())
            scores.append(score.reshape(ts.size(0), self.sequence_length))
            if self.details:
                outputs.append(output.data.numpy())
                errors.append(error.data.numpy())

        # stores seq_len-many scores per timestamp and averages them
        scores = np.concatenate(scores)
        lattice = np.full((self.sequence_length, X.shape[0]), np.nan)
        for i, score in enumerate(scores):
            lattice[i % self.sequence_length, i:i + self.sequence_length] = score
        scores = np.nanmean(lattice, axis=0)

        if self.details:
            outputs = np.concatenate(outputs)
            lattice = np.full((self.sequence_length, X.shape[0], X.shape[1]), np.nan)
            for i, output in enumerate(outputs):
                lattice[i % self.sequence_length, i:i + self.sequence_length, :] = output
            self.prediction_details.update({'reconstructions_mean': np.nanmean(lattice, axis=0).T})

            errors = np.concatenate(errors)
            lattice = np.full((self.sequence_length, X.shape[0], X.shape[1]), np.nan)
            for i, error in enumerate(errors):
                lattice[i % self.sequence_length, i:i + self.sequence_length, :] = error
            self.prediction_details.update({'errors_mean': np.nanmean(lattice, axis=0).T})

        return scores


class AutoEncoderModule(nn.Module, PyTorchUtils):
    def __init__(self, n_features: int, sequence_length: int, hidden_size: int, seed: int, gpu: int):
        # Each point is a flattened window and thus has as many features as sequence_length * features
        super().__init__()
        PyTorchUtils.__init__(self, seed, gpu)
        input_length = n_features * sequence_length

        # creates powers of two between eight and the next smaller power from the input_length
        dec_steps = 2 ** np.arange(max(np.ceil(np.log2(hidden_size)), 2), np.log2(input_length))[1:]
        dec_setup = np.concatenate([[hidden_size], dec_steps.repeat(2), [input_length]])
        enc_setup = dec_setup[::-1]

        layers = np.array([[nn.Linear(int(a), int(b)), nn.Tanh()] for a, b in enc_setup.reshape(-1, 2)]).flatten()[:-1]
        self._encoder = nn.Sequential(*layers)
        self.to_device(self._encoder)

        layers = np.array([[nn.Linear(int(a), int(b)), nn.Tanh()] for a, b in dec_setup.reshape(-1, 2)]).flatten()[:-1]
        self._decoder = nn.Sequential(*layers)
        self.to_device(self._decoder)

    def forward(self, ts_batch, return_latent: bool=False):
        flattened_sequence = ts_batch.view(ts_batch.size(0), -1)
        enc = self._encoder(flattened_sequence.float())
        dec = self._decoder(enc)
        reconstructed_sequence = dec.view(ts_batch.size())
        return (reconstructed_sequence, enc) if return_latent else reconstructed_sequence

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [67]:
import logging

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from scipy.stats import multivariate_normal
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from tqdm import trange

# from .algorithm_utils import Algorithm, PyTorchUtils


class LSTMED(Algorithm, PyTorchUtils):
    def __init__(self, name: str='LSTM-ED', num_epochs: int=10, batch_size: int=20, lr: float=1e-3,
                 hidden_size: int=5, sequence_length: int=30, train_gaussian_percentage: float=0.25,
                 n_layers: tuple=(1, 1), use_bias: tuple=(True, True), dropout: tuple=(0, 0),
                 seed: int=None, gpu: int = None, details=True):
        Algorithm.__init__(self, __name__, name, seed, details=details)
        PyTorchUtils.__init__(self, seed, gpu)
        self.num_epochs = num_epochs
        self.batch_size = batch_size
        self.lr = lr

        self.hidden_size = hidden_size
        self.sequence_length = sequence_length
        self.train_gaussian_percentage = train_gaussian_percentage

        self.n_layers = n_layers
        self.use_bias = use_bias
        self.dropout = dropout

        self.lstmed = None
        self.mean, self.cov = None, None

    def fit(self, X: pd.DataFrame):
        X.interpolate(inplace=True)
        X.bfill(inplace=True)
        data = X.values
        sequences = [data[i:i + self.sequence_length] for i in range(data.shape[0] - self.sequence_length + 1)]
        indices = np.random.permutation(len(sequences))
        split_point = int(self.train_gaussian_percentage * len(sequences))
        train_loader = DataLoader(dataset=sequences, batch_size=self.batch_size, drop_last=True,
                                  sampler=SubsetRandomSampler(indices[:-split_point]), pin_memory=True)
        train_gaussian_loader = DataLoader(dataset=sequences, batch_size=self.batch_size, drop_last=True,
                                           sampler=SubsetRandomSampler(indices[-split_point:]), pin_memory=True)

        self.lstmed = LSTMEDModule(X.shape[1], self.hidden_size,
                                   self.n_layers, self.use_bias, self.dropout,
                                   seed=self.seed, gpu=self.gpu)
        self.to_device(self.lstmed)
        optimizer = torch.optim.Adam(self.lstmed.parameters(), lr=self.lr)

        self.lstmed.train()
        for epoch in trange(self.num_epochs):
            logging.debug(f'Epoch {epoch+1}/{self.num_epochs}.')
            for ts_batch in train_loader:
                output = self.lstmed(self.to_var(ts_batch))
                loss = nn.MSELoss(size_average=False)(output, self.to_var(ts_batch.float()))
                self.lstmed.zero_grad()
                loss.backward()
                optimizer.step()

        self.lstmed.eval()
        error_vectors = []
        for ts_batch in train_gaussian_loader:
            output = self.lstmed(self.to_var(ts_batch))
            error = nn.L1Loss(reduce=False)(output, self.to_var(ts_batch.float()))
            error_vectors += list(error.view(-1, X.shape[1]).data.cpu().numpy())

        self.mean = np.mean(error_vectors, axis=0)
        self.cov = np.cov(error_vectors, rowvar=False)

    def predict(self, X: pd.DataFrame):
        X.interpolate(inplace=True)
        X.bfill(inplace=True)
        data = X.values
        sequences = [data[i:i + self.sequence_length] for i in range(data.shape[0] - self.sequence_length + 1)]
        data_loader = DataLoader(dataset=sequences, batch_size=self.batch_size, shuffle=False, drop_last=False)

        self.lstmed.eval()
        mvnormal = multivariate_normal(self.mean, self.cov, allow_singular=True)
        scores = []
        outputs = []
        errors = []
        for idx, ts in enumerate(data_loader):
            output = self.lstmed(self.to_var(ts))
            error = nn.L1Loss(reduce=False)(output, self.to_var(ts.float()))
            score = -mvnormal.logpdf(error.view(-1, X.shape[1]).data.cpu().numpy())
            scores.append(score.reshape(ts.size(0), self.sequence_length))
            if self.details:
                outputs.append(output.data.numpy())
                errors.append(error.data.numpy())

        # stores seq_len-many scores per timestamp and averages them
        scores = np.concatenate(scores)
        lattice = np.full((self.sequence_length, data.shape[0]), np.nan)
        for i, score in enumerate(scores):
            lattice[i % self.sequence_length, i:i + self.sequence_length] = score
        scores = np.nanmean(lattice, axis=0)

        if self.details:
            outputs = np.concatenate(outputs)
            lattice = np.full((self.sequence_length, X.shape[0], X.shape[1]), np.nan)
            for i, output in enumerate(outputs):
                lattice[i % self.sequence_length, i:i + self.sequence_length, :] = output
            self.prediction_details.update({'reconstructions_mean': np.nanmean(lattice, axis=0).T})

            errors = np.concatenate(errors)
            lattice = np.full((self.sequence_length, X.shape[0], X.shape[1]), np.nan)
            for i, error in enumerate(errors):
                lattice[i % self.sequence_length, i:i + self.sequence_length, :] = error
            self.prediction_details.update({'errors_mean': np.nanmean(lattice, axis=0).T})

        return scores


class LSTMEDModule(nn.Module, PyTorchUtils):
    def __init__(self, n_features: int, hidden_size: int,
                 n_layers: tuple, use_bias: tuple, dropout: tuple,
                 seed: int, gpu: int):
        super().__init__()
        PyTorchUtils.__init__(self, seed, gpu)
        self.n_features = n_features
        self.hidden_size = hidden_size

        self.n_layers = n_layers
        self.use_bias = use_bias
        self.dropout = dropout

        self.encoder = nn.LSTM(self.n_features, self.hidden_size, batch_first=True,
                               num_layers=self.n_layers[0], bias=self.use_bias[0], dropout=self.dropout[0])
        self.to_device(self.encoder)
        self.decoder = nn.LSTM(self.n_features, self.hidden_size, batch_first=True,
                               num_layers=self.n_layers[1], bias=self.use_bias[1], dropout=self.dropout[1])
        self.to_device(self.decoder)
        self.hidden2output = nn.Linear(self.hidden_size, self.n_features)
        self.to_device(self.hidden2output)

    def _init_hidden(self, batch_size):
        return (self.to_var(torch.Tensor(self.n_layers[0], batch_size, self.hidden_size).zero_()),
                self.to_var(torch.Tensor(self.n_layers[0], batch_size, self.hidden_size).zero_()))

    def forward(self, ts_batch, return_latent: bool=False):
        batch_size = ts_batch.shape[0]

        # 1. Encode the timeseries to make use of the last hidden state.
        enc_hidden = self._init_hidden(batch_size)  # initialization with zero
        _, enc_hidden = self.encoder(ts_batch.float(), enc_hidden)  # .float() here or .double() for the model

        # 2. Use hidden state as initialization for our Decoder-LSTM
        dec_hidden = enc_hidden

        # 3. Also, use this hidden state to get the first output aka the last point of the reconstructed timeseries
        # 4. Reconstruct timeseries backwards
        #    * Use true data for training decoder
        #    * Use hidden2output for prediction
        output = self.to_var(torch.Tensor(ts_batch.size()).zero_())
        for i in reversed(range(ts_batch.shape[1])):
            output[:, i, :] = self.hidden2output(dec_hidden[0][0, :])

            if self.training:
                _, dec_hidden = self.decoder(ts_batch[:, i].unsqueeze(1).float(), dec_hidden)
            else:
                _, dec_hidden = self.decoder(output[:, i].unsqueeze(1), dec_hidden)

        return (output, enc_hidden[1][-1]) if return_latent else output

In [104]:
"""Adapted from Daniel Stanley Tan (https://github.com/danieltan07/dagmm)"""
import logging
import sys
sys.path.append("../DeepADoTS/src/algorithms/")

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import DataLoader
from tqdm import trange

# from .algorithm_utils import Algorithm, PyTorchUtils
# from .autoencoder import AutoEncoderModule
#from lstm_enc_dec_axl import LSTMEDModule


class DAGMM(Algorithm, PyTorchUtils):
    class AutoEncoder:
        NN = AutoEncoderModule
        LSTM = LSTMEDModule

    def __init__(self, num_epochs=10, lambda_energy=0.1, lambda_cov_diag=0.005, lr=1e-3, batch_size=50, gmm_k=3,
                 normal_percentile=80, sequence_length=30, autoencoder_type=AutoEncoderModule, autoencoder_args=None,
                 hidden_size: int=5, seed: int=None, gpu: int=None, details=True):
        _name = 'LSTM-DAGMM' if autoencoder_type == LSTMEDModule else 'DAGMM'
        Algorithm.__init__(self, __name__, _name, seed, details=details)
        PyTorchUtils.__init__(self, seed, gpu)
        self.num_epochs = num_epochs
        self.lambda_energy = lambda_energy
        self.lambda_cov_diag = lambda_cov_diag
        self.lr = lr
        self.batch_size = batch_size
        self.sequence_length = sequence_length
        self.gmm_k = gmm_k  # Number of Gaussian mixtures
        self.normal_percentile = normal_percentile  # Up to which percentile data should be considered normal
        self.autoencoder_type = autoencoder_type
        if autoencoder_type == AutoEncoderModule:
            self.autoencoder_args = ({'sequence_length': self.sequence_length})
        elif autoencoder_type == LSTMEDModule:
            self.autoencoder_args = ({'n_layers': (1, 1), 'use_bias': (True, True), 'dropout': (0.0, 0.0)})
        self.autoencoder_args.update({'seed': seed, 'gpu': gpu})
        if autoencoder_args is not None:
            self.autoencoder_args.update(autoencoder_args)
        self.hidden_size = hidden_size

        self.dagmm, self.optimizer, self.train_energy, self._threshold = None, None, None, None

    def reset_grad(self):
        self.dagmm.zero_grad()

    def dagmm_step(self, input_data):
        self.dagmm.train()
        enc, dec, z, gamma = self.dagmm(input_data)
        total_loss, sample_energy, recon_error, cov_diag = self.dagmm.loss_function(input_data, dec, z, gamma,
                                                                                    self.lambda_energy,
                                                                                    self.lambda_cov_diag)
        self.reset_grad()
        total_loss = torch.clamp(total_loss, max=1e7)  # Extremely high loss can cause NaN gradients
        total_loss.backward()
        torch.nn.utils.clip_grad_norm_(self.dagmm.parameters(), 5)
        # if np.array([np.isnan(p.grad.detach().numpy()).any() for p in self.dagmm.parameters()]).any():
        #     import IPython; IPython.embed()
        self.optimizer.step()
        return total_loss, sample_energy, recon_error, cov_diag

    def fit(self, X: pd.DataFrame):
        """Learn the mixture probability, mean and covariance for each component k.
        Store the computed energy based on the training data and the aforementioned parameters."""
        #X.interpolate(inplace=True)
        X.bfill(inplace=True)
        data = X.values
        sequences = [data[i:i + self.sequence_length] for i in range(X.shape[0] - self.sequence_length + 1)]
        data_loader = DataLoader(dataset=sequences, batch_size=self.batch_size, shuffle=True, drop_last=True)
        self.hidden_size = 5 + int(X.shape[1] / 20)
        autoencoder = self.autoencoder_type(X.shape[1], hidden_size=self.hidden_size, **self.autoencoder_args)
        self.dagmm = DAGMMModule(autoencoder, n_gmm=self.gmm_k, latent_dim=self.hidden_size + 2,
                                 seed=self.seed, gpu=self.gpu)
        self.to_device(self.dagmm)
        self.optimizer = torch.optim.Adam(self.dagmm.parameters(), lr=self.lr)

        for _ in trange(self.num_epochs):
            for input_data in data_loader:
                input_data = self.to_var(input_data)
                self.dagmm_step(input_data.float())

        self.dagmm.eval()
        n = 0
        mu_sum = 0
        cov_sum = 0
        gamma_sum = 0
        for input_data in data_loader:
            input_data = self.to_var(input_data)
            _, _, z, gamma = self.dagmm(input_data.float())
            phi, mu, cov = self.dagmm.compute_gmm_params(z, gamma)

            batch_gamma_sum = torch.sum(gamma, dim=0)

            gamma_sum += batch_gamma_sum
            mu_sum += mu * batch_gamma_sum.unsqueeze(-1)  # keep sums of the numerator only
            cov_sum += cov * batch_gamma_sum.unsqueeze(-1).unsqueeze(-1)  # keep sums of the numerator only

            n += input_data.size(0)

    def predict(self, X: pd.DataFrame):
        """Using the learned mixture probability, mean and covariance for each component k, compute the energy on the
        given data."""
        self.dagmm.eval()
        #X.interpolate(inplace=True)
        X.bfill(inplace=True)
        data = X.values
        sequences = [data[i:i + self.sequence_length] for i in range(len(data) - self.sequence_length + 1)]
        data_loader = DataLoader(dataset=sequences, batch_size=1, shuffle=False)
        test_energy = np.full((self.sequence_length, X.shape[0]), np.nan)

        encodings = np.full((self.sequence_length, X.shape[0], self.hidden_size), np.nan)
        decodings = np.full((self.sequence_length, X.shape[0], X.shape[1]), np.nan)
        euc_errors = np.full((self.sequence_length, X.shape[0]), np.nan)
        csn_errors = np.full((self.sequence_length, X.shape[0]), np.nan)

        for i, sequence in enumerate(data_loader):
            print ("shape of sequence",self.to_var(sequence).float().shape)
            enc, dec, z, gamma = self.dagmm(self.to_var(sequence).float())
            sample_energy, _ = self.dagmm.compute_energy(z, size_average=False)
            idx = (i % self.sequence_length, np.arange(i, i + self.sequence_length))
            test_energy[idx] = sample_energy.data.numpy()

            if self.details:
                encodings[idx] = enc.data.numpy()
                decodings[idx] = dec.data.numpy()
                euc_errors[idx] = z[:, 1].data.numpy()
                csn_errors[idx] = z[:, 2].data.numpy()

        test_energy = np.nanmean(test_energy, axis=0)

        if self.details:
            self.prediction_details.update({'latent_representations': np.nanmean(encodings, axis=0).T})
            self.prediction_details.update({'reconstructions_mean': np.nanmean(decodings, axis=0).T})
            self.prediction_details.update({'euclidean_errors_mean': np.nanmean(euc_errors, axis=0)})
            self.prediction_details.update({'cosine_errors_mean': np.nanmean(csn_errors, axis=0)})

        return test_energy


class DAGMMModule(nn.Module, PyTorchUtils):
    """Residual Block."""

    def __init__(self, autoencoder, n_gmm, latent_dim, seed: int, gpu: int):
        super(DAGMMModule, self).__init__()
        PyTorchUtils.__init__(self, seed, gpu)

        self.add_module('autoencoder', autoencoder)

        layers = [
            nn.Linear(latent_dim, 10),
            nn.Tanh(),
            nn.Dropout(p=0.5),
            nn.Linear(10, n_gmm),
            nn.Softmax(dim=1)
        ]
        self.estimation = nn.Sequential(*layers)
        self.to_device(self.estimation)

        self.register_buffer('phi', self.to_var(torch.zeros(n_gmm)))
        self.register_buffer('mu', self.to_var(torch.zeros(n_gmm, latent_dim)))
        self.register_buffer('cov', self.to_var(torch.zeros(n_gmm, latent_dim, latent_dim)))

    def relative_euclidean_distance(self, a, b, dim=1):
        return (a - b).norm(2, dim=dim) / torch.clamp(a.norm(2, dim=dim), min=1e-10)

    def forward(self, x):
        dec, enc = self.autoencoder(x, return_latent=True)

        rec_cosine = F.cosine_similarity(x.view(x.shape[0], -1), dec.view(dec.shape[0], -1), dim=1)
        rec_euclidean = self.relative_euclidean_distance(x.view(x.shape[0], -1), dec.view(dec.shape[0], -1), dim=1)

        # Concatenate latent representation, cosine similarity and relative Euclidean distance between x and dec(enc(x))
        z = torch.cat([enc, rec_euclidean.unsqueeze(-1), rec_cosine.unsqueeze(-1)], dim=1)
        gamma = self.estimation(z)

        return enc, dec, z, gamma

    def compute_gmm_params(self, z, gamma):
        N = gamma.size(0)
        # K
        sum_gamma = torch.sum(gamma, dim=0)

        # K
        phi = (sum_gamma / N)

        self.phi = phi.data

        # K x D
        mu = torch.sum(gamma.unsqueeze(-1) * z.unsqueeze(1), dim=0) / sum_gamma.unsqueeze(-1)
        self.mu = mu.data
        # z = N x D
        # mu = K x D
        # gamma N x K

        # z_mu = N x K x D
        z_mu = (z.unsqueeze(1) - mu.unsqueeze(0))

        # z_mu_outer = N x K x D x D
        z_mu_outer = z_mu.unsqueeze(-1) * z_mu.unsqueeze(-2)

        # K x D x D
        cov = torch.sum(gamma.unsqueeze(-1).unsqueeze(-1) * z_mu_outer, dim=0) / sum_gamma.unsqueeze(-1).unsqueeze(-1)
        self.cov = cov.data

        return phi, mu, cov

    def compute_energy(self, z, phi=None, mu=None, cov=None, size_average=True):
        if phi is None:
            phi = Variable(self.phi)
        if mu is None:
            mu = Variable(self.mu)
        if cov is None:
            cov = Variable(self.cov)

        k, d, _ = cov.size()

        z_mu = (z.unsqueeze(1) - mu.unsqueeze(0))

        cov_inverse = []
        det_cov = []
        cov_diag = 0
        eps = 1e-12
        for i in range(k):
            # K x D x D
            cov_k = cov[i] + self.to_var(torch.eye(d) * eps)
            pinv = np.linalg.pinv(cov_k.data.numpy())
            cov_inverse.append(Variable(torch.from_numpy(pinv)).unsqueeze(0))

            eigvals = np.linalg.eigvals(cov_k.data.cpu().numpy() * (2 * np.pi))
            if np.min(eigvals) < 0:
                logging.warning(f'Determinant was negative! Clipping Eigenvalues to 0+epsilon from {np.min(eigvals)}')
            determinant = np.prod(np.clip(eigvals, a_min=sys.float_info.epsilon, a_max=None))
            det_cov.append(determinant)

            cov_diag = cov_diag + torch.sum(1 / cov_k.diag())

        # K x D x D
        cov_inverse = torch.cat(cov_inverse, dim=0)
        # K
        det_cov = Variable(torch.from_numpy(np.float32(np.array(det_cov))))

        # N x K
        exp_term_tmp = -0.5 * torch.sum(torch.sum(z_mu.unsqueeze(-1) * cov_inverse.unsqueeze(0), dim=-2) * z_mu, dim=-1)
        # for stability (logsumexp)
        max_val = torch.max((exp_term_tmp).clamp(min=0), dim=1, keepdim=True)[0]

        exp_term = torch.exp(exp_term_tmp - max_val)

        sample_energy = -max_val.squeeze() - torch.log(
            torch.sum(self.to_var(phi.unsqueeze(0)) * exp_term / (torch.sqrt(self.to_var(det_cov)) + eps).unsqueeze(0),
                      dim=1) + eps)

        if size_average:
            sample_energy = torch.mean(sample_energy)

        return sample_energy, cov_diag

    def loss_function(self, x, x_hat, z, gamma, lambda_energy, lambda_cov_diag):
        recon_error = torch.mean((x.view(*x_hat.shape) - x_hat) ** 2)
        phi, mu, cov = self.compute_gmm_params(z, gamma)
        sample_energy, cov_diag = self.compute_energy(z, phi, mu, cov)
        loss = recon_error + lambda_energy * sample_energy + lambda_cov_diag * cov_diag
        return loss, sample_energy, recon_error, cov_diag

In [132]:
detectors = DAGMM(num_epochs=10, sequence_length=3)

In [133]:
detectors.fit(X_train.iloc[:,:-1].copy())


100%|██████████| 10/10 [00:00<00:00, 19.05it/s]


In [134]:
X_train.iloc[:,:-1].shape

(300, 23)

# prepare feature for predict

In [135]:
score = detectors.predict(X_train.iloc[:,:-1].copy())

shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequenc

shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequence torch.Size([1, 3, 23])
shape of sequenc

In [137]:
score.shape

(300,)

In [179]:
output = pd.DataFrame({"cano_locdt_index":X_train.iloc[:,-1]})
output["score"] = score
print (output.shape)

(300, 2)


In [182]:
output["cosine_errors_mean"] = detectors.prediction_details["cosine_errors_mean"]
output["euclidean_errors_mean"]  = detectors.prediction_details["euclidean_errors_mean"]
data = detectors.prediction_details["reconstructions_mean"]
reconstructions_mean = pd.DataFrame(data.T,
             columns = ["reconstructions_mean_latent_features_{}".format(i) for i in range(data.shape[0])]
            )
print (reconstructions_mean.shape)
data = detectors.prediction_details["latent_representations"]
latent_representations = pd.DataFrame(data.T,
             columns = ["latent_representations_latent_features_{}".format(i) for i in range(data.shape[0])]
            )
print (latent_representations.shape)
output = pd.concat([output,reconstructions_mean,latent_representations], axis = 1)
print (output.shape)

(300, 23)
(300, 6)
(300, 62)


In [183]:
output

Unnamed: 0,cano_locdt_index,score,cosine_errors_mean,euclidean_errors_mean,reconstructions_mean_latent_features_0,reconstructions_mean_latent_features_1,reconstructions_mean_latent_features_2,reconstructions_mean_latent_features_3,reconstructions_mean_latent_features_4,reconstructions_mean_latent_features_5,reconstructions_mean_latent_features_6,reconstructions_mean_latent_features_7,reconstructions_mean_latent_features_8,reconstructions_mean_latent_features_9,reconstructions_mean_latent_features_10,reconstructions_mean_latent_features_11,reconstructions_mean_latent_features_12,reconstructions_mean_latent_features_13,reconstructions_mean_latent_features_14,reconstructions_mean_latent_features_15,reconstructions_mean_latent_features_16,reconstructions_mean_latent_features_17,reconstructions_mean_latent_features_18,reconstructions_mean_latent_features_19,reconstructions_mean_latent_features_20,reconstructions_mean_latent_features_21,reconstructions_mean_latent_features_22,latent_representations_latent_features_0,latent_representations_latent_features_1,latent_representations_latent_features_2,latent_representations_latent_features_3,latent_representations_latent_features_4,latent_representations_latent_features_5,reconstructions_mean_latent_features_0.1,reconstructions_mean_latent_features_1.1,reconstructions_mean_latent_features_2.1,reconstructions_mean_latent_features_3.1,reconstructions_mean_latent_features_4.1,reconstructions_mean_latent_features_5.1,reconstructions_mean_latent_features_6.1,reconstructions_mean_latent_features_7.1,reconstructions_mean_latent_features_8.1,reconstructions_mean_latent_features_9.1,reconstructions_mean_latent_features_10.1,reconstructions_mean_latent_features_11.1,reconstructions_mean_latent_features_12.1,reconstructions_mean_latent_features_13.1,reconstructions_mean_latent_features_14.1,reconstructions_mean_latent_features_15.1,reconstructions_mean_latent_features_16.1,reconstructions_mean_latent_features_17.1,reconstructions_mean_latent_features_18.1,reconstructions_mean_latent_features_19.1,reconstructions_mean_latent_features_20.1,reconstructions_mean_latent_features_21.1,reconstructions_mean_latent_features_22.1,latent_representations_latent_features_0.1,latent_representations_latent_features_1.1,latent_representations_latent_features_2.1,latent_representations_latent_features_3.1,latent_representations_latent_features_4.1,latent_representations_latent_features_5.1
0,0,27.631021,-0.295345,-0.059350,0.924081,-0.135590,0.499637,0.241374,-0.017618,0.693070,-0.296043,0.333671,0.215816,-0.632744,-0.082227,0.352146,0.786789,0.181245,-0.725199,-0.668880,0.253682,0.315962,0.589892,0.212861,0.284910,0.750360,0.456047,0.025644,-0.059350,-0.295345,-0.156371,0.089071,0.074201,0.924081,-0.135590,0.499637,0.241374,-0.017618,0.693070,-0.296043,0.333671,0.215816,-0.632744,-0.082227,0.352146,0.786789,0.181245,-0.725199,-0.668880,0.253682,0.315962,0.589892,0.212861,0.284910,0.750360,0.456047,0.025644,-0.059350,-0.295345,-0.156371,0.089071,0.074201
1,0,27.631021,0.382906,0.271174,0.727449,0.151623,-0.271612,-0.256591,0.490845,1.015347,0.207021,0.011632,0.392299,-0.386213,0.593477,0.432118,0.357133,0.118143,-0.396401,-0.241968,0.561575,0.709550,0.691831,0.556354,0.085471,0.667678,0.544193,-0.466960,0.271174,0.382906,-0.666963,0.448975,-0.493218,0.727449,0.151623,-0.271612,-0.256591,0.490845,1.015347,0.207021,0.011632,0.392299,-0.386213,0.593477,0.432118,0.357133,0.118143,-0.396401,-0.241968,0.561575,0.709550,0.691831,0.556354,0.085471,0.667678,0.544193,-0.466960,0.271174,0.382906,-0.666963,0.448975,-0.493218
2,3749_1,25.149796,0.375645,0.293028,0.615858,-0.015094,0.043652,0.213557,0.288134,0.883730,-0.065764,0.124521,0.471429,-0.453050,0.440064,0.413630,0.400986,0.135690,-0.418351,-0.236989,0.402316,0.329611,0.657517,0.392622,0.233974,0.523237,0.546441,-0.470165,0.293028,0.375645,-0.673999,0.433263,-0.466943,0.615858,-0.015094,0.043652,0.213557,0.288134,0.883730,-0.065764,0.124521,0.471429,-0.453050,0.440064,0.413630,0.400986,0.135690,-0.418351,-0.236989,0.402316,0.329611,0.657517,0.392622,0.233974,0.523237,0.546441,-0.470165,0.293028,0.375645,-0.673999,0.433263,-0.466943
3,0,14.849816,0.751846,0.524848,0.527854,0.061934,0.025055,-0.068244,0.514123,0.804582,0.102690,0.296015,0.670417,-0.188862,0.711714,0.656565,0.538123,-0.054643,-0.196332,-0.076617,0.599034,0.445602,0.639521,0.643435,0.317581,0.574855,0.386058,-0.706799,0.524848,0.751846,-0.971888,0.631818,-0.753402,0.527854,0.061934,0.025055,-0.068244,0.514123,0.804582,0.102690,0.296015,0.670417,-0.188862,0.711714,0.656565,0.538123,-0.054643,-0.196332,-0.076617,0.599034,0.445602,0.639521,0.643435,0.317581,0.574855,0.386058,-0.706799,0.524848,0.751846,-0.971888,0.631818,-0.753402
4,3749_1,-0.102822,0.774593,0.580416,0.463161,0.047286,0.021042,0.056516,0.490920,0.795669,0.052190,0.263364,0.665853,-0.185761,0.698043,0.608211,0.469882,-0.016209,-0.160862,-0.016941,0.559579,0.351703,0.597816,0.595374,0.312439,0.511031,0.411958,-0.700036,0.580416,0.774593,-1.004546,0.639794,-0.728963,0.463161,0.047286,0.021042,0.056516,0.490920,0.795669,0.052190,0.263364,0.665853,-0.185761,0.698043,0.608211,0.469882,-0.016209,-0.160862,-0.016941,0.559579,0.351703,0.597816,0.595374,0.312439,0.511031,0.411958,-0.700036,0.580416,0.774593,-1.004546,0.639794,-0.728963
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
295,114592_7,-6.041363,0.166159,0.052324,0.799040,0.059592,-0.156794,0.022642,0.224824,1.048931,-0.006963,-0.072168,0.292208,-0.620655,0.373231,0.281610,0.287987,0.239766,-0.599871,-0.393992,0.363112,0.473857,0.775260,0.342774,0.081585,0.606319,0.626860,-0.231496,0.052324,0.166159,-0.328720,0.132949,-0.314295,0.799040,0.059592,-0.156794,0.022642,0.224824,1.048931,-0.006963,-0.072168,0.292208,-0.620655,0.373231,0.281610,0.287987,0.239766,-0.599871,-0.393992,0.363112,0.473857,0.775260,0.342774,0.081585,0.606319,0.626860,-0.231496,0.052324,0.166159,-0.328720,0.132949,-0.314295
296,63818_7,-13.553216,0.364161,0.149798,0.718950,0.008564,0.028018,0.129240,0.285359,0.953450,-0.041098,0.102429,0.455793,-0.516891,0.444984,0.437084,0.443345,0.144968,-0.482219,-0.303066,0.420591,0.399034,0.710684,0.413561,0.223634,0.578219,0.562926,-0.367954,0.149798,0.364161,-0.473505,0.219472,-0.484520,0.718950,0.008564,0.028018,0.129240,0.285359,0.953450,-0.041098,0.102429,0.455793,-0.516891,0.444984,0.437084,0.443345,0.144968,-0.482219,-0.303066,0.420591,0.399034,0.710684,0.413561,0.223634,0.578219,0.562926,-0.367954,0.149798,0.364161,-0.473505,0.219472,-0.484520
297,114592_7,-14.528060,0.532978,0.369651,0.624850,0.056137,-0.011949,-0.026962,0.415428,0.877308,0.064062,0.184376,0.549605,-0.335890,0.588437,0.533188,0.476712,0.038410,-0.324567,-0.181855,0.501890,0.447872,0.685958,0.522313,0.240465,0.582773,0.456203,-0.452632,0.369651,0.532978,-0.663555,0.314323,-0.562679,0.624850,0.056137,-0.011949,-0.026962,0.415428,0.877308,0.064062,0.184376,0.549605,-0.335890,0.588437,0.533188,0.476712,0.038410,-0.324567,-0.181855,0.501890,0.447872,0.685958,0.522313,0.240465,0.582773,0.456203,-0.452632,0.369651,0.532978,-0.663555,0.314323,-0.562679
298,63818_7,-14.508166,0.535894,0.328998,0.620974,0.003399,0.024119,0.116367,0.349870,0.908796,0.039865,0.124301,0.585564,-0.473440,0.545585,0.495606,0.447903,0.052618,-0.399251,-0.198518,0.431417,0.386331,0.756053,0.415958,0.234272,0.486071,0.537504,-0.468846,0.328998,0.535894,-0.655420,0.336354,-0.584057,0.620974,0.003399,0.024119,0.116367,0.349870,0.908796,0.039865,0.124301,0.585564,-0.473440,0.545585,0.495606,0.447903,0.052618,-0.399251,-0.198518,0.431417,0.386331,0.756053,0.415958,0.234272,0.486071,0.537504,-0.468846,0.328998,0.535894,-0.655420,0.336354,-0.584057


In [97]:
detectors.prediction_details

{'latent_representations': array([[ 0.7802254 , -0.10729223,  0.31457698, ...,  0.31983497,
          0.27089506,  1.10672879],
        [ 0.98883927,  0.09547198,  0.41578778, ..., -0.02283099,
          0.01203567,  0.81264138],
        [ 1.01293802, -0.41170257,  0.10397224, ..., -0.57779823,
         -0.52440584,  0.75147319],
        [-1.05199564, -0.26685005, -0.6365966 , ..., -0.62019366,
         -0.58028763, -1.31965804],
        [-0.9647001 ,  0.4631204 , -0.04750307, ...,  0.66197101,
          0.60248911, -0.67978966],
        [ 1.112046  ,  0.14413276,  0.56184111, ...,  0.37602215,
          0.35842338,  1.25383401]]),
 'reconstructions_mean': array([[-0.43439987,  0.55227272,  0.25557916, ...,  0.32192115,
          0.61953512,  0.74356216],
        [-1.8718406 , -0.63063285, -0.69196452, ..., -0.64851837,
         -0.55238891, -0.20118597],
        [-2.08274555, -0.72016533,  0.07728161, ..., -0.20354194,
         -0.50705667,  2.21474838],
        ...,
        [-0.22628

In [99]:
detectors.prediction_details["cosine_errors_mean"].shape

(4818,)

In [101]:
detectors.prediction_details["euclidean_errors_mean"].shape

(4818,)

In [102]:
detectors.prediction_details["reconstructions_mean"].shape

(23, 4818)

In [103]:
detectors.prediction_details["latent_representations"].shape

(6, 4818)

In [174]:
data = detectors.prediction_details["reconstructions_mean"]
reconstructions_mean = pd.DataFrame(data.T,
             columns = ["reconstructions_mean_latent_features_{}".format(i) for i in range(data.shape[0])]
            )
reconstructions_mean.shape

(300, 23)

In [175]:
data = detectors.prediction_details["latent_representations"]
latent_representations = pd.DataFrame(data.T,
             columns = ["latent_representations_latent_features_{}".format(i) for i in range(data.shape[0])]
            )
latent_representations.shape

(300, 6)

In [192]:
output.head(10)

Unnamed: 0,cano_locdt_index,score,cosine_errors_mean,euclidean_errors_mean,reconstructions_mean_latent_features_0,reconstructions_mean_latent_features_1,reconstructions_mean_latent_features_2,reconstructions_mean_latent_features_3,reconstructions_mean_latent_features_4,reconstructions_mean_latent_features_5,reconstructions_mean_latent_features_6,reconstructions_mean_latent_features_7,reconstructions_mean_latent_features_8,reconstructions_mean_latent_features_9,reconstructions_mean_latent_features_10,reconstructions_mean_latent_features_11,reconstructions_mean_latent_features_12,reconstructions_mean_latent_features_13,reconstructions_mean_latent_features_14,reconstructions_mean_latent_features_15,reconstructions_mean_latent_features_16,reconstructions_mean_latent_features_17,reconstructions_mean_latent_features_18,reconstructions_mean_latent_features_19,reconstructions_mean_latent_features_20,reconstructions_mean_latent_features_21,reconstructions_mean_latent_features_22,latent_representations_latent_features_0,latent_representations_latent_features_1,latent_representations_latent_features_2,latent_representations_latent_features_3,latent_representations_latent_features_4,latent_representations_latent_features_5,reconstructions_mean_latent_features_0.1,reconstructions_mean_latent_features_1.1,reconstructions_mean_latent_features_2.1,reconstructions_mean_latent_features_3.1,reconstructions_mean_latent_features_4.1,reconstructions_mean_latent_features_5.1,reconstructions_mean_latent_features_6.1,reconstructions_mean_latent_features_7.1,reconstructions_mean_latent_features_8.1,reconstructions_mean_latent_features_9.1,reconstructions_mean_latent_features_10.1,reconstructions_mean_latent_features_11.1,reconstructions_mean_latent_features_12.1,reconstructions_mean_latent_features_13.1,reconstructions_mean_latent_features_14.1,reconstructions_mean_latent_features_15.1,reconstructions_mean_latent_features_16.1,reconstructions_mean_latent_features_17.1,reconstructions_mean_latent_features_18.1,reconstructions_mean_latent_features_19.1,reconstructions_mean_latent_features_20.1,reconstructions_mean_latent_features_21.1,reconstructions_mean_latent_features_22.1,latent_representations_latent_features_0.1,latent_representations_latent_features_1.1,latent_representations_latent_features_2.1,latent_representations_latent_features_3.1,latent_representations_latent_features_4.1,latent_representations_latent_features_5.1
0,0,27.631021,-0.295345,-0.05935,0.924081,-0.13559,0.499637,0.241374,-0.017618,0.69307,-0.296043,0.333671,0.215816,-0.632744,-0.082227,0.352146,0.786789,0.181245,-0.725199,-0.66888,0.253682,0.315962,0.589892,0.212861,0.28491,0.75036,0.456047,0.025644,-0.05935,-0.295345,-0.156371,0.089071,0.074201,0.924081,-0.13559,0.499637,0.241374,-0.017618,0.69307,-0.296043,0.333671,0.215816,-0.632744,-0.082227,0.352146,0.786789,0.181245,-0.725199,-0.66888,0.253682,0.315962,0.589892,0.212861,0.28491,0.75036,0.456047,0.025644,-0.05935,-0.295345,-0.156371,0.089071,0.074201
1,0,27.631021,0.382906,0.271174,0.727449,0.151623,-0.271612,-0.256591,0.490845,1.015347,0.207021,0.011632,0.392299,-0.386213,0.593477,0.432118,0.357133,0.118143,-0.396401,-0.241968,0.561575,0.70955,0.691831,0.556354,0.085471,0.667678,0.544193,-0.46696,0.271174,0.382906,-0.666963,0.448975,-0.493218,0.727449,0.151623,-0.271612,-0.256591,0.490845,1.015347,0.207021,0.011632,0.392299,-0.386213,0.593477,0.432118,0.357133,0.118143,-0.396401,-0.241968,0.561575,0.70955,0.691831,0.556354,0.085471,0.667678,0.544193,-0.46696,0.271174,0.382906,-0.666963,0.448975,-0.493218
2,3749_1,25.149796,0.375645,0.293028,0.615858,-0.015094,0.043652,0.213557,0.288134,0.88373,-0.065764,0.124521,0.471429,-0.45305,0.440064,0.41363,0.400986,0.13569,-0.418351,-0.236989,0.402316,0.329611,0.657517,0.392622,0.233974,0.523237,0.546441,-0.470165,0.293028,0.375645,-0.673999,0.433263,-0.466943,0.615858,-0.015094,0.043652,0.213557,0.288134,0.88373,-0.065764,0.124521,0.471429,-0.45305,0.440064,0.41363,0.400986,0.13569,-0.418351,-0.236989,0.402316,0.329611,0.657517,0.392622,0.233974,0.523237,0.546441,-0.470165,0.293028,0.375645,-0.673999,0.433263,-0.466943
3,0,14.849816,0.751846,0.524848,0.527854,0.061934,0.025055,-0.068244,0.514123,0.804582,0.10269,0.296015,0.670417,-0.188862,0.711714,0.656565,0.538123,-0.054643,-0.196332,-0.076617,0.599034,0.445602,0.639521,0.643435,0.317581,0.574855,0.386058,-0.706799,0.524848,0.751846,-0.971888,0.631818,-0.753402,0.527854,0.061934,0.025055,-0.068244,0.514123,0.804582,0.10269,0.296015,0.670417,-0.188862,0.711714,0.656565,0.538123,-0.054643,-0.196332,-0.076617,0.599034,0.445602,0.639521,0.643435,0.317581,0.574855,0.386058,-0.706799,0.524848,0.751846,-0.971888,0.631818,-0.753402
4,3749_1,-0.102822,0.774593,0.580416,0.463161,0.047286,0.021042,0.056516,0.49092,0.795669,0.05219,0.263364,0.665853,-0.185761,0.698043,0.608211,0.469882,-0.016209,-0.160862,-0.016941,0.559579,0.351703,0.597816,0.595374,0.312439,0.511031,0.411958,-0.700036,0.580416,0.774593,-1.004546,0.639794,-0.728963,0.463161,0.047286,0.021042,0.056516,0.49092,0.795669,0.05219,0.263364,0.665853,-0.185761,0.698043,0.608211,0.469882,-0.016209,-0.160862,-0.016941,0.559579,0.351703,0.597816,0.595374,0.312439,0.511031,0.411958,-0.700036,0.580416,0.774593,-1.004546,0.639794,-0.728963
5,210775_1,-12.619732,1.012949,0.709775,0.387155,0.03703,0.104426,0.030774,0.569312,0.702525,0.081885,0.390739,0.766583,-0.060772,0.770531,0.714979,0.558446,-0.109012,-0.049638,0.053448,0.619988,0.351526,0.558075,0.666303,0.389158,0.504092,0.331673,-0.834877,0.709775,1.012949,-1.173817,0.756216,-0.911121,0.387155,0.03703,0.104426,0.030774,0.569312,0.702525,0.081885,0.390739,0.766583,-0.060772,0.770531,0.714979,0.558446,-0.109012,-0.049638,0.053448,0.619988,0.351526,0.558075,0.666303,0.389158,0.504092,0.331673,-0.834877,0.709775,1.012949,-1.173817,0.756216,-0.911121
6,3749_1,-16.940465,1.09666,0.705653,0.394544,0.037204,0.120336,0.007432,0.593737,0.699279,0.095778,0.415976,0.793834,-0.044724,0.795758,0.746082,0.590015,-0.133887,-0.036226,0.058087,0.640044,0.374,0.564055,0.687906,0.405685,0.519862,0.318479,-0.927088,0.705653,1.09666,-1.22168,0.798013,-0.989326,0.394544,0.037204,0.120336,0.007432,0.593737,0.699279,0.095778,0.415976,0.793834,-0.044724,0.795758,0.746082,0.590015,-0.133887,-0.036226,0.058087,0.640044,0.374,0.564055,0.687906,0.405685,0.519862,0.318479,-0.927088,0.705653,1.09666,-1.22168,0.798013,-0.989326
7,210775_1,-16.401832,0.898658,0.557853,0.486657,0.00738,0.175337,0.041739,0.523104,0.728095,0.060682,0.405824,0.741287,-0.155162,0.694126,0.710677,0.628721,-0.103223,-0.15042,-0.05176,0.586947,0.391034,0.61074,0.625927,0.39871,0.551563,0.35364,-0.78663,0.557853,0.898658,-1.040015,0.678517,-0.851363,0.486657,0.00738,0.175337,0.041739,0.523104,0.728095,0.060682,0.405824,0.741287,-0.155162,0.694126,0.710677,0.628721,-0.103223,-0.15042,-0.05176,0.586947,0.391034,0.61074,0.625927,0.39871,0.551563,0.35364,-0.78663,0.557853,0.898658,-1.040015,0.678517,-0.851363
8,75818_1,-15.136129,0.741843,0.363658,0.605523,0.027722,0.107808,-0.026271,0.488546,0.828642,0.069423,0.319989,0.676679,-0.263114,0.662748,0.664831,0.609078,-0.052587,-0.264513,-0.141555,0.570657,0.464796,0.675333,0.608619,0.344979,0.608943,0.40502,-0.700171,0.363658,0.741843,-0.871878,0.607275,-0.779586,0.605523,0.027722,0.107808,-0.026271,0.488546,0.828642,0.069423,0.319989,0.676679,-0.263114,0.662748,0.664831,0.609078,-0.052587,-0.264513,-0.141555,0.570657,0.464796,0.675333,0.608619,0.344979,0.608943,0.40502,-0.700171,0.363658,0.741843,-0.871878,0.607275,-0.779586
9,210775_1,-15.213317,0.600765,0.296333,0.631184,0.037168,0.023529,-0.001885,0.448543,0.901886,0.055512,0.223482,0.607258,-0.329878,0.633247,0.582016,0.513401,0.018786,-0.327859,-0.175906,0.531355,0.452803,0.713578,0.570956,0.283335,0.602211,0.470376,-0.590139,0.296333,0.600765,-0.7645,0.529132,-0.665459,0.631184,0.037168,0.023529,-0.001885,0.448543,0.901886,0.055512,0.223482,0.607258,-0.329878,0.633247,0.582016,0.513401,0.018786,-0.327859,-0.175906,0.531355,0.452803,0.713578,0.570956,0.283335,0.602211,0.470376,-0.590139,0.296333,0.600765,-0.7645,0.529132,-0.665459


In [209]:
feature = []
for i in range(len(output)):
    if i%3 == 2:
        feature.append(output.iloc[i:i+1])
feature = pd.concat(feature,axis = 0)

In [210]:
len(feature)

100

In [211]:
feature.to_csv("/data/yunrui_li/fraud/fraud_detection/features/DAGMM_features.csv")

Unnamed: 0,cano_locdt_index,score,cosine_errors_mean,euclidean_errors_mean,reconstructions_mean_latent_features_0,reconstructions_mean_latent_features_1,reconstructions_mean_latent_features_2,reconstructions_mean_latent_features_3,reconstructions_mean_latent_features_4,reconstructions_mean_latent_features_5,reconstructions_mean_latent_features_6,reconstructions_mean_latent_features_7,reconstructions_mean_latent_features_8,reconstructions_mean_latent_features_9,reconstructions_mean_latent_features_10,reconstructions_mean_latent_features_11,reconstructions_mean_latent_features_12,reconstructions_mean_latent_features_13,reconstructions_mean_latent_features_14,reconstructions_mean_latent_features_15,reconstructions_mean_latent_features_16,reconstructions_mean_latent_features_17,reconstructions_mean_latent_features_18,reconstructions_mean_latent_features_19,reconstructions_mean_latent_features_20,reconstructions_mean_latent_features_21,reconstructions_mean_latent_features_22,latent_representations_latent_features_0,latent_representations_latent_features_1,latent_representations_latent_features_2,latent_representations_latent_features_3,latent_representations_latent_features_4,latent_representations_latent_features_5,reconstructions_mean_latent_features_0.1,reconstructions_mean_latent_features_1.1,reconstructions_mean_latent_features_2.1,reconstructions_mean_latent_features_3.1,reconstructions_mean_latent_features_4.1,reconstructions_mean_latent_features_5.1,reconstructions_mean_latent_features_6.1,reconstructions_mean_latent_features_7.1,reconstructions_mean_latent_features_8.1,reconstructions_mean_latent_features_9.1,reconstructions_mean_latent_features_10.1,reconstructions_mean_latent_features_11.1,reconstructions_mean_latent_features_12.1,reconstructions_mean_latent_features_13.1,reconstructions_mean_latent_features_14.1,reconstructions_mean_latent_features_15.1,reconstructions_mean_latent_features_16.1,reconstructions_mean_latent_features_17.1,reconstructions_mean_latent_features_18.1,reconstructions_mean_latent_features_19.1,reconstructions_mean_latent_features_20.1,reconstructions_mean_latent_features_21.1,reconstructions_mean_latent_features_22.1,latent_representations_latent_features_0.1,latent_representations_latent_features_1.1,latent_representations_latent_features_2.1,latent_representations_latent_features_3.1,latent_representations_latent_features_4.1,latent_representations_latent_features_5.1
2,3749_1,25.149796,0.375645,0.293028,0.615858,-0.015094,0.043652,0.213557,0.288134,0.88373,-0.065764,0.124521,0.471429,-0.45305,0.440064,0.41363,0.400986,0.13569,-0.418351,-0.236989,0.402316,0.329611,0.657517,0.392622,0.233974,0.523237,0.546441,-0.470165,0.293028,0.375645,-0.673999,0.433263,-0.466943,0.615858,-0.015094,0.043652,0.213557,0.288134,0.88373,-0.065764,0.124521,0.471429,-0.45305,0.440064,0.41363,0.400986,0.13569,-0.418351,-0.236989,0.402316,0.329611,0.657517,0.392622,0.233974,0.523237,0.546441,-0.470165,0.293028,0.375645,-0.673999,0.433263,-0.466943
5,210775_1,-12.619732,1.012949,0.709775,0.387155,0.03703,0.104426,0.030774,0.569312,0.702525,0.081885,0.390739,0.766583,-0.060772,0.770531,0.714979,0.558446,-0.109012,-0.049638,0.053448,0.619988,0.351526,0.558075,0.666303,0.389158,0.504092,0.331673,-0.834877,0.709775,1.012949,-1.173817,0.756216,-0.911121,0.387155,0.03703,0.104426,0.030774,0.569312,0.702525,0.081885,0.390739,0.766583,-0.060772,0.770531,0.714979,0.558446,-0.109012,-0.049638,0.053448,0.619988,0.351526,0.558075,0.666303,0.389158,0.504092,0.331673,-0.834877,0.709775,1.012949,-1.173817,0.756216,-0.911121
8,75818_1,-15.136129,0.741843,0.363658,0.605523,0.027722,0.107808,-0.026271,0.488546,0.828642,0.069423,0.319989,0.676679,-0.263114,0.662748,0.664831,0.609078,-0.052587,-0.264513,-0.141555,0.570657,0.464796,0.675333,0.608619,0.344979,0.608943,0.40502,-0.700171,0.363658,0.741843,-0.871878,0.607275,-0.779586,0.605523,0.027722,0.107808,-0.026271,0.488546,0.828642,0.069423,0.319989,0.676679,-0.263114,0.662748,0.664831,0.609078,-0.052587,-0.264513,-0.141555,0.570657,0.464796,0.675333,0.608619,0.344979,0.608943,0.40502,-0.700171,0.363658,0.741843,-0.871878,0.607275,-0.779586
11,82304_1,-15.438435,0.562335,0.327707,0.642774,0.062387,-0.02271,-0.072143,0.444377,0.905479,0.081667,0.197272,0.576214,-0.330938,0.633155,0.565252,0.494017,0.02411,-0.332853,-0.186496,0.538485,0.481061,0.71278,0.573434,0.253615,0.612833,0.458743,-0.559193,0.327707,0.562335,-0.754057,0.485718,-0.617515,0.642774,0.062387,-0.02271,-0.072143,0.444377,0.905479,0.081667,0.197272,0.576214,-0.330938,0.633155,0.565252,0.494017,0.02411,-0.332853,-0.186496,0.538485,0.481061,0.71278,0.573434,0.253615,0.612833,0.458743,-0.559193,0.327707,0.562335,-0.754057,0.485718,-0.617515
14,183589_1,-10.35236,0.421218,0.330247,0.626784,-0.019094,0.131504,0.192345,0.285185,0.826412,-0.058418,0.197169,0.494187,-0.435596,0.414585,0.468525,0.49593,0.089793,-0.401727,-0.250926,0.404252,0.334659,0.648013,0.391818,0.271153,0.528987,0.498933,-0.389964,0.330247,0.421218,-0.594045,0.257084,-0.454371,0.626784,-0.019094,0.131504,0.192345,0.285185,0.826412,-0.058418,0.197169,0.494187,-0.435596,0.414585,0.468525,0.49593,0.089793,-0.401727,-0.250926,0.404252,0.334659,0.648013,0.391818,0.271153,0.528987,0.498933,-0.389964,0.330247,0.421218,-0.594045,0.257084,-0.454371
17,170610_1,-8.051116,-0.479325,-0.452956,1.068793,0.011292,-0.050311,0.019336,-0.115911,1.028524,-0.127624,-0.23595,-0.016153,-1.00463,-0.09196,0.012223,0.326052,0.382858,-0.994101,-0.743332,0.108234,0.475586,0.820309,-0.007228,-0.069869,0.647546,0.668417,0.214242,-0.452956,-0.479325,0.281589,-0.259636,0.111887,1.068793,0.011292,-0.050311,0.019336,-0.115911,1.028524,-0.127624,-0.23595,-0.016153,-1.00463,-0.09196,0.012223,0.326052,0.382858,-0.994101,-0.743332,0.108234,0.475586,0.820309,-0.007228,-0.069869,0.647546,0.668417,0.214242,-0.452956,-0.479325,0.281589,-0.259636,0.111887
20,195913_1,-13.515963,0.286732,0.090756,0.784661,0.105347,-0.231716,-0.105203,0.290835,1.06063,0.052055,-0.078728,0.342486,-0.558991,0.494362,0.329095,0.279671,0.209406,-0.534356,-0.321558,0.436658,0.495857,0.76525,0.430336,0.078542,0.628457,0.586813,-0.364597,0.090756,0.286732,-0.475909,0.315635,-0.43812,0.784661,0.105347,-0.231716,-0.105203,0.290835,1.06063,0.052055,-0.078728,0.342486,-0.558991,0.494362,0.329095,0.279671,0.209406,-0.534356,-0.321558,0.436658,0.495857,0.76525,0.430336,0.078542,0.628457,0.586813,-0.364597,0.090756,0.286732,-0.475909,0.315635,-0.43812
23,170610_1,-14.114964,0.2969,0.05528,0.75899,-0.056223,0.145878,0.247227,0.205369,0.922946,-0.113946,0.119501,0.425231,-0.600623,0.308265,0.391904,0.489009,0.162934,-0.570503,-0.382828,0.343942,0.358878,0.728014,0.313461,0.234067,0.570394,0.592542,-0.362154,0.05528,0.2969,-0.442716,0.286679,-0.475204,0.75899,-0.056223,0.145878,0.247227,0.205369,0.922946,-0.113946,0.119501,0.425231,-0.600623,0.308265,0.391904,0.489009,0.162934,-0.570503,-0.382828,0.343942,0.358878,0.728014,0.313461,0.234067,0.570394,0.592542,-0.362154,0.05528,0.2969,-0.442716,0.286679,-0.475204
26,170610_1,-9.012221,-0.879757,-0.895496,1.168118,0.023483,-0.112907,0.013105,-0.236015,1.088518,-0.168252,-0.366377,-0.159487,-1.162215,-0.221855,-0.11703,0.249134,0.481662,-1.161784,-0.857463,0.018173,0.486597,0.839685,-0.10332,-0.151448,0.662798,0.715236,0.379197,-0.895496,-0.879757,0.600321,-0.316779,0.291099,1.168118,0.023483,-0.112907,0.013105,-0.236015,1.088518,-0.168252,-0.366377,-0.159487,-1.162215,-0.221855,-0.11703,0.249134,0.481662,-1.161784,-0.857463,0.018173,0.486597,0.839685,-0.10332,-0.151448,0.662798,0.715236,0.379197,-0.895496,-0.879757,0.600321,-0.316779,0.291099
29,82829_1,-8.626847,-0.353944,-0.403442,1.001941,0.068093,-0.260465,0.014962,-0.03639,1.142776,-0.087421,-0.321489,-0.009819,-0.933173,0.073287,0.012137,0.142435,0.436228,-0.905866,-0.628491,0.194643,0.484288,0.796935,0.116593,-0.089195,0.637143,0.727846,0.107924,-0.403442,-0.353944,0.142599,-0.084804,-0.004055,1.001941,0.068093,-0.260465,0.014962,-0.03639,1.142776,-0.087421,-0.321489,-0.009819,-0.933173,0.073287,0.012137,0.142435,0.436228,-0.905866,-0.628491,0.194643,0.484288,0.796935,0.116593,-0.089195,0.637143,0.727846,0.107924,-0.403442,-0.353944,0.142599,-0.084804,-0.004055
