In [0]:
from __future__ import print_function
from __future__ import absolute_import
from __future__ import division

import os
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import timeit

from colorama import Fore
from sklearn.metrics import auc, roc_curve, precision_score, recall_score

from utils.vocab import Vocabulary
from utils.reader import Data
from utils.utils import print_progress, create_checkpoints_dir

In [0]:
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

In [7]:
params = {
    "batch_size": 128,
    "embed_size": 64,
    "hidden_size": 64,
    "num_layers": 2,
    "checkpoints": "./checkpoints/",
    "std_factor": 6.,
    "dropout": 0.7,
}

path_normal_data = "./datasets/vulnbank_train.txt"
path_anomaly_data = "./datasets/vulnbank_test.txt"

create_checkpoints_dir(params["checkpoints"])

vocab = Vocabulary()
params["vocab"] = vocab

d = Data(path_normal_data)

Downloaded 11071 samples
edadeaedadedededaeda
<type 'list'>
11071
11071


In [0]:
class Seq2Seq():
    def __init__(self, args):
        tf.reset_default_graph()

        self.batch_size = tf.placeholder(tf.int32, [], name='batch_size')
        self.max_seq_len = tf.placeholder(tf.int32, [], name='max_seq_len')
        self.inputs = tf.placeholder(tf.int32, [None, None], name='inputs')
        self.targets = tf.placeholder(tf.int32, [None, None], name='targets')
        self.lengths = tf.placeholder(tf.int32, [None, ], name='lengths')
        self.dropout = tf.placeholder(tf.float32, name='dropout')
        
        self.num_layers = args['num_layers']
        self.hidden_size = args['hidden_size']
        self.vocab = args['vocab']

        dec_input = self._process_decoder_input(
            self.targets,
            self.vocab.vocab,
            tf.to_int32(self.batch_size))

        vocab_size = len(self.vocab.vocab)

        # Embeddings for inputs
        embed_initializer = tf.random_uniform_initializer(-np.sqrt(3), np.sqrt(3))

        with tf.variable_scope('embedding'):
            embeds = tf.get_variable(
                'embed_matrix',
                [vocab_size, args['embed_size']],
                initializer=embed_initializer,
                dtype=tf.float32)

            enc_embed_input = tf.nn.embedding_lookup(embeds, self.inputs)
            
        enc_outputs, enc_state = self._encoder(enc_embed_input)
        
        # Embeddings for outputs
        with tf.variable_scope('embedding', reuse=True):
            dec_embed_input = tf.nn.embedding_lookup(embeds, dec_input)

        dec_outputs = self._decoder(enc_outputs, enc_state, dec_embed_input)

        weight, bias = self._weight_and_bias(args['hidden_size'], vocab_size)
        outputs = tf.reshape(dec_outputs[0].rnn_output, [-1, args['hidden_size']])
        logits = tf.matmul(outputs, weight) + bias

        logits = tf.reshape(logits, [-1, self.max_seq_len, vocab_size], name='logits')
        self.probs = tf.nn.softmax(logits, name='probs')
        self.decoder_outputs = tf.argmax(logits, axis=2)

        self.cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits,
            labels=self.targets,
            name='cross_entropy')
        self.batch_loss = tf.identity(tf.reduce_mean(self.cross_entropy, axis=1), name='batch_loss')
        self.loss = tf.reduce_mean(self.cross_entropy)

        self.train_optimizer = self._optimizer(self.loss)

        # Saver
        self.saver = tf.train.Saver()
        
    def _encoder(self, enc_embed_input):
        """
        Adds an encoder to the model architecture.
        """
        cells = [self._lstm_cell(self.hidden_size) for _ in range(self.num_layers)]
        multilstm = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=True)

        enc_outputs, enc_state = tf.nn.dynamic_rnn(
            multilstm,
            enc_embed_input,
            swap_memory = True,
            sequence_length=self.lengths,
            dtype=tf.float32)
        enc_outputs = tf.concat([enc_outputs[0], enc_outputs[1]], -1)
        return enc_outputs,enc_state
    
    def _decoder(self,enc_outputs, enc_state, dec_embed_input):
        """
        Adds a decoder to the model architecture.
        """
        output_lengths = tf.ones([self.batch_size], tf.int32) * self.max_seq_len
        helper = tf.contrib.seq2seq.TrainingHelper(
            dec_embed_input,
            output_lengths,
            time_major=False)

        cells = [self._lstm_cell(self.hidden_size) for _ in range(self.num_layers)]
        dec_cell = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=True)
              
        attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(num_units=self.hidden_size, memory=enc_outputs,memory_sequence_length=self.lengths)
        #attention_mechanism = tf.contrib.seq2seq.LuongAttention(num_units=self.rnn_size, memory=encoder_outputs, memory_sequence_length=encoder_inputs_length)

        dec_cell = tf.contrib.seq2seq.AttentionWrapper(cell=dec_cell, attention_mechanism=attention_mechanism,attention_layer_size=self.hidden_size, name='Attention_Wrapper')
                       

        
        decoder = tf.contrib.seq2seq.BasicDecoder(dec_cell, helper, enc_outputs)
        
        
      
        dec_outputs = tf.contrib.seq2seq.dynamic_decode(decoder, impute_finished=True,maximum_iterations=self.max_seq_len)

        return dec_outputs
    
    def _optimizer(self, loss,):
        """
        Optimizes weights given a loss. 
        """
        def _learning_rate_decay_fn(learning_rate, global_step):
            return tf.train.exponential_decay(learning_rate, global_step, decay_steps=10000, decay_rate=0.99)

        starting_lr = 0.001
        starting_global_step = tf.Variable(0, trainable=False)
        optimizer = tf.contrib.layers.optimize_loss(
            loss=loss,
            global_step=starting_global_step,
            learning_rate=starting_lr,
            optimizer=tf.train.AdamOptimizer,
            learning_rate_decay_fn=lambda lr, gs: _learning_rate_decay_fn(lr, gs),
            clip_gradients=5.0)
        
        return optimizer
    
    def _process_decoder_input(self, target_data, char_to_code, batch_size):
        """
        Concatenates the <GO> to the begining of each batch.
        """
        ending = tf.strided_slice(target_data, [0, 0], [batch_size, -1], [1, 1])
        #print(ending)
        dec_input = tf.concat([tf.fill([batch_size, 1], char_to_code['<GO>']), ending], 1)
        return dec_input

    def _lstm_cell(self, hidden_size):
        """
        Returns LSTM cell with dropout.
        """
        cell = tf.contrib.rnn.LSTMCell(
            hidden_size,
            initializer=tf.contrib.layers.xavier_initializer())

        cell = tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=self.dropout)

        return cell

    def _weight_and_bias(self, in_size, out_size):
        """
        Initializes weights and biases.
        """
        weight = tf.Variable(tf.truncated_normal([in_size, out_size], stddev=0.01))
        bias = tf.Variable(tf.constant(1., shape=[out_size]))

        return weight, bias

In [0]:
class Seq2Seq():
    def __init__(self, args):
        tf.reset_default_graph()

        self.batch_size = tf.placeholder(tf.int32, [], name='batch_size')
        self.max_seq_len = tf.placeholder(tf.int32, [], name='max_seq_len')
        self.inputs = tf.placeholder(tf.int32, [None, None], name='inputs')
        self.targets = tf.placeholder(tf.int32, [None, None], name='targets')
        self.lengths = tf.placeholder(tf.int32, [None, ], name='lengths')
        self.dropout = tf.placeholder(tf.float32, name='dropout')
        
        self.num_layers = args['num_layers']
        self.hidden_size = args['hidden_size']
        self.vocab = args['vocab']

        dec_input = self._process_decoder_input(
            self.targets,
            self.vocab.vocab,
            tf.to_int32(self.batch_size))

        vocab_size = len(self.vocab.vocab)

        # Embeddings for inputs
        embed_initializer = tf.random_uniform_initializer(-np.sqrt(3), np.sqrt(3))

        with tf.variable_scope('embedding'):
            embeds = tf.get_variable(
                'embed_matrix',
                [vocab_size, args['embed_size']],
                initializer=embed_initializer,
                dtype=tf.float32)

            enc_embed_input = tf.nn.embedding_lookup(embeds, self.inputs)
            
        enc_state = self._encoder(enc_embed_input)
        
        # Embeddings for outputs
        with tf.variable_scope('embedding', reuse=True):
            dec_embed_input = tf.nn.embedding_lookup(embeds, dec_input)

        dec_outputs = self._decoder(enc_state, dec_embed_input)

        weight, bias = self._weight_and_bias(args['hidden_size'], vocab_size)
        outputs = tf.reshape(dec_outputs[0].rnn_output, [-1, args['hidden_size']])
        logits = tf.matmul(outputs, weight) + bias

        logits = tf.reshape(logits, [-1, self.max_seq_len, vocab_size], name='logits')
        self.probs = tf.nn.softmax(logits, name='probs')
        self.decoder_outputs = tf.argmax(logits, axis=2)

        self.cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits,
            labels=self.targets,
            name='cross_entropy')
        self.batch_loss = tf.identity(tf.reduce_mean(self.cross_entropy, axis=1), name='batch_loss')
        self.loss = tf.reduce_mean(self.cross_entropy)

        self.train_optimizer = self._optimizer(self.loss)

        # Saver
        self.saver = tf.train.Saver()
        
    def _encoder(self, enc_embed_input):
        """
        Adds an encoder to the model architecture.
        """
        cells = [self._lstm_cell(self.hidden_size) for _ in range(self.num_layers)]
        multilstm = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=True)

        _, enc_state = tf.nn.dynamic_rnn(
            multilstm,
            enc_embed_input,
            sequence_length=self.lengths,
            swap_memory=True,
            dtype=tf.float32)
        
        return enc_state
    
    def _decoder(self, enc_state, dec_embed_input):
        """
        Adds a decoder to the model architecture.
        """
        output_lengths = tf.ones([self.batch_size], tf.int32) * self.max_seq_len
        helper = tf.contrib.seq2seq.TrainingHelper(
            dec_embed_input,
            output_lengths,
            time_major=False)

        cells = [self._lstm_cell(self.hidden_size) for _ in range(self.num_layers)]
        dec_cell = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=True)

        decoder = tf.contrib.seq2seq.BasicDecoder(dec_cell, helper, enc_state)

        dec_outputs = tf.contrib.seq2seq.dynamic_decode(
            decoder,
            output_time_major=False,
            impute_finished=True,
            maximum_iterations=self.max_seq_len, swap_memory=True)
        
        return dec_outputs
    
    def _optimizer(self, loss,):
        """
        Optimizes weights given a loss. 
        """
        def _learning_rate_decay_fn(learning_rate, global_step):
            return tf.train.exponential_decay(learning_rate, global_step, decay_steps=10000, decay_rate=0.99)

        starting_lr = 0.001
        starting_global_step = tf.Variable(0, trainable=False)
        optimizer = tf.contrib.layers.optimize_loss(
            loss=loss,
            global_step=starting_global_step,
            learning_rate=starting_lr,
            optimizer=tf.train.AdamOptimizer,
            learning_rate_decay_fn=lambda lr, gs: _learning_rate_decay_fn(lr, gs),
            clip_gradients=5.0)
        
        return optimizer
    
    def _process_decoder_input(self, target_data, char_to_code, batch_size):
        """
        Concatenates the <GO> to the begining of each batch.
        """
        ending = tf.strided_slice(target_data, [0, 0], [batch_size, -1], [1, 1])
        dec_input = tf.concat([tf.fill([batch_size, 1], char_to_code['<GO>']), ending], 1)

        return dec_input

    def _lstm_cell(self, hidden_size):
        """
        Returns LSTM cell with dropout.
        """
        cell = tf.contrib.rnn.LSTMCell(
            hidden_size,
            initializer=tf.contrib.layers.xavier_initializer())

        cell = tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=self.dropout)

        return cell

    def _weight_and_bias(self, in_size, out_size):
        """
        Initializes weights and biases.
        """
        weight = tf.Variable(tf.truncated_normal([in_size, out_size], stddev=0.01))
        bias = tf.Variable(tf.constant(1., shape=[out_size]))

        return weight, bias


In [9]:
print(d.data[0:10])
print(d.lengths[0:10])

[[4, 7, 4, 28, 4, 1], [5, 6, 4, 6, 5, 4, 5, 6, 4, 6, 5, 6, 5, 6, 5, 6, 4, 5, 6, 4, 1], [4, 10, 4, 1], [4, 17, 4, 6, 5, 4, 5, 6, 4, 6, 5, 4, 6, 5, 4, 5, 6, 4, 6, 5, 7, 28, 7, 17, 7, 1], [4, 5, 6, 5, 4, 5, 6, 4, 6, 5, 4, 6, 5, 4, 6, 5, 4, 5, 6, 4, 10, 1], [4, 5, 6, 4, 5, 6, 4, 5, 6, 4, 6, 5, 4, 1], [8, 1], [4, 5, 6, 4, 6, 5, 4, 5, 6, 4, 5, 6, 4, 1], [4, 5, 6, 4, 1], [4, 5, 6, 5, 4, 6, 5, 4, 5, 6, 4, 5, 6, 4, 5, 6, 4, 5, 6, 4, 7, 18, 1]]
[6, 21, 4, 26, 22, 14, 2, 14, 5, 23]


In [0]:
class Trainer():

    def __init__(self, batch_size, checkpoints_path, dropout):
        self.batch_size = batch_size
        self.checkpoints = checkpoints_path
        self.path_to_graph = checkpoints_path + 'seq2seq'
        self.dropout = dropout

    def train(self, model, train_data, train_size, num_steps, num_epochs, min_loss=0.1):
        """
        Trains a given model architecture with given train data.
        """
        tf.set_random_seed(1234)
        
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            total_loss = []
            timings = []
            steps_per_epoch = int(train_size / self.batch_size)
            num_epoch = 1
            
            for step in range(1, num_steps):
                beg_t = timeit.default_timer()
                X, L = train_data.next()
                seq_len = np.max(L)

                # For anomaly detection problem we reconstruct input data, so
                # targets and inputs are identical.
                feed_dict = {
                    model.inputs: X,
                    model.targets: X,
                    model.lengths: L,
                    model.dropout: self.dropout,
                    model.batch_size: self.batch_size,
                    model.max_seq_len: seq_len}
                
                fetches = [model.loss, model.decoder_outputs, model.train_optimizer]
                step_loss, _, _ = sess.run(fetches, feed_dict)

                total_loss.append(step_loss)
                timings.append(timeit.default_timer() - beg_t)

                if step % steps_per_epoch == 0:
                    num_epoch += 1

                if step % 200 == 0 or step == 1:
                    print_progress(
                        int(step / 200),
                        num_epoch,
                        np.mean(total_loss),
                        np.mean(step_loss),
                        np.sum(timings))
                    timings = []

                if step == 1:
                    _ = tf.train.export_meta_graph(filename=self.path_to_graph + '.meta')
                
                if np.mean(total_loss) < min_loss or num_epoch > num_epochs:
                    model.saver.save(sess, self.path_to_graph, global_step=step)
                    print("Training is finished.")
                    break


In [11]:
model = Seq2Seq(params)
t = Trainer(params["batch_size"], params["checkpoints"], params["dropout"])

Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Colocations handled automatically by placer.

For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [12]:
num_steps = 10 ** 6
num_epochs = 60

train_gen = d.train_generator(params["batch_size"], num_epochs)
train_size = d.train_size

t.train(model, train_gen, train_size, num_steps, num_epochs)

Step 0 (epoch 1), average_train_loss = 4.27602, step_loss = 4.27602, time_per_step = 1.584
Step 1 (epoch 4), average_train_loss = 1.33688, step_loss = 0.56431, time_per_step = 49.109
Step 2 (epoch 7), average_train_loss = 0.94964, step_loss = 0.54772, time_per_step = 49.571
Step 3 (epoch 10), average_train_loss = 0.77622, step_loss = 0.40292, time_per_step = 49.444
Step 4 (epoch 13), average_train_loss = 0.66395, step_loss = 0.27284, time_per_step = 49.241
Step 5 (epoch 17), average_train_loss = 0.58083, step_loss = 0.17995, time_per_step = 49.159
Step 6 (epoch 20), average_train_loss = 0.51713, step_loss = 0.17119, time_per_step = 49.341
Step 7 (epoch 23), average_train_loss = 0.46713, step_loss = 0.20865, time_per_step = 49.380
Step 8 (epoch 26), average_train_loss = 0.42638, step_loss = 0.11940, time_per_step = 49.335
Step 9 (epoch 30), average_train_loss = 0.39275, step_loss = 0.13402, time_per_step = 49.348
Step 10 (epoch 33), average_train_loss = 0.36414, step_loss = 0.08014, tim

In [0]:
class Predictor():
    def __init__(self, checkpoints_path, std_factor, vocab):

        self.threshold = 0.
        self.checkpoints = checkpoints_path
        self.path_to_graph = checkpoints_path + 'seq2seq'
        self.std_factor = std_factor
        self.vocab = vocab
        self.__load()

    def __load(self):
        """
        Loads model from the checkpoint directory and sets models params. 
        """
        try:
            loaded_graph = tf.Graph()
            with loaded_graph.as_default():
                saver = tf.train.import_meta_graph(
                    self.path_to_graph + '.meta')

            self.sess = tf.Session(graph=loaded_graph)
            saver.restore(self.sess, tf.train.latest_checkpoint(
                self.checkpoints))

            # loading model parameters
            self.inputs = loaded_graph.get_tensor_by_name('inputs:0')
            self.targets = loaded_graph.get_tensor_by_name('targets:0')
            self.lengths = loaded_graph.get_tensor_by_name('lengths:0')
            self.dropout = loaded_graph.get_tensor_by_name('dropout:0')
            self.batch_size_tensor = loaded_graph.get_tensor_by_name('batch_size:0')
            self.seq_len_tensor = loaded_graph.get_tensor_by_name('max_seq_len:0')
            self.get_batch_loss = loaded_graph.get_tensor_by_name('batch_loss:0')
            self.get_probabilities = loaded_graph.get_tensor_by_name('probs:0')
            self.get_logits = loaded_graph.get_tensor_by_name('logits:0')
            
        except Exception as e:
            raise ValueError('Unable to create model: {}'.format(e))

    def set_threshold(self, data_gen):
        """
        Calculates threshold for anomaly detection.
        """
        
        total_loss = []
        for seq, l in data_gen:
            batch_loss, _ = self._predict_for_request(seq, l)
            total_loss.extend(batch_loss)

        mean = np.mean(total_loss)
        std = np.std(total_loss)
        self.threshold = mean + self.std_factor * std

        print('Validation loss mean: ', mean)
        print('Validation loss std: ', std)
        print('Threshold for anomaly detection: ', self.threshold)
        
        return self.threshold

    def predict(self, data_gen, visual=True):
        """
        Predicts probabilities and loss for given sequences.
        """
        loss = []
        predictions = []
        num_displayed = 0
        
        for seq, l in data_gen:
            batch_loss, alphas = self._predict_for_request(seq, l)
            #print('Batchloss:',batch_loss)
            #print('alphas:',alphas)
            #print('shape,alphas:',alphas.shape)
            loss.extend(batch_loss)
            alphas = self._process_alphas(seq, alphas, 1)
            mask = np.array([l > self.threshold for l in batch_loss])
            final_pred = mask.astype(int)
            predictions.extend(final_pred)
            
            if visual and num_displayed < 10 and final_pred == [1]:
              
                print('\n\nPrediction: ', final_pred[0])
                print('Loss ', batch_loss[0])
                
                num_displayed += 1 
                self._visual(alphas, seq)
        
        return predictions, loss

    def _predict_for_request(self, X, l):
        """
        Predicts probabilities and loss for given data. 
        """
        lengths = [l]
        max_seq_len = l
        feed_dict = {
            self.inputs: X,
            self.targets: X,
            self.lengths: lengths,
            self.dropout: 1.0,
            self.batch_size_tensor: 1,
            self.seq_len_tensor: max_seq_len}

        fetches = [self.get_batch_loss, self.get_probabilities]
        #print('fetches:',self.sess.run(fetches))
        batch_loss, alphas = self.sess.run(fetches, feed_dict=feed_dict)
        #这里的 alpha是预测出来的行为序列的向量形式size为[1*len_sentence*72]
        return batch_loss, alphas

    def _process_alphas(self, X, alphas, batch_size):
        """
        Counts numbers as probabilities for given data sample.
        """
        processed_alphas = []
        for i in range(batch_size):
           
            probs = alphas[i]
            coefs = np.array([probs[j][X[i][j]] for j in range(len(X[i]))])
            coefs = coefs / coefs.max()
            processed_alphas.append(coefs)
            
        return processed_alphas

    def _visual(self, alphas, X):
        """
        Colors sequence of malicious characters.
        """
        for i, x in enumerate(X):
            coefs = alphas[i]
            tokens = self.vocab.int_to_string(x)
            
            for j in range(len(x)):
                token = tokens[j]
                if coefs[j] < 0.09:
                    c = Fore.RED
                else:
                    c = Fore.BLACK
                if token != '<PAD>' and token != '<EOS>':
                    token = ''.join(c + token)
                    print(token, end='')
                    
            print(Fore.BLACK + '', end='')

In [23]:
p = Predictor(params["checkpoints"], params["std_factor"], params["vocab"])

INFO:tensorflow:Restoring parameters from ./checkpoints/seq2seq-3720


In [24]:
val_gen = d.val_generator()
threshold = p.set_threshold(val_gen)

Validation loss mean:  0.134033
Validation loss std:  0.3600101
Threshold for anomaly detection:  2.294093519449234


In [25]:
test_gen = d.test_generator()
valid_preds, valid_loss = p.predict(test_gen)



Prediction:  1
Loss  2.511054
[31m4[30m

Prediction:  1
Loss  3.1032686
[31mR[30mF[31m%[30m

Prediction:  1
Loss  2.6767395
[30ma[30me[30ma[30me[30ma[30me[30ma[30me[30ma[30me[30ma[30me[30ma[30me[30ma[30me[30ma[30me[30ma[31me[31mq[31mT[31mG[31mT[31mG[31mT[31mG[31mT[31mG[31mw[31mb[31mp[30mb[31mp[30mb[31mp[30mb[30mw[30m

Prediction:  1
Loss  2.7230406
[30mg[30mb[30mg[30mb[30mg[30mb[30mg[31mb[30mg[31mb[30mg[31mr[30mg[31mb[30mg[31mr[30mg[31mr[30mg[31mr[31mg[30m

Prediction:  1
Loss  2.637253
[31mC[30m

Prediction:  1
Loss  3.843855
[30ma[30mb[30ma[30mb[31mC[30ma[31mC[30mb[31mC[30mb[31mC[30mb[31mC[30mb[31ma[31mC[30mb[31mC[30mb[31mC[30mb[31mC[30mb[31mC[30mb[31mC[30mb[31mC[30mb[31mC[30mb[31mC[30mb[31mC[30mb[31mC[30mb[31mC[30mb[31mC[30mb[31mC[30mb[31mC[30mb[31mC[30mb[31mC[30mb[31mC[31mo[31mC[30mo[31mC[30mo[31mC[30mo[31mC[30mo[31mC[31mo[31mC[31mo[31mC[31mo[31mC

In [26]:
print('Number of FP: ', np.sum(valid_preds))
print('Number of samples: ', len(valid_preds))
print('FP rate: {:.4f}'.format(np.sum(valid_preds) / len(valid_preds)))

Number of FP:  6
Number of samples:  1108
FP rate: 0.0054


In [27]:
pred_data = Data(path_anomaly_data, predict=True)
pred_gen = pred_data.predict_generator()
anomaly_preds, anomaly_loss = p.predict(pred_gen)

Downloaded 14242 samples
a
<type 'list'>
14242
14242


Prediction:  1
Loss  2.679546
[30mb[31mp[30mb[30mp[30mb[30mp[30mb[30mp[30mb[30mn[30mb[30mn[30mb[30mp[30mb[30mn[30mb[31mI[30mb[31mI[30mb[30mo[30mb[31mI[30mb[30mn[30mb[30mo[30mb[30mo[30mb[30mo[30mb[31mI[30mb[31mI[30mb[30mo[30mb[31mI[30mb[30mn[30mb[30mo[30mb[31mI[30mb[31mI[30mb[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[30mo[31mI[31mo[31mI[31mo[31mI[31mo[30m

Predicti

In [28]:
print('Number of TP: ', len(anomaly_preds)-np.sum(anomaly_preds))
print('Number of samples: ', len(anomaly_preds))
print('TP rate: {:.4f}'.format((len(anomaly_preds)-np.sum(anomaly_preds)) / len(anomaly_preds)))

Number of TP:  14189
Number of samples:  14242
TP rate: 0.9963
