In [1]:
cd /home/jovyan/assets

/home/jovyan/assets


In [469]:
# -- Standard library -- #
import re
import time
from copy import deepcopy
from dataclasses import dataclass
from functools import partial

# -- Tensorflow -- #
import tensorflow as tf

from tensorflow.keras.layers import (
    Softmax,
    Dense,
    AdditiveAttention,
    MultiHeadAttention,
    Layer,
    LayerNormalization,
    Dropout,
    Embedding
)

from tensorflow.keras import (
    Sequential,
    Model
)

# -- Third party -- #
import numpy as np
import pandas as pd

from datasets import Dataset
from transformers import PreTrainedTokenizerFast

# -- Local -- #
from libs.transformers.src.transformers.models.bert.modeling_tf_bert import TFBertModel

### Let's do this

In [470]:
# Load the pretrained encoder and its fast tokenizer from the same results directory.
# local_files_only=True is passed to both loaders so they only use files already on disk.
new_model = TFBertModel.from_pretrained("../results/PreTrainedModel/", local_files_only=True)
tokenizer = PreTrainedTokenizerFast.from_pretrained("../results/PreTrainedModel/", local_files_only=True)

Some layers from the model checkpoint at ../results/PreTrainedModel/ were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at ../results/PreTrainedModel/.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [471]:
log = "INFO  HealthCheckService:106 - status: HEALTHY, name: render /ruleset/retrieveAllRulesets, duration_ms: 2, failure_reason: none"

In [472]:
log2 = "INFO  (qtp1991278377-18) [c:catalog s:shard1 r:core_node3 x:catalog_shard1_replica_n1] o.a.s.u.p.LogUpdateProcessorFactory [catalog_shard1_replica_n1]  webapp=/solr path=/update params={wt=javabin&version=2}{add=[ffeef73262fd4957989d0835d2c64cb0 (1690245413032951808), 00311ec2b68445cfa7bd6b94bb3f8868 (1690245413036097536), 809c0dd680da476fa20bccb21e7a791f (1690245413038194688), c7ab97f8c8194adaad5268a04c01fd4c (1690245413041340416), 1e68fda379694be98c5731c96c3becfc (1690245413044486144), dd1c48d436c74056b9d5329b958cdfb6 (1690245413048680448), c547b31c7250473998b6c72a8cdc3e41 (1690245413073846272), 4429838b832b4a98869ee7cfc84e4520 (1690245413075943424), 23b29fba566c4b19b3b8a1da7901462f (1690245413080137728), 393753568bdf42cda367643d3e87a3c3 (1690245413088526336), ... (250 adds)]} 0 1000"

In [473]:
dt = pd.DataFrame({"logs": [log, log2]})

In [476]:
# Tokenize every log line to a fixed width of 300 tokens and return TensorFlow tensors.
# truncation=True is required alongside padding='max_length': without it a log longer
# than max_length keeps its full length, the rows become ragged, and building the
# rectangular 'tf' tensors fails. Logs that already fit are unaffected.
x = tokenizer(
    dt["logs"].tolist(),
    padding='max_length',
    truncation=True,
    max_length=300,
    return_tensors='tf'
)
# Convert the tokenizer's return value into a plain dict before handing it to the model.
x = dict(x)

In [477]:
new_model(x, training=False)

TFBaseModelOutputWithPooling(last_hidden_state=<tf.Tensor: shape=(2, 300, 512), dtype=float32, numpy=
array([[[ 1.5100684 ,  0.81711674, -1.8835433 , ...,  0.33733317,
          1.5984166 ,  0.19801913],
        [-1.4137377 ,  2.0870078 , -0.35905617, ...,  0.6584699 ,
         -0.36399263, -1.2063018 ],
        [ 0.8806996 ,  0.7416481 , -0.16320974, ...,  0.18457912,
         -1.4883114 ,  0.3758193 ],
        ...,
        [ 1.4394886 ,  0.6448962 , -0.90818644, ..., -0.38945514,
          0.370754  , -1.6600301 ],
        [ 1.4173337 ,  0.9892787 , -0.6283121 , ...,  0.31201038,
         -0.39546224, -1.9692696 ],
        [ 0.7004079 ,  0.63683754, -0.46776372, ...,  0.03642247,
          0.62092674, -3.3203356 ]],

       [[ 1.4250323 ,  0.80526364, -1.7922479 , ...,  0.22904322,
          1.6746545 ,  0.2019776 ],
        [-1.4716982 ,  2.1031938 , -0.24442998, ...,  0.5462447 ,
         -0.29639524, -1.2052654 ],
        [ 0.4133391 ,  0.7002309 , -0.74047464, ...,  1.0383904 ,
 

# Transformer Pipeline

In [None]:
@dataclass
class TransformerGlobalConfig:
    """Settings shared by the whole transformer pipeline.

    Instances are copied attribute-by-attribute onto the model and its layers
    through ``set_attributes_from_object``.

    NOTE(review): other cells read ``max_seq_len``, ``batch_size``, ``epoch``,
    ``logdir`` and ``activation`` from this object, none of which are declared
    here — presumably they are injected by ``load()``; confirm against the YAML.
    """
    d_model: int = 512  # model width handed to Embedding, PositionalEncoding and EncoderBlock
    max_seq_length: int = 200  # NOTE(review): consumers read `max_seq_len`, not this name — verify
    global_training: bool = True  # not read by any code visible in this notebook
    storage_path: str = '/results/'  # not read by any code visible in this notebook

    def load(self, config):
        """Apply ``config`` to this object by delegating to ``set_attributes``.

        NOTE(review): ``set_attributes`` is not defined in the visible part of
        the notebook — confirm where it comes from and what it expects.
        """
        set_attributes(self, config)


@dataclass
class BERTLayerConfig:
    """Hyper-parameters for ``BERTLayer``.

    ``BERTLayer`` copies every attribute of this object onto itself and builds
    its encoder stack from them.
    """
    num_attention_heads: int = 8  # passed to each EncoderBlock as num_heads
    num_encoder_layers: int = 12  # number of EncoderBlock instances created
    dff: int = 2048  # units of the first Dense layer in each block's feed-forward network
    max_seq_len: int = 2048  # not read by BERTLayer itself in the visible code
    dropout_rate: float = 0.1  # passed to each EncoderBlock as rate
    load_model: bool = False  # not read by any code visible in this notebook
    save_model: bool = True  # not read by any code visible in this notebook
    training: bool = True  # not read by any code visible in this notebook

    def load(self, config):
        """Apply ``config`` to this object by delegating to ``set_attributes``.

        NOTE(review): ``set_attributes`` is not defined in the visible part of
        the notebook — confirm where it comes from and what it expects.
        """
        set_attributes(self, config)


@dataclass
class HitAnomalyLayerConfig:
    """Hyper-parameters for ``HitAnomalyLayer``.

    ``HitAnomalyLayer`` copies every attribute of this object onto itself and
    builds its encoder blocks from them.
    """
    num_attention_heads: int = 12  # passed to each EncoderBlock as num_heads
    num_encoder_layers: int = 3  # all but the last block run before pooling; the last runs after
    dff: int = 2048  # units of the first Dense layer in each block's feed-forward network
    max_seq_len: int = 2048  # not read by HitAnomalyLayer itself in the visible code
    dropout_rate: float = 0.1  # passed to each EncoderBlock as rate
    load_model: bool = False  # not read by any code visible in this notebook
    save_model: bool = True  # not read by any code visible in this notebook
    training: bool = True  # not read by any code visible in this notebook

    def load(self, config):
        """Apply ``config`` to this object by delegating to ``set_attributes``.

        NOTE(review): ``set_attributes`` is not defined in the visible part of
        the notebook — confirm where it comes from and what it expects.
        """
        set_attributes(self, config)


class TransformerConfig:
    """Bundle of the global, BERT and HitAnomaly configuration sections."""

    def __init__(self):
        # Each section starts from its dataclass defaults.
        self._global = TransformerGlobalConfig()
        self.BERT = BERTLayerConfig()
        self.HitAnomaly = HitAnomalyLayerConfig()

    def load(self, path):
        """Read the YAML file at ``path`` and hand the parsed content to every section.

        A missing file is logged as a warning and leaves all sections untouched;
        the method returns ``None`` in every case.
        """
        try:
            with open(path) as config_file:
                # NOTE(review): FullLoader can construct more than plain data types;
                # prefer yaml.safe_load if this file can ever come from an untrusted source.
                parsed = yaml.load(config_file, Loader=yaml.FullLoader)
        except FileNotFoundError as missing:
            logger.warning(missing)
            return None

        # Every section receives the complete parsed document.
        for section in (self._global, self.BERT, self.HitAnomaly):
            section.load(parsed)

def set_attributes_from_object(self, *args):
    """Copy every instance attribute of each object in ``args`` onto ``self``.

    Objects are processed left to right, so a later object overrides an
    attribute set by an earlier one. Any exception is logged as a warning
    instead of being raised; attributes copied before the failure stay set.
    """
    try:
        # Lazily flatten (name, value) pairs so copying stays interleaved with lookup.
        pairs = (
            (name, value)
            for source in args
            for name, value in source.__dict__.items()
        )
        for name, value in pairs:
            setattr(self, name, value)
    except Exception as e:
        logger.warning(e)

## Metric Objects

### Loss Function

In [None]:
def loss_function(real, pred):
    """Masked mean of ``loss_object(real, pred)`` over the non-zero labels.

    Positions where ``real == 0`` are excluded from both the numerator and the
    denominator. The mask only has a per-element effect when the module-level
    ``loss_object`` returns unreduced losses (``reduction='none'``); with a
    reduced scalar loss the result is simply that scalar.

    NOTE(review): label 0 is treated as "ignore" — confirm 0 is not a real class.

    Args:
        real: Ground-truth labels.
        pred: Model outputs in the form ``loss_object`` expects.

    Returns:
        Scalar tensor with the masked mean loss; 0 when every label is 0
        (previously this case produced NaN from a 0/0 division).
    """
    mask = tf.math.not_equal(real, 0)
    loss_ = loss_object(real, pred)

    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask

    # divide_no_nan returns 0 for a zero denominator, so a fully masked batch cannot poison training.
    return tf.math.divide_no_nan(tf.reduce_sum(loss_), tf.reduce_sum(mask))

### Accuracy Function

In [None]:
def accuracy_function(real, pred):
    """Fraction of non-zero labels whose arg-max prediction matches the label.

    Args:
        real: Integer ground-truth labels; entries equal to 0 are ignored.
        pred: Scores with the class dimension on axis 1.

    Returns:
        Scalar float32 tensor; 0 when every label is 0 (previously NaN).
    """
    predicted = tf.argmax(pred, axis=1)
    # tf.argmax yields int64 while labels may arrive as another integer type
    # (the train-step signature declares int8); tf.equal requires matching dtypes.
    real = tf.cast(real, predicted.dtype)

    mask = tf.math.not_equal(real, 0)
    accuracies = tf.math.logical_and(mask, tf.equal(real, predicted))

    accuracies = tf.cast(accuracies, dtype=tf.float32)
    mask = tf.cast(mask, dtype=tf.float32)
    return tf.math.divide_no_nan(tf.reduce_sum(accuracies), tf.reduce_sum(mask))

### Custom Learning Rate Schedule

In [None]:
class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    """Warm-up learning-rate schedule.

    Computes ``rsqrt(d_model) * min(rsqrt(step), step * warmup_steps ** -1.5)``:
    the rate grows linearly with ``step`` until ``warmup_steps`` and decays with
    the inverse square root of ``step`` afterwards.
    """

    def __init__(self, d_model: int, warmup_steps=4000):
        """
        Args:
            d_model: Model width; stored as a float32 tensor.
            warmup_steps: Number of steps over which the rate increases.
        """
        super().__init__()

        self.d_model = tf.cast(d_model, tf.float32)
        self.warmup_steps = warmup_steps

    def __call__(self, step):
        """Return the learning rate for ``step`` as a float32 tensor."""
        # Optimizers pass their integer iteration counter; rsqrt only accepts
        # floating-point input, so convert before doing any arithmetic.
        step = tf.cast(step, tf.float32)
        arg1 = tf.math.rsqrt(step)
        arg2 = step * (self.warmup_steps ** -1.5)

        return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)

## Pipeline Objects

### PositionalEncodingLayer

In [None]:
class PositionalEncoding(tf.keras.layers.Layer):
    """Adds a fixed sinusoidal position table to its input.

    The table has shape ``(1, max_steps, max_dims)`` with sines in the even
    feature columns and cosines in the odd ones. It is precomputed once in the
    constructor and stored as a constant.
    """

    def __init__(self, max_steps, max_dims, dtype=tf.float32, **kwargs):
        super().__init__(dtype=dtype, **kwargs)
        # The table is filled in sin/cos column pairs, so round an odd width up.
        if max_dims % 2 == 1:
            max_dims += 1

        positions, pair_index = np.meshgrid(np.arange(max_steps), np.arange(max_dims // 2))
        # One angle per (position, column pair); transposed to (max_steps, max_dims // 2).
        angles = (positions / 10000 ** (2 * pair_index / max_dims)).T

        table = np.empty((1, max_steps, max_dims))
        table[0, :, 0::2] = np.sin(angles)
        table[0, :, 1::2] = np.cos(angles)
        self.positional_embedding = tf.constant(table.astype(self.dtype))

    def call(self, inputs):
        """Return ``inputs`` plus the table cropped to the input's last two dimensions."""
        input_shape = tf.shape(inputs)
        steps, dims = input_shape[-2], input_shape[-1]
        return inputs + self.positional_embedding[:, :steps, :dims]

### EncoderBlock

In [None]:
class EncoderBlock(Layer):
    """One encoder block: self-attention and a feed-forward network, each
    followed by dropout, a residual connection and layer normalization.
    """

    def __init__(
        self,
        d_model: int,
        num_heads: int,
        dff: int,
        rate=0.1):
        """
        Args:
            d_model: Width of the block's input and output features.
            num_heads: Number of attention heads; each head uses
                ``d_model // num_heads`` key dimensions.
            dff: Units of the first feed-forward Dense layer.
            rate: Dropout rate used for the attention weights and both
                residual branches.
        """
        super(EncoderBlock, self).__init__()

        # The attention dropout now follows `rate` instead of a hard-coded 0.1
        # (identical at the default, consistent when a config overrides it).
        self.multi_headed_attention = MultiHeadAttention(num_heads=num_heads,
                                                         key_dim=d_model // num_heads,
                                                         dropout=rate,
                                                         attention_axes=(1,))

        # NOTE(review): the second Dense also applies ReLU, so the residual branch
        # is non-negative; the usual transformer FFN leaves it linear — confirm intent.
        self.feed_forward_network = Sequential([
            Dense(dff, activation='relu'),
            Dense(d_model, activation='relu')
        ])

        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)

        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

    def call(self, x, mask, training=None):
        """Run the block on ``x``.

        Args:
            x: Input tensor; assumed (batch_size, seq_len, d_model) — TODO confirm.
            mask: Optional attention mask forwarded to the attention layer as
                ``attention_mask`` (Keras expects a shape broadcastable to
                (batch, target_len, source_len)); ``None`` disables masking.
            training: Keras training flag. Dropout is only active when it is
                true; previously dropout was forced on, which made inference
                outputs random.

        Returns:
            Tuple ``(output, attention_weights)``.
        """
        # (1) - Self-attention: query and value are both `x`.
        attn_output, attn_weights = self.multi_headed_attention(
            x,
            x,
            attention_mask=mask,
            return_attention_scores=True,
            training=training)

        # (2) - Add & Normalize
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(x + attn_output)

        # (3) - Feed Forward NN
        feed_forward_output = self.feed_forward_network(out1)

        # (4) - Add & Normalize
        feed_forward_output = self.dropout2(feed_forward_output, training=training)
        out2 = self.layernorm2(out1 + feed_forward_output)

        return out2, attn_weights

### BERT

In [None]:
class BERTLayer(Layer):
    """Stack of ``EncoderBlock`` layers applied one after another."""

    def __init__(
        self,
        global_config: TransformerGlobalConfig,
        config: BERTLayerConfig):
        """
        Args:
            global_config: Shared settings; supplies ``d_model``.
            config: Layer settings; supplies ``num_attention_heads``, ``dff``,
                ``dropout_rate`` and ``num_encoder_layers``.
        """
        super(BERTLayer, self).__init__()

        # Copies every attribute of both configs onto this layer
        # (the later object wins when names collide).
        set_attributes_from_object(
            self,
            global_config,
            config)

        self.bert_layer_blocks = [EncoderBlock(
            self.d_model,
            self.num_attention_heads,
            self.dff,
            rate=self.dropout_rate) for _ in range(self.num_encoder_layers)]

    def call(self, input_, **kwargs):
        """Run the encoder stack.

        Args:
            input_: Sequence whose first element is the tensor to encode.

        Returns:
            Python tuple ``(encoded, attention)`` from the last block, or
            ``(input_[0], None)`` when the stack is empty.
        """
        hidden = input_[0]
        attention = None
        encoding_padding_mask = None  # masking is not wired up yet

        # Feed each block the previous block's output. The earlier loop passed
        # the original input to every block, so only the last block mattered.
        for block in self.bert_layer_blocks:
            hidden, attention = block(hidden, encoding_padding_mask)

        # Return a plain tuple: tf.tuple's second positional parameter is
        # `control_inputs`, so tf.tuple(a, b) does not build the pair (a, b).
        return hidden, attention

### HitAnomaly

In [None]:
class HitAnomalyLayer(Layer):
    """Encoder stack with a pooling step before the final block."""

    def __init__(
        self,
        vocab_size: int,
        global_config: TransformerGlobalConfig,
        config: HitAnomalyLayerConfig):
        """
        Args:
            vocab_size: Stored on the layer; not used by ``call``.
            global_config: Shared settings; supplies ``d_model``.
            config: Layer settings; supplies ``num_attention_heads``, ``dff``,
                ``dropout_rate`` and ``num_encoder_layers`` (at least 1).
        """
        super(HitAnomalyLayer, self).__init__()

        self.vocab_size = vocab_size
        set_attributes_from_object(
            self,
            global_config,
            config)

        self.encoding_blocks = [EncoderBlock(
            self.d_model,
            self.num_attention_heads,
            self.dff,
            rate=self.dropout_rate
        ) for _ in range(self.num_encoder_layers)]

        # Block outputs of the most recent call, in execution order.
        self.hidden_layer_output = []

    def call(self, input_, **kwargs):
        """Encode ``input_[0]`` and pool it.

        All blocks except the last run in sequence; their result is averaged
        over axis 1, given a leading axis of size 1, passed through the last
        block and averaged over axis 1 again.

        Returns:
            Tuple ``(seq_representation, attention)`` where ``attention`` comes
            from the last block.
        """
        hidden = input_[0]
        encoding_padding_mask = None  # masking is not wired up yet

        # Collect into a fresh list so the attribute reflects only this call;
        # appending to the old list made it grow on every forward pass.
        hidden_states = []

        # Chain the blocks. `hidden` starts as the input, so a single-block
        # configuration (empty loop) no longer hits an unbound variable.
        for block in self.encoding_blocks[:-1]:
            hidden, att = block(hidden, encoding_padding_mask)
            hidden_states.append(hidden)

        final_output = tf.reduce_mean(hidden, axis=1)
        final_output = tf.expand_dims(final_output, axis=0)

        # Last Encoding Block for Log Sequence Representation
        out, att = self.encoding_blocks[-1](final_output, encoding_padding_mask)
        hidden_states.append(out)
        self.hidden_layer_output = hidden_states

        # Final Pooling Layer
        seq_representation = tf.reduce_mean(out, axis=1)

        return seq_representation, att

### Transformer

In [None]:
class Transformer(Model):
    """Embedding + positional encoding + ``BERTLayer`` model.

    A ``HitAnomalyLayer`` is also constructed but is not used by ``call`` yet.
    """

    def __init__(
        self,
        tokenizer: PrimeTokenizer,
        config: TransformerConfig):
        """
        Args:
            tokenizer: Provides the vocabulary size via ``get_vocab_size()``.
            config: Supplies the global section (copied onto this model) and
                the per-layer sections.
        """
        super(Transformer, self).__init__()

        self.vocab_size = tokenizer.get_vocab_size()
        # NOTE(review): `max_seq_len` is read below, but TransformerGlobalConfig only
        # declares `max_seq_length` — presumably it is injected from YAML; verify.
        set_attributes_from_object(
            self,
            config._global)

        self.embedding = Embedding(
            self.vocab_size,
            self.d_model,
            input_length=self.max_seq_len)

        self.pos_encoding = PositionalEncoding(
            self.max_seq_len,
            self.d_model)

        self.bert_layer = BERTLayer(
            config._global,
            config.BERT)

        self.hitanomaly_layer = HitAnomalyLayer(
            self.vocab_size,
            config._global,
            config.HitAnomaly)

    def call(self, input_tuple, **kwargs):
        """Embed the token ids in ``input_tuple[0]`` and run the BERT stack.

        Args:
            input_tuple: Sequence whose first element holds the token ids;
                further elements are ignored.

        Returns:
            Whatever ``self.bert_layer`` returns.
        """
        log_batch = input_tuple[0]

        embedding_tensor = self.embedding(log_batch)
        embedding_tensor = self.pos_encoding(embedding_tensor)

        # Wrap the embeddings in a real Python tuple. tf.tuple(a, b) treats `b`
        # as `control_inputs` and in eager mode returns `a` unchanged, so the
        # BERT layer's `input_[0]` selected the first sample of the batch.
        # BERTLayer reads only element 0; the padding mask is not wired up yet,
        # so no mask slot is passed.
        bert_ret = self.bert_layer((embedding_tensor,))

        # TODO: feed the encoder output into self.hitanomaly_layer once it is enabled.
        return bert_ret

## Transformer Main

### Batch Processing

### Main (Initialization)

In [None]:
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    # Expose only the first physical GPU to TensorFlow.
    first_gpu = gpus[0]
    try:
        tf.config.experimental.set_visible_devices(first_gpu, 'GPU')
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")
    except RuntimeError as e:
        # Raised when the visible devices are changed after the GPUs were initialized.
        print(e)

# -- Transformer Model -- #
transformer_config_path = SOURCE + '/assets/notebooks/TransformerConfig.yaml'
transformer_config = TransformerConfig()
# load() only logs a warning when the file is missing, leaving the defaults in place.
transformer_config.load(transformer_config_path)
# optimus_prime = Transformer(prime_tokenizer, transformer_config)

t_config = transformer_config._global

# -- Pipeline Info -- #
attns = []

# -- Data Batches -- #
# batched_dataset = process_all_batches(n_iter, log_labels, t_config.batch_size)

# -- Model Metrics -- #
# The Keras classes are referenced through `tf.keras` because the notebook's
# import cell does not import Mean, SparseCategoricalCrossentropy or Adam.
learning_rate = CustomSchedule(t_config.d_model)
epoch_loss = tf.keras.metrics.Mean(name='train_loss')
epoch_accuracy = tf.keras.metrics.Mean(name='train_accuracy')
# reduction='none' keeps one loss value per example; loss_function multiplies the
# loss by its mask, which has no effect on an already-reduced scalar.
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')
optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)

# -- Classification Step Layers -- #
add_att_layer = AdditiveAttention()
softmax = Softmax()
s1 = Sequential([
    Dense(t_config.batch_size, activation=t_config.activation),
    Dense(4, activation=t_config.activation),
    Softmax()
])

# -- Checkpoints -- #
# checkpoint_path = SOURCE + "checkpoints/"
# checkpoint = Checkpoint(step=tf.Variable(1), transformer=optimus_prime, optimizer=optimizer)
# checkpoint_manager = CheckpointManager(checkpoint, checkpoint_path, max_to_keep=5)

# tf.debugging.set_log_device_placement(True)
writer = tf.summary.create_file_writer(SOURCE + t_config.logdir)

### TrainStep

In [None]:
# Static shapes for the compiled train step, read from the global config section.
# NOTE(review): neither attribute is declared on TransformerGlobalConfig —
# presumably both are injected when the YAML is loaded; verify.
batch_size = transformer_config._global.batch_size
max_seq_len = transformer_config._global.max_seq_len

# Input signature of train_step: token ids (batch_size, max_seq_len) as int32
# and one int8 label per example.
train_step_signature = [
    tf.TensorSpec(shape=([batch_size, max_seq_len]), dtype=tf.int32),
    tf.TensorSpec(shape=([batch_size]), dtype=tf.int8)
]

@tf.function(input_signature=train_step_signature,
             experimental_compile=True)
def train_step(log_batch: tf.Tensor, labels: tf.Tensor):
    """Run the model on one batch and return its output.

    Loss computation, gradient application and metric updates are currently
    commented out, so this step performs a forward pass only.

    NOTE(review): `optimus_prime` is only created in a commented-out line of the
    initialization cell — it must be defined before this function is traced.
    """

    transformer_input = tf.tuple([
        log_batch,  # per the signature above: shape=(batch_size, max_seq_len), dtype=int32
        labels  # per the signature above: shape=(batch_size,), dtype=int8
    ])

    # The tape is opened but not used while the gradient code below stays disabled.
    with tf.GradientTape() as tape:
        transformer_ret = optimus_prime(transformer_input)
#         a_s = add_att_layer([Rs, Rs])
#         y = softmax(a_s * Rs)
#         print(a_s.shape)
        # y = Rs
#         loss = tf.py_function(loss_function, [labels, y], tf.float32)
#         pred = s1(y)
#         labels = tf.cast(labels, tf.int8)
    # Optimize the model
#     grads = tape.gradient(loss, optimus_prime.trainable_variables)
#     optimizer.apply_gradients(zip(grads, optimus_prime.trainable_variables))

#     acc = accuracy_function(labels, pred)

    # Tracking Progress
#     epoch_loss.update_state(loss)  # Adding Batch Loss
#     epoch_accuracy.update_state(acc)

    return transformer_ret

In [None]:
train_step_signature

### Main (Training)

In [None]:
# Output of every training step, in execution order. Every step's result stays
# referenced by this list, so memory use grows with the number of steps.
attentions = []

for epoch in range(t_config.epoch):

    start = time.time()
    epoch_loss.reset_states()
    epoch_accuracy.reset_states()
    dataset_iter = iter(batched_dataset)

    t = tqdm(range(n_iter), desc="Epoch: {:03d}, Loss: {:.3f}, Accuracy: {:.3%}".format(0, 0, 0), position=0, leave=True)
    for _ in t:
        batch = next(dataset_iter)
        log_batch = batch[0]
        labels = batch[1]

        # The result was previously bound to the misspelled name `transforer_ret`,
        # so the append below raised NameError on the first step.
        transformer_ret = train_step(log_batch, labels)
        attentions.append(transformer_ret)

        # TODO: re-enable profiling / checkpointing here once train_step computes a loss.

        t.set_description(desc="Epoch: {:03d}, Loss: {:.3f}, Accuracy: {:.3%} ".format(epoch,
                                                                    epoch_loss.result(),
                                                                    epoch_accuracy.result()))
        t.refresh()

In [None]:
# Persist the per-step outputs collected by the training loop. `acc` is never
# defined at notebook scope; `attentions` is the list the loop actually fills.
joblib.dump(attentions, SOURCE + '/results/attention.joblib')

In [None]:
# NOTE(review): `Rs` is only mentioned in commented-out code inside train_step and is
# never assigned in the visible notebook — this cell fails unless it is defined elsewhere.
joblib.dump(Rs, SOURCE + '/results/Rs.joblib')