<a href="https://colab.research.google.com/github/mostafa-ja/Anomaly-detection/blob/main/TCN2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [15]:
# Create the TCN model and load pre-trained weights if available
input_size = 1  # Assuming each log template is represented by a single number
output_size = 28  # Number of classes in the one-hot encodings
num_channels = [16, 32]
kernel_size = 5
dropout = 0.2

In [16]:
num_blocks = len(num_channels)  # Number of blocks in the TCN
dilations = [2 ** i for i in range(num_blocks)]  # [1, 2, 4] as before

receptive_field_per_layer = (kernel_size - 1) * (max(dilations) - 1) + 1

total_receptive_field = receptive_field_per_layer * num_blocks

print(f"Total Receptive Field of TCN (with each block consisting of two layers): {total_receptive_field}")


Total Receptive Field of TCN (with each block consisting of two layers): 10


In [17]:

# download datasets
!wget 'https://raw.githubusercontent.com/donglee-afar/logdeep/master/data/hdfs/hdfs_train'
!wget 'https://raw.githubusercontent.com/donglee-afar/logdeep/master/data/hdfs/hdfs_test_normal'
!wget 'https://raw.githubusercontent.com/donglee-afar/logdeep/master/data/hdfs/hdfs_test_abnormal'

--2023-07-29 15:18:38--  https://raw.githubusercontent.com/donglee-afar/logdeep/master/data/hdfs/hdfs_train
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.109.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 257875 (252K) [text/plain]
Saving to: ‘hdfs_train.1’


2023-07-29 15:18:38 (8.04 MB/s) - ‘hdfs_train.1’ saved [257875/257875]

--2023-07-29 15:18:38--  https://raw.githubusercontent.com/donglee-afar/logdeep/master/data/hdfs/hdfs_test_normal
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 29284282 (28M) [text/plain]
Saving to: ‘hdfs_test_normal.1’


2023-07-29 15:18:38 (177 MB/s

In [18]:
import torch
import torch.nn as nn
from torch.nn.utils import weight_norm
import time
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import os
import numpy as np
import pandas as pd
from tqdm import tqdm

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [19]:
class Chomp1d(nn.Module):
    def __init__(self, chomp_size):
        """
        Removes the extra elements at the end of the input tensor along the time dimension.

        Args:
        - chomp_size (int): The number of elements to be removed from the end of each input tensor.
        """
        super(Chomp1d, self).__init__()
        self.chomp_size = chomp_size

    def forward(self, x):
        """
        Forward pass of Chomp1d.

        Args:
        - x (torch.Tensor): Input tensor with dimension (N, C, L). (batch size, number of input features(channels), input sequence length)

        Returns:
        - torch.Tensor: Output tensor with dimension (N, C, L - chomp_size).
        """
        return x[:, :, :-self.chomp_size].contiguous()


class TemporalBlock(nn.Module):
    def __init__(self, n_inputs, n_outputs, kernel_size, stride, dilation, padding, dropout=0.2):
        """
        A single temporal block in the Temporal Convolutional Network.

        Args:
        - n_inputs (int): Number of input channels.
        - n_outputs (int): Number of output channels.
        - kernel_size (int): Size of the convolutional kernels.
        - stride (int): Stride for the convolutional layers.
        - dilation (int): Dilation rate for the convolutional layers.
        - padding (int): Padding size for the convolutional layers.
        - dropout (float): Dropout rate to be applied after each convolutional layer.
        """
        super(TemporalBlock, self).__init__()
        self.conv1 = weight_norm(nn.Conv1d(n_inputs, n_outputs, kernel_size,
                                           stride=stride, padding=padding, dilation=dilation))
        self.chomp1 = Chomp1d(padding)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout)

        self.conv2 = weight_norm(nn.Conv1d(n_outputs, n_outputs, kernel_size,
                                           stride=stride, padding=padding, dilation=dilation))
        self.chomp2 = Chomp1d(padding)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(dropout)

        self.net = nn.Sequential(self.conv1, self.chomp1, self.relu1, self.dropout1,
                                 self.conv2, self.chomp2, self.relu2, self.dropout2)

        # The downsample variable stores a 1x1 convolutional layer
        # (if n_inputs is not equal to n_outputs) to match the input and output dimensions of the block. Otherwise, it is set to None.
        #IMPORTANT : The purpose of self.downsample is to adjust the dimensions of the input tensor so that it can be added element-wise to the output of the
        #temporal block,This addition is part of the residual connection that helps with training deeper networks and improves gradient flow during
        # backpropagation. CAREFUL, because here we have kernel size one, we dont need  Chomp1d() to equlize their size
        self.downsample = nn.Conv1d(n_inputs, n_outputs, 1) if n_inputs != n_outputs else None

        self.relu = nn.ReLU()
        self.init_weights()

    def init_weights(self):
        """
        Initialize the weights of the convolutional layers with a normal distribution (mean=0, std=0.01).
        """
        self.conv1.weight.data.normal_(0, 0.01)
        self.conv2.weight.data.normal_(0, 0.01)
        if self.downsample is not None:
            self.downsample.weight.data.normal_(0, 0.01)

    def forward(self, x):
        """
        Forward pass of TemporalBlock.

        Args:
        - x (torch.Tensor): Input tensor with dimension (N, C, L).

        Returns:
        - torch.Tensor: Output tensor with dimension (N, C, L).
        """
        out = self.net(x)
        res = x if self.downsample is None else self.downsample(x)
        return self.relu(out + res)


class TemporalConvNet(nn.Module):
    def __init__(self, num_inputs, num_channels, kernel_size=2, dropout=0.2):
        """
        Temporal Convolutional Network (TCN) for sequence processing.
        This class represents the Temporal Convolutional Network (TCN) architecture that contains multiple temporal blocks stacked together.

        Args:
        - num_inputs (int): Number of input channels.
        - num_channels (list of int): Number of channels for each temporal block.
        - kernel_size (int): Size of the convolutional kernels in the temporal blocks.
        - dropout (float): Dropout rate to be applied after each temporal block.
        """
        super(TemporalConvNet, self).__init__()
        layers = []
        num_levels = len(num_channels)
        for i in range(num_levels):
            dilation_size = 2 ** i
            #The input channels for each block are determined based on the number of input channels (num_inputs) for the first block,
            #and for subsequent blocks, the number of input channels is set to the number of output channels of the previous block (num_channels[i-1]).
            in_channels = num_inputs if i == 0 else num_channels[i - 1]
            out_channels = num_channels[i]

            layers += [TemporalBlock(in_channels, out_channels, kernel_size, stride=1, dilation=dilation_size,
                                     padding=(kernel_size - 1) * dilation_size, dropout=dropout)]

        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """
        Forward pass of TemporalConvNet.

        Args:
        - x (torch.Tensor): Input tensor with dimension (N, C, L).

        Returns:
        - torch.Tensor: Output tensor with dimension (N, C, L).
        """
        return self.network(x)



class TCN(nn.Module):
    def __init__(self, input_size, output_size, num_channels, kernel_size, dropout):
        """
        Temporal Convolutional Network (TCN) for sequence classification.

        Args:
        - input_size (int): The number of input features (C_in).
        - output_size (int): The number of classes for classification.
        - num_channels (list of int): Number of channels for each TCN layer.
        - kernel_size (int): The size of the convolutional kernels in TCN layers.
        - dropout (float): The dropout rate to be applied after each TCN layer.
        """
        super(TCN, self).__init__()
        self.tcn = TemporalConvNet(input_size, num_channels, kernel_size=kernel_size, dropout=dropout)
        self.linear = nn.Linear(num_channels[-1], output_size)

    def forward(self, inputs):
        """
        Forward pass of the TCN model.

        Args:
        - inputs (torch.Tensor): Input tensor with dimension (N, C_in, L_in).(batch size, number of input features(channels), input sequence length)

        Returns:
        - torch.Tensor: The model's output tensor with dimension (N, output_size).
        """
        # Input should have dimension (N, C_in, L_in)
        y = self.tcn(inputs)
        o = self.linear(y[:, :, -1])
        return o


In [None]:
class AttentionBlock(nn.Module):
  """An attention mechanism similar to Vaswani et al (2017)
  The input of the AttentionBlock is `BxTxD` where `B` is the input
  minibatch size, `T` is the length of the sequence `D` is the dimensions of
  each feature.
  The output of the AttentionBlock is `BxTx(D+V)` where `V` is the size of the
  attention values.
  Arguments:
      dims (int): the number of dimensions (or channels) of each element in
          the input sequence
      k_size (int): the size of the attention keys
      v_size (int): the size of the attention values
      seq_len (int): the length of the input and output sequences
  """
  def __init__(self, dims, k_size, v_size, seq_len=None):
    super(AttentionBlock, self).__init__()
    self.key_layer = nn.Linear(dims, k_size)
    self.query_layer = nn.Linear(dims, k_size)
    self.value_layer = nn.Linear(dims, v_size)
    self.sqrt_k = math.sqrt(k_size)

  def forward(self, minibatch):
    keys = self.key_layer(minibatch)
    queries = self.query_layer(minibatch)
    values = self.value_layer(minibatch)
    logits = torch.bmm(queries, keys.transpose(2,1))
    # Use numpy triu because you can't do 3D triu with PyTorch
    # TODO: using float32 here might break for non FloatTensor inputs.
    # Should update this later to use numpy/PyTorch types of the input.
    mask = np.triu(np.ones(logits.size()), k=1).astype('uint8')
    mask = torch.from_numpy(mask).cuda()
    # do masked_fill_ on data rather than Variable because PyTorch doesn't
    # support masked_fill_ w/-inf directly on Variables for some reason.
    logits.data.masked_fill_(mask, float('-inf'))
    probs = F.softmax(logits, dim=1) / self.sqrt_k
    read = torch.bmm(probs, values)
    return minibatch + read

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class ConvBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride, padding):
        super(ConvBlock, self).__init__()
        self.conv1d = nn.Conv1d(in_channels, out_channels, kernel_size, stride=stride, padding=padding)
        self.max_pool = nn.MaxPool1d(kernel_size=2)  # Assuming max-pooling with kernel_size=2
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.conv1d(x)
        x = self.max_pool(x)
        x = self.relu(x)
        return x


class AttentionBlock(nn.Module):
  """An attention mechanism similar to Vaswani et al (2017)
  The input of the AttentionBlock is `BxTxD` where `B` is the input
  minibatch size, `T` is the length of the sequence `D` is the dimensions of
  each feature.
  The output of the AttentionBlock is `BxTx(D+V)` where `V` is the size of the
  attention values.
  Arguments:
      dims (int): the number of dimensions (or channels) of each element in
          the input sequence
      k_size (int): the size of the attention keys
      v_size (int): the size of the attention values
      seq_len (int): the length of the input and output sequences
  """
  def __init__(self, dims, k_size, v_size, seq_len=None):
    super(AttentionBlock, self).__init__()
    self.key_layer = nn.Linear(dims, k_size)
    self.query_layer = nn.Linear(dims, k_size)
    self.value_layer = nn.Linear(dims, v_size)
    self.sqrt_k = math.sqrt(k_size)

  def forward(self, minibatch):
    keys = self.key_layer(minibatch)
    queries = self.query_layer(minibatch)
    values = self.value_layer(minibatch)
    logits = torch.bmm(queries, keys.transpose(2,1))
    # Use numpy triu because you can't do 3D triu with PyTorch
    # TODO: using float32 here might break for non FloatTensor inputs.
    # Should update this later to use numpy/PyTorch types of the input.
    mask = np.triu(np.ones(logits.size()), k=1).astype('uint8')
    mask = torch.from_numpy(mask).cuda()
    # do masked_fill_ on data rather than Variable because PyTorch doesn't
    # support masked_fill_ w/-inf directly on Variables for some reason.
    logits.data.masked_fill_(mask, float('-inf'))
    probs = F.softmax(logits, dim=1) / self.sqrt_k
    read = torch.bmm(probs, values)
    return minibatch + read


class ACNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, kernel_size, stride, padding):
        super(ACNN, self).__init__()
        self.conv_block1 = ConvBlock(input_size, hidden_size, kernel_size, stride, padding)
        self.conv_block2 = ConvBlock(hidden_size, hidden_size, kernel_size, stride, padding)
        self.attention_layer = AttentionLayer(hidden_size)
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        x = self.conv_block1(x)
        x = self.conv_block2(x)
        x = self.attention_layer(x)
        x = torch.sum(x, dim=2)  # Sum along the time dimension
        x = self.linear(x)
        return x


In [20]:
name = 'hdfs_train'
window_size = 10
num_sessions = 0
inputs = []
outputs = []

with open('/content/' + name, 'r') as f:
        for row in f:
            num_sessions += 1
            line = [ (int(i)-1) for i in row.strip().split()]
            for i in range(len(line) - window_size):
                inputs.append(line[i:i + window_size])
                outputs.append(line[i + window_size])

print('Number of sessions({}): {}'.format(name, num_sessions))
print('Number of seqs({}): {}'.format(name, len(inputs)))
dataset = TensorDataset(torch.tensor(inputs, dtype=torch.float), torch.tensor(outputs))

Number of sessions(hdfs_train): 4855
Number of seqs(hdfs_train): 46575


In [40]:
# Create the TCN model and load pre-trained weights if available
input_size = 1  # Assuming each log template is represented by a single number
output_size = 28  # Number of classes in the one-hot encodings
num_channels = [16, 32, 64]
kernel_size = 3
dropout = 0.2
batch_size = 2048
num_epochs = 400

model = TCN(input_size, output_size, num_channels, kernel_size, dropout).to(device)

dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, pin_memory=True)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()


In [41]:
def adjust_learning_rate(optimizer, epoch, lr_step=(300, 350), lr_decay_ratio=0.1):
    """Adjust the learning rate based on the epoch number."""
    if epoch == 0:
        optimizer.param_groups[0]['lr'] /= 32
    elif epoch in [3, 6, 9, 12, 15]:  # in step five , we finish warm up ,and start normal learning rate
        optimizer.param_groups[0]['lr'] *= 2
    if epoch in lr_step: # in these steps , we are geting close to optimal point so we need to have shorter step
        optimizer.param_groups[0]['lr'] *= lr_decay_ratio
    return optimizer

# Define options here
options = {
    'lr': 0.001,
    'lr_step': (300, 350), #steps(epoch) for updating learning rate
    'lr_decay_ratio': 0.1,
    # Add other options here
}

optimizer = optim.Adam(model.parameters(), lr=options['lr'], betas=(0.9, 0.999))

In [42]:
# Train the model
start_time = time.time()
total_step = len(dataloader)
for epoch in range(num_epochs):  # Loop over the dataset multiple times
    optimizer = adjust_learning_rate(optimizer, epoch, options['lr_step'], options['lr_decay_ratio'])
    print(optimizer.param_groups[0]['lr'])
    train_loss = 0
    for step, (seq, label) in enumerate(dataloader):
        # Forward pass
        seq = seq.clone().detach().view(-1, input_size, window_size).to(device)
        output = model(seq)
        loss = criterion(output, label.to(device))

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        train_loss += loss.item()
        optimizer.step()
    print('Epoch [{}/{}], train_loss: {:.4f}'.format(epoch + 1, num_epochs, train_loss / total_step))
elapsed_time = time.time() - start_time
print('elapsed_time: {:.3f}s'.format(elapsed_time))
print('Finished Training')


3.125e-05
Epoch [1/400], train_loss: 3.3178
3.125e-05
Epoch [2/400], train_loss: 3.2599
3.125e-05
Epoch [3/400], train_loss: 3.1785
6.25e-05
Epoch [4/400], train_loss: 2.9664
6.25e-05
Epoch [5/400], train_loss: 2.5037
6.25e-05
Epoch [6/400], train_loss: 2.1759
0.000125
Epoch [7/400], train_loss: 2.0356
0.000125
Epoch [8/400], train_loss: 1.9346
0.000125
Epoch [9/400], train_loss: 1.8783
0.00025
Epoch [10/400], train_loss: 1.8174
0.00025
Epoch [11/400], train_loss: 1.7463
0.00025
Epoch [12/400], train_loss: 1.6927
0.0005
Epoch [13/400], train_loss: 1.6263
0.0005
Epoch [14/400], train_loss: 1.5354
0.0005
Epoch [15/400], train_loss: 1.4464
0.001
Epoch [16/400], train_loss: 1.3269
0.001
Epoch [17/400], train_loss: 1.1586
0.001
Epoch [18/400], train_loss: 0.9889
0.001
Epoch [19/400], train_loss: 0.8490
0.001
Epoch [20/400], train_loss: 0.7459
0.001
Epoch [21/400], train_loss: 0.6859
0.001
Epoch [22/400], train_loss: 0.6453
0.001
Epoch [23/400], train_loss: 0.6129
0.001
Epoch [24/400], train

In [24]:
def generate(name):
    window_size = 10
    hdfs = {} #store the unique sequences and their counts.
    length = 0
    with open('/content/' + name, 'r') as f:
        for ln in f.readlines():
            ln = [(int(i)-1) for i in ln.strip().split()]
            ln = ln + [-1] * (window_size + 1 - len(ln))     #ensure that all sequences have a fixed length of window_size + 1, even if the original line had fewer elements.
            hdfs[tuple(ln)] = hdfs.get(tuple(ln), 0) + 1   #If the tuple is not present in the dictionary, hdfs.get(tuple(ln), 0) returns 0, and the code initializes the count to 1.
            length += 1
    print('Number of sessions({}): {}'.format(name, len(hdfs)))
    return hdfs, length


In [25]:
test_normal_loader, test_normal_length = generate('hdfs_test_normal')
test_abnormal_loader, test_abnormal_length = generate('hdfs_test_abnormal')


Number of sessions(hdfs_test_normal): 14177
Number of sessions(hdfs_test_abnormal): 4123


In [26]:
num_candidates = 9

In [43]:

# Test the model
model.eval()

TP = 0
FP = 0

start_time = time.time()
with torch.no_grad():
    for line in tqdm(test_normal_loader.keys()):
        for i in range(len(line) - window_size):
            seq = line[i:i + window_size]
            label = line[i + window_size]
            seq = torch.tensor(seq, dtype=torch.float).view(-1, input_size, window_size).to(device)
            label = torch.tensor(label).view(-1).to(device)
            output = model(seq)
            predicted = torch.argsort(output, 1)[0][-num_candidates:]
            if label not in predicted:
                FP += test_normal_loader[line] # numbers of that set we have
                break   #with just one wrong prediction in a line , we assume , abnormal
with torch.no_grad():
    for line in tqdm(test_abnormal_loader.keys()):
        for i in range(len(line) - window_size):
            seq = line[i:i + window_size]
            label = line[i + window_size]
            seq = torch.tensor(seq, dtype=torch.float).view(-1, input_size, window_size).to(device)
            label = torch.tensor(label).view(-1).to(device)
            output = model(seq)
            predicted = torch.argsort(output, 1)[0][-num_candidates:]
            if label not in predicted:
                TP += test_abnormal_loader[line]
                break
elapsed_time = time.time() - start_time
print('elapsed_time: {:.3f}s'.format(elapsed_time))
# Compute precision, recall and F1-measure
FN = test_abnormal_length - TP
P = 100 * TP / (TP + FP)
R = 100 * TP / (TP + FN)
F1 = 2 * P * R / (P + R)
print('false positive (FP): {}, false negative (FN): {}, Precision: {:.3f}%, Recall: {:.3f}%, F1-measure: {:.3f}%'.format(FP, FN, P, R, F1))
print('Finished Predicting')

100%|██████████| 14177/14177 [06:57<00:00, 33.96it/s]
100%|██████████| 4123/4123 [01:16<00:00, 54.04it/s]

elapsed_time: 493.824s
false positive (FP): 1146, false negative (FN): 943, Precision: 93.275%, Recall: 94.400%, F1-measure: 93.834%
Finished Predicting



