# Telecom Service Assurance AI Model (Transformer NN) for Latency Insights
Author: Fatih E. NAR

## Introduction
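In this notebook, we train a Transformer-based neural network on telecom service-assurance KPIs (latency, jitter, packet loss, throughput, CPU usage, and memory usage) and use it to predict latency for telecom networks.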

In [None]:
# Install the required packages
%pip install -r requirements.txt
%pip install torch torchvision torchaudio

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from torch.cuda.amp import autocast, GradScaler
from sklearn.preprocessing import MinMaxScaler

# Seed PyTorch so shuffling, weight initialisation and dropout are repeatable
# across "Restart Kernel -> Run All".
SEED = 42
torch.manual_seed(SEED)

# Check if MPS (Metal Performance Shaders) or CUDA is available; fall back to CPU otherwise
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Load the generated data
data = pd.read_csv('data/servass_data.csv.xz', compression='xz', parse_dates=['timestamp'])

# Inspect the data for problematic values
print("Initial data inspection:")
print(data.head())
data.info()  # info() prints its report itself and returns None, so it must not be wrapped in print()

# KPI columns used both as model inputs and as prediction targets
numeric_cols = ['latency', 'jitter', 'packet_loss', 'throughput', 'cpu_usage', 'memory_usage']

# Ensure all relevant columns are numeric; anything that cannot be parsed
# (including the literal string '[]') is coerced to NaN
data[numeric_cols] = data[numeric_cols].apply(pd.to_numeric, errors='coerce')

# Imputation means and scaling ranges are estimated on the leading 80% of rows only
# (the same positional 80/20 split that is applied to the sequences later in this cell),
# so that statistics of the validation rows do not leak into training.
TRAIN_FRACTION = 0.8
n_train_rows = int(TRAIN_FRACTION * len(data))

# Impute missing values in numeric columns instead of dropping rows
train_means = data[numeric_cols].iloc[:n_train_rows].mean()
data[numeric_cols] = data[numeric_cols].fillna(train_means)

# Normalize the data (validation rows may fall slightly outside [0, 1], which is expected)
scaler = MinMaxScaler()
scaler.fit(data[numeric_cols].iloc[:n_train_rows])
data[numeric_cols] = scaler.transform(data[numeric_cols])

# Create sequences
def create_sequences(data, seq_length):
    """Slice ``data`` into every overlapping window of ``seq_length`` consecutive rows.

    Args:
        data: DataFrame, Series or array-like whose first axis is the row (time) axis.
        seq_length: Number of consecutive rows per window; must be at least 1.

    Returns:
        A new array of shape ``(n_windows, seq_length, *feature_dims)`` where
        ``n_windows = len(data) - seq_length + 1``. Window ``i`` holds rows
        ``i .. i + seq_length - 1``. When ``data`` has fewer than ``seq_length``
        rows the result is empty but keeps the same number of dimensions, so
        downstream slicing still works.

    Raises:
        ValueError: If ``seq_length`` is smaller than 1.
    """
    if seq_length < 1:
        raise ValueError('seq_length must be at least 1')

    values = np.asarray(data)
    # "+ 1" so that the final full window (ending on the last row) is included.
    n_windows = values.shape[0] - seq_length + 1
    if n_windows <= 0:
        return np.empty((0, seq_length) + values.shape[1:], dtype=values.dtype)

    # sliding_window_view appends the window axis last: (n_windows, *features, seq_length).
    windows = np.lib.stride_tricks.sliding_window_view(values, seq_length, axis=0)
    # Move the window axis next to the sample axis and copy out of the read-only view.
    return np.ascontiguousarray(np.moveaxis(windows, -1, 1))

seq_length = 30  # Length of the sequences (e.g., 30 time steps)
sequences = create_sequences(data[numeric_cols], seq_length)

MIN_SAMPLES = 32      # Minimum number of usable sequences required for training
TRAIN_FRACTION = 0.8  # Leading share of the sequences used for training
BATCH_SIZE = 256
INSUFFICIENT_DATA_MSG = 'Not enough data to train the model. Increase the dataset size.'

# Too few rows yields no windows at all; fail here with a clear message
# instead of an IndexError from the slicing below.
if sequences.ndim != 3 or len(sequences) == 0:
    raise ValueError(INSUFFICIENT_DATA_MSG)

# The columns in numeric_cols were coerced to numeric dtypes and scaled above, so the
# window array cannot contain strings and no element-by-element type scan is needed.
# The first seq_length - 1 steps of each window are the input, the final step is the target.
X = sequences[:, :-1, :].astype(np.float32)  # Input sequences
y = sequences[:, -1, :].astype(np.float32)   # Corresponding labels

# Drop any remaining NaN values
nan_mask = ~np.isnan(X).any(axis=(1, 2)) & ~np.isnan(y).any(axis=1)
X = X[nan_mask]
y = y[nan_mask]

print(f'X shape: {X.shape}')
print(f'y shape: {y.shape}')

# Ensure there's sufficient data for training
if len(X) < MIN_SAMPLES:
    raise ValueError(INSUFFICIENT_DATA_MSG)

# Split the data into training and validation sets (positional split, no shuffling)
train_size = int(TRAIN_FRACTION * len(X))
X_train, X_val = X[:train_size], X[train_size:]
y_train, y_val = y[:train_size], y[train_size:]
assert len(X_train) + len(X_val) == len(X)

# Check shapes of the datasets
print(f'X_train shape: {X_train.shape}')
print(f'X_val shape: {X_val.shape}')
print(f'y_train shape: {y_train.shape}')
print(f'y_val shape: {y_val.shape}')

# Convert to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_val = torch.tensor(X_val, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.float32)

train_dataset = TensorDataset(X_train, y_train)
val_dataset = TensorDataset(X_val, y_val)

# Only the training batches are shuffled; validation keeps its order so predictions
# line up row-for-row with y_val.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)


In [None]:
# Transformer Block & Model Implementation
class TransformerBlock(nn.Module):
    """Post-norm Transformer encoder layer: self-attention followed by a two-layer MLP.

    Args:
        embed_dim: Size of the last (feature) dimension of the input and output.
        num_heads: Number of attention heads passed to ``nn.MultiheadAttention``.
        ff_dim: Hidden width of the feed-forward network.
        dropout: Dropout probability used inside attention and on both residual branches.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, dropout=0.1):
        super().__init__()
        # Sub-modules are created in the same order as before so that seeded
        # parameter initialisation is unchanged.
        self.attention = nn.MultiheadAttention(embed_dim, num_heads, dropout=dropout)
        feed_forward_layers = [
            nn.Linear(embed_dim, ff_dim),
            nn.ReLU(),
            nn.Linear(ff_dim, embed_dim),
        ]
        self.ffn = nn.Sequential(*feed_forward_layers)
        self.layernorm1 = nn.LayerNorm(embed_dim)
        self.layernorm2 = nn.LayerNorm(embed_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Apply the layer to ``x`` and return a tensor of the same shape.

        The attention module is built without ``batch_first``, so ``x`` is laid out
        as (seq_length, batch_size, embed_dim).
        """
        # Self-attention: query, key and value are all the input itself.
        attended = self.attention(x, x, x)[0]
        after_attention = self.layernorm1(x + self.dropout(attended))
        transformed = self.ffn(after_attention)
        return self.layernorm2(after_attention + self.dropout(transformed))
    
class TransformerModel(nn.Module):
    """Transformer encoder that maps a window of feature vectors to one output vector.

    Pipeline: linear embedding -> sinusoidal positional encoding -> ``num_layers``
    ``TransformerBlock`` layers -> mean over time steps -> two-layer MLP head.

    Without the positional encoding, self-attention followed by average pooling
    gives the same output for any re-ordering of the time steps, so the model
    could not use temporal order. The encoding is computed on the fly and has no
    parameters or buffers, so the module's ``state_dict`` layout is unchanged.

    Args:
        input_shape: Sequence whose last entry is the number of features per time step.
        embed_dim: Width of the embedding used inside the Transformer blocks.
        num_heads: Number of attention heads per block.
        ff_dim: Hidden width of each block's feed-forward network.
        num_layers: Number of stacked ``TransformerBlock`` layers.
    """

    def __init__(self, input_shape, embed_dim, num_heads, ff_dim, num_layers):
        super(TransformerModel, self).__init__()
        self.dense = nn.Linear(input_shape[-1], embed_dim)
        self.transformer_blocks = nn.ModuleList(
            [TransformerBlock(embed_dim, num_heads, ff_dim) for _ in range(num_layers)]
        )
        self.global_avg_pool = nn.AdaptiveAvgPool1d(1)
        self.fc1 = nn.Linear(embed_dim, 128)
        self.dropout = nn.Dropout(0.1)
        self.fc2 = nn.Linear(128, input_shape[-1])

    @staticmethod
    def _positional_encoding(seq_length, embed_dim, device, dtype):
        """Return the fixed sine/cosine position table of shape (seq_length, embed_dim)."""
        positions = torch.arange(seq_length, device=device, dtype=torch.float32).unsqueeze(1)
        even_dims = torch.arange(0, embed_dim, 2, device=device, dtype=torch.float32)
        inv_freq = 1.0 / (10000.0 ** (even_dims / embed_dim))
        angles = positions * inv_freq  # (seq_length, ceil(embed_dim / 2))
        encoding = torch.zeros(seq_length, embed_dim, device=device, dtype=torch.float32)
        encoding[:, 0::2] = torch.sin(angles)
        # For an odd embed_dim there is one fewer cosine column than sine column.
        encoding[:, 1::2] = torch.cos(angles[:, : embed_dim // 2])
        return encoding.to(dtype)

    def forward(self, x):
        """Map ``x`` of shape (batch_size, seq_length, n_features) to (batch_size, n_features)."""
        x = self.dense(x)
        # Inject time-step order; the table broadcasts over the batch dimension.
        x = x + self._positional_encoding(x.size(1), x.size(2), x.device, x.dtype)
        x = x.permute(1, 0, 2)  # (batch_size, seq_length, embed_dim) -> (seq_length, batch_size, embed_dim)
        for transformer in self.transformer_blocks:
            x = transformer(x)
        x = x.permute(1, 2, 0)  # (seq_length, batch_size, embed_dim) -> (batch_size, embed_dim, seq_length)
        x = self.global_avg_pool(x)  # average over the time axis
        x = x.squeeze(-1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

input_shape = (seq_length - 1, X.shape[-1])
model = TransformerModel(input_shape, embed_dim=64, num_heads=4, ff_dim=128, num_layers=2).to(device)

# Loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

# Set up mixed precision training
scaler = GradScaler()

# Implement early stopping
class EarlyStopping:
    def __init__(self, patience=10, verbose=False):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_loss = None
        self.early_stop = False

    def __call__(self, val_loss):
        if self.best_loss is None:
            self.best_loss = val_loss
        elif val_loss > self.best_loss:
            self.counter += 1
            if self.counter >= self.patience:
                if self.verbose:
                    print("Early stopping")
                self.early_stop = True
        else:
            self.best_loss = val_loss
            self.counter = 0

In [None]:
# Training loop with early stopping
early_stopping = EarlyStopping(patience=10, verbose=True)
num_epochs = 10
train_losses = []
val_losses = []

for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        
        optimizer.zero_grad()
        with autocast():
            outputs = model(inputs)
            loss = criterion(outputs, targets)
        
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        
        train_loss += loss.item() * inputs.size(0)
    
    train_loss = train_loss / len(train_loader.dataset)
    
    # Validation loop
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            val_loss += loss.item() * inputs.size(0)
    
    val_loss = val_loss / len(val_loader.dataset)
    
    train_losses.append(train_loss)
    val_losses.append(val_loss)
    
    print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')
    
    early_stopping(val_loss)
    if early_stopping.early_stop:
        break

In [None]:
# Put the model in evaluation mode (its dropout layers become inactive) before predicting.
model.eval()

In [None]:
# Run the validation set through the model batch by batch, without tracking gradients,
# and gather the predictions on the CPU as one NumPy array.
batch_predictions = []
with torch.no_grad():
    for batch in val_loader:
        features = batch[0].to(device)  # each batch is (inputs, targets); only the inputs are needed
        batch_predictions.append(model(features).cpu().numpy())
y_pred = np.concatenate(batch_predictions, axis=0)

In [None]:
# Rescale the predictions and actual values back to the original scale
# `scaler` must be the fitted feature scaler here. Fail with a clear message if the
# name has been rebound to something else (for example a mixed-precision GradScaler).
if not hasattr(scaler, 'inverse_transform'):
    raise TypeError(
        f'`scaler` is a {type(scaler).__name__}, not the fitted feature scaler; '
        're-create the MinMaxScaler or keep it under a name that is not reused.'
    )

# Hand scikit-learn plain NumPy arrays rather than relying on implicit tensor conversion.
y_val_array = y_val.detach().cpu().numpy() if torch.is_tensor(y_val) else np.asarray(y_val)
y_pred_rescaled = scaler.inverse_transform(np.asarray(y_pred))
y_val_rescaled = scaler.inverse_transform(y_val_array)

# Calculate MAPE
def mean_absolute_percentage_error(y_true, y_pred, eps=1e-8):
    """Return the mean absolute percentage error, in percent.

    Entries whose true value is zero (``|y_true| <= eps``) have no defined
    percentage error and are left out of the mean, instead of turning the
    whole result into inf/nan.

    Args:
        y_true: Array-like of reference values.
        y_pred: Array-like of predictions, broadcastable against ``y_true``.
        eps: Magnitude at or below which a true value is treated as zero.

    Returns:
        The MAPE over the usable entries as a float, or NaN when there are none.
    """
    y_true = np.asarray(y_true, dtype=np.float64)
    y_pred = np.asarray(y_pred, dtype=np.float64)

    usable = np.abs(y_true) > eps
    # Divide by 1 at unusable positions to avoid divide-by-zero warnings; they are masked out below.
    pct_error = np.abs((y_true - y_pred) / np.where(usable, y_true, 1.0))
    usable = np.broadcast_to(usable, pct_error.shape)
    if not usable.any():
        return float('nan')
    return float(pct_error[usable].mean() * 100)

# Validation MAPE over the rescaled values, reported as a percentage with two decimals.
mape = mean_absolute_percentage_error(y_true=y_val_rescaled, y_pred=y_pred_rescaled)
print(f'MAPE: {mape:.2f}%')

In [None]:
# The rescaled arrays hold one column per entry of numeric_cols. Plot only the latency
# column so the curves match the 'Latency' title, axis label and legend.
latency_idx = numeric_cols.index('latency')
actual_latency = np.asarray(y_val_rescaled)[:, latency_idx]
predicted_latency = np.asarray(y_pred_rescaled)[:, latency_idx]

fig, ax = plt.subplots(figsize=(20, 5))
ax.plot(actual_latency, label='Actual Latency')
ax.plot(predicted_latency, label='Predicted Latency')
ax.set_title('Actual vs Predicted Latency')
ax.set_xlabel('Time Steps')
ax.set_ylabel('Latency')
ax.legend()
plt.show()