### Imports

In [None]:
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader, random_split
import torch.optim as optim
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

### Model Definition

In [None]:
import torch #???
import torch.nn as nn #???

class BiLSTMClassifier(nn.Module):
    """Classify a token sequence together with its per-step time features.

    The token ids are embedded and summarised by a single-layer bidirectional
    LSTM; the time features are averaged over the sequence axis and passed
    through a small fully connected layer. Both summaries are concatenated and
    fed to a two-layer classification head.

    Args:
        vocab_size: Number of rows in the embedding table.
        embed_dim: Size of each token embedding.
        hidden_dim: Hidden size of the LSTM (per direction) and of the dense layers.
        time_input_dim: Number of time features per sequence step.
        output_dim: Number of output classes.
        dropout: Dropout probability used in the dense layers.
    """

    def __init__(self, vocab_size, embed_dim, hidden_dim, time_input_dim, output_dim, dropout=0.3):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.lstm_seq = nn.LSTM(embed_dim, hidden_dim, batch_first=True, bidirectional=True)

        # Projects the averaged time features into the same width as one LSTM direction.
        self.time_fc = nn.Sequential(
            nn.Linear(time_input_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout)
        )

        # Input width: 2H from the two LSTM directions + H from the time branch.
        self.classifier = nn.Sequential(
            nn.Linear(hidden_dim * 2 + hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, output_dim)
        )

    def forward(self, seq_input, time_input):
        """Return class logits of shape [B, output_dim].

        Args:
            seq_input: Integer token ids, [B, L].
            time_input: Time features, [B, L, time_input_dim].
        """
        embedded = self.embedding(seq_input)                    # [B, L] -> [B, L, D]

        # Use the final hidden state of each direction rather than the output at
        # the last time step: at position L-1 the backward direction has only
        # seen a single token, so slicing lstm_out[:, -1, :] would discard almost
        # everything the backward pass learned about the sequence.
        _, (h_n, _) = self.lstm_seq(embedded)                   # h_n: [2, B, H]
        seq_features = torch.cat((h_n[-2], h_n[-1]), dim=1)     # forward ++ backward -> [B, 2H]

        time_mean = torch.mean(time_input, dim=1)               # [B, L, T] -> [B, T]
        time_features = self.time_fc(time_mean)                 # -> [B, H]

        combined = torch.cat((seq_features, time_features), dim=1)  # [B, 3H]
        return self.classifier(combined)
    

### Data Loading

In [None]:
# Directory that holds the pre-built NumPy arrays used by the later cells.
data_dir = "./dataBiLSTM/"

# Load the sequence inputs, the time features and the labels in one go.
X_seq, X_time, y = (
    np.load(data_dir + file_name)
    for file_name in ("X_seq.npy", "X_time.npy", "y.npy")
)

# Quick sanity check of what was loaded.
for label, array in (("X_seq", X_seq), ("X_time", X_time), ("y", y)):
    print(f"{label} shape:", array.shape)

### Train/Test Split

In [None]:
# Hold out 20% of the samples for testing. The split is stratified on y and
# uses a fixed seed so that it is reproducible between runs.
(
    X_seq_train,
    X_seq_test,
    X_time_train,
    X_time_test,
    y_train,
    y_test,
) = train_test_split(
    X_seq,
    X_time,
    y,
    test_size=0.2,
    stratify=y,
    random_state=169783,
)

### Datasets and DataLoaders

In [None]:
# Token ids and labels become int64 tensors, time features float32 tensors.
batch_size = 64

# --- Training partition (reshuffled by the loader) ---
X_seq_train_tensor = torch.tensor(X_seq_train, dtype=torch.long)
X_time_train_tensor = torch.tensor(X_time_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
train_ds = TensorDataset(X_seq_train_tensor, X_time_train_tensor, y_train_tensor)
train_loader = DataLoader(train_ds, shuffle=True, batch_size=batch_size)

# --- Test partition (kept in its original order) ---
X_seq_test_tensor = torch.tensor(X_seq_test, dtype=torch.long)
X_time_test_tensor = torch.tensor(X_time_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)
test_ds = TensorDataset(X_seq_test_tensor, X_time_test_tensor, y_test_tensor)
test_loader = DataLoader(test_ds, batch_size=batch_size)

### Model Initialization

In [None]:
# Fixed hyperparameters.
embed_dim = 64
hidden_dim = 128
# NOTE(review): hard-coded; presumably must equal the last dimension of X_time — confirm.
time_input_dim = 3

# Sizes derived from the data: ids are used as indices, so the largest
# id + 1 gives the number of embedding rows / output classes needed.
vocab_size = int(np.max(X_seq) + 1)
output_dim = int(y.max() + 1)

model = BiLSTMClassifier(
    vocab_size=vocab_size,
    embed_dim=embed_dim,
    hidden_dim=hidden_dim,
    time_input_dim=time_input_dim,
    output_dim=output_dim,
)

### Training Loop

In [None]:
# Train on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

n_epochs = 10

# Per-epoch history, consumed by the plotting cell further down.
train_losses, train_accuracies = [], []

for epoch in range(n_epochs):
    model.train()
    total_loss, correct, total = 0, 0, 0

    for batch in train_loader:
        # Move the whole batch to the training device.
        seq_batch, time_batch, y_batch = (tensor.to(device) for tensor in batch)

        # Standard optimisation step.
        optimizer.zero_grad()
        outputs = model(seq_batch, time_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()

        # Running statistics for this epoch.
        total_loss += loss.item()
        preds = torch.argmax(outputs, dim=1)
        correct += preds.eq(y_batch).sum().item()
        total += y_batch.size(0)

    # Loss is the mean of the per-batch losses; accuracy is per sample.
    epoch_loss = total_loss / len(train_loader)
    acc = correct / total
    train_losses.append(epoch_loss)
    train_accuracies.append(acc)

    print(f"\nEpoch {epoch + 1}/{n_epochs} - Loss: {epoch_loss:.4f} - Accuracy: {acc:.4f}")

### Evaluation

In [None]:
# Evaluation mode disables dropout; no_grad skips gradient bookkeeping.
model.eval()
all_preds, all_labels = [], []

with torch.no_grad():
    for seq_batch, time_batch, y_batch in test_loader:
        seq_batch, time_batch = seq_batch.to(device), time_batch.to(device)

        outputs = model(seq_batch, time_batch)
        # Predicted class = index of the largest logit; moved back to the CPU for sklearn.
        preds = torch.argmax(outputs, dim=1).cpu().numpy()

        all_preds.extend(preds)
        # Labels were never moved to the device, so they can be converted directly.
        all_labels.extend(y_batch.numpy())

print("Classification report: ")
print(classification_report(all_labels, all_preds))

### Saving the Model

In [None]:
import os

model_path = "./models/bilstm_model.pt"

# torch.save does not create missing parent directories, so make sure the
# target folder exists before writing (no-op when it is already there).
os.makedirs(os.path.dirname(model_path), exist_ok=True)

# Only the learned parameters are stored; the BiLSTMClassifier class and its
# constructor arguments are needed again to load them.
torch.save(model.state_dict(), model_path)
print(f"The model has been saved to {model_path}")

### Results Visualization

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, f1_score

# Raw confusion matrix (counts): rows are true classes, columns are predictions.
cm = confusion_matrix(all_labels, all_preds)
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot(cmap=plt.cm.Blues)
plt.title("BiLSTM - Confusion Matrix")
plt.show()

import seaborn as sns

# Row-normalised confusion matrix: each row is divided by the number of true
# samples of that class, giving fractions in [0, 1]. A class that only shows
# up in the predictions has an all-zero row; leave that row at 0 instead of
# dividing by zero and filling the heatmap with NaN.
row_totals = cm.sum(axis=1, keepdims=True)
cm_normalized = np.divide(
    cm,
    row_totals,
    out=np.zeros(cm.shape, dtype=float),
    where=row_totals != 0,
)

plt.figure(figsize=(6, 5))
sns.heatmap(cm_normalized, annot=True, fmt=".2f", cmap="Blues")
plt.title("BiLSTM - Normalized Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.show()

# Single summary number: per-class F1 weighted by class support.
f1 = f1_score(all_labels, all_preds, average="weighted")
print(f"F1-score (weighted): {f1:.4f}")

In [None]:
from sklearn.metrics import f1_score  # redundant re-import of a name imported in an earlier cell; harmless

# F1 for every class.
# With average=None, f1_score reports one value per label found in the true
# labels OR the predictions. Build the tick labels from the same union and pass
# them explicitly, so the bars and their labels always line up, even when a
# class is predicted but missing from the test labels.
labels_unique = np.union1d(all_labels, all_preds)
f1_per_class = f1_score(all_labels, all_preds, labels=labels_unique, average=None)

plt.bar(range(len(f1_per_class)), f1_per_class, tick_label=labels_unique)
plt.title("F1-score per Class")
plt.xlabel("Class")
plt.ylabel("F1-score")
plt.ylim(0, 1)
plt.grid(True)
plt.show()

### Training Curves Visualization

In [None]:
# Side-by-side training curves: loss on the left, accuracy on the right.
fig, axs = plt.subplots(1, 2, figsize=(12, 4))

epochs = range(1, n_epochs + 1)  # 1-based epoch numbers for the x axis
panels = (
    (train_losses, "Training Loss per Epoch", "Loss", "blue"),
    (train_accuracies, "Training Accuracy per Epoch", "Accuracy", "green"),
)

for ax, (history, title, y_label, line_color) in zip(axs, panels):
    ax.plot(epochs, history, marker='o', color=line_color)
    ax.set_title(title)
    ax.set_xlabel("Epoch")
    ax.set_ylabel(y_label)
    ax.grid(True)

plt.tight_layout()
plt.show()