In [1]:
from sklearn.discriminant_analysis import StandardScaler
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import Dataset
from scapy.all import rdpcap, IP, TCP, UDP, PcapReader
import numpy as np
import logging

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

class PacketSequenceDataset(Dataset):
    def __init__(self, pcap_file, sequence_length, feature_dim, max_packets=50000, label=None):
        self.sequence_length = sequence_length
        self.feature_dim = feature_dim
        self.label = label
        self.scalers = [StandardScaler() for _ in range(feature_dim)]
        self.max_packets = max_packets
        self.data = self._parse_pcap(pcap_file)
        self._compute_feature_averages(self.data)
        self.data = self._scale_data(self.data)

    def _parse_pcap(self, pcap_file):
        try:
            with PcapReader(pcap_file) as pcap_reader:
                packets = [pcap_reader.read_packet() for _ in range(self.max_packets)]
        except Exception as e:
            logger.error(f"Failed to read pcap file: {e}")
            return np.zeros((1, self.sequence_length, self.feature_dim), dtype=np.float32)
        data = []

        def safe_convert(value):
            if isinstance(value, (int, float)):
                return value
            try:
                return int(value)
            except:
                try:
                    return float(value)
                except:
                    return 0

        expected_feature_count = self.feature_dim  # Use the initial feature_dim

        for packet in packets:
            features = []
            # IP features
            try:
                if IP in packet:
                    ip = packet[IP]
                    features.extend([
                        int.from_bytes(bytes(map(int, ip.src.split('.'))), byteorder='big'),
                        int.from_bytes(bytes(map(int, ip.dst.split('.'))), byteorder='big'),
                        safe_convert(ip.len),
                        safe_convert(ip.flags),
                        #safe_convert(ip.frag),
                        safe_convert(ip.ttl),
                        safe_convert(ip.proto)
                    ])
                else:
                    features.extend([0] * 7)
            except Exception as e:
                logger.warning(f"Error parsing IP features: {e}")
                features.extend([0] * 7)

            # TCP/UDP features
            try:
                if TCP in packet:
                    tcp = packet[TCP]
                    features.extend([
                        safe_convert(tcp.sport),
                        safe_convert(tcp.dport),
                        safe_convert(tcp.flags),
                        safe_convert(tcp.window)
                    ])
                elif UDP in packet:
                    udp = packet[UDP]
                    features.extend([
                        safe_convert(udp.sport),
                        safe_convert(udp.dport),
                        #safe_convert(udp.len)
                    ])
                    features.append(0)  # Padding to match TCP feature count
                else:
                    features.extend([0] * 4)
            except Exception as e:
                logger.warning(f"Error parsing TCP/UDP features: {e}")
                features.extend([0] * 4)

            # General packet features
            try:
                features.extend([
                    safe_convert(len(packet))
                ])
            except Exception as e:
                logger.warning(f"Error parsing general packet features: {e}")
                features.extend([0, 0.0])

            if len(features) < expected_feature_count:
                features.extend([0] * (expected_feature_count - len(features)))
            elif len(features) > expected_feature_count:
                features = features[:expected_feature_count]    

            data.append(features)

        num_samples = max(1, len(data) // self.sequence_length)
        padded_data = np.zeros((num_samples, self.sequence_length, self.feature_dim), dtype=np.float32)
        
        for i in range(num_samples):
            start_idx = i * self.sequence_length
            end_idx = min(start_idx + self.sequence_length, len(data))
            padded_data[i, :end_idx - start_idx, :] = data[start_idx:end_idx]

        logger.info(f"Parsed {len(data)} packets into {num_samples} samples with {self.feature_dim} features each")
        return np.array(padded_data, dtype=np.float32)
    
    def _compute_feature_averages(self, data):
        feature_means = np.mean(data, axis=(0, 1))
        feature_stds = np.std(data, axis=(0, 1))
        for i, (mean, std) in enumerate(zip(feature_means, feature_stds)):
            logger.info(f"Feature {i} - Mean: {mean:.4f}, Std: {std:.4f}")

    def _scale_data(self, data):
        try:
            num_samples, seq_len, feat_dim = data.shape
            scaled_data = np.zeros_like(data)

            for i in range(feat_dim):
                feature_data = data[:, :, i].reshape(-1, 1)

                # Fit and transform each feature separately
                scaled_feature = self.scalers[i].fit_transform(feature_data)
                scaled_data[:, :, i] = scaled_feature.reshape(num_samples, seq_len)

            logger.info("Data scaling completed successfully")
            return scaled_data
        except Exception as e:
            logger.error(f"Error during data scaling: {e}")
            return data 
    
    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        if self.label is not None:
            return torch.tensor(self.data[idx], dtype=torch.float32), torch.tensor(self.label, dtype=torch.float32)
        else:
            # For training, we return the input as both input and target
            return torch.tensor(self.data[idx], dtype=torch.float32), torch.tensor(self.data[idx], dtype=torch.float32)




In [2]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from torch.utils.data import DataLoader, ConcatDataset
from transformer_autoencoder import TransformerAutoencoder
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.nn.utils import clip_grad_norm_

# Check if CUDA is available and set the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

def create_dataloader(pcap_file, sequence_length, feature_dim, batch_size, label=None):
    dataset = PacketSequenceDataset(pcap_file=pcap_file, sequence_length=sequence_length, feature_dim=feature_dim, label=label)
    return DataLoader(dataset, batch_size=batch_size, shuffle=False, drop_last=True)

def create_combined_dataloader(normal_pcap_file, malicious_pcap_file, sequence_length, feature_dim, batch_size):
    normal_dataset = PacketSequenceDataset(pcap_file=normal_pcap_file, sequence_length=sequence_length, feature_dim=feature_dim, label=0)
    malicious_dataset = PacketSequenceDataset(pcap_file=malicious_pcap_file, sequence_length=sequence_length, feature_dim=feature_dim, label=1)
    
    combined_data = ConcatDataset([normal_dataset, malicious_dataset])
    combined_loader = DataLoader(combined_data, batch_size=batch_size, shuffle=True)
    
    return combined_loader

def set_threshold(model, train_dataloader, percentile=90):
    model.eval()
    reconstruction_errors = []
    
    with torch.no_grad():
        for inputs, _ in train_dataloader:
            inputs = inputs.to(device)
            outputs, _ = model(inputs)
            recon_error = model.compute_batch_error((outputs, None), inputs)
            reconstruction_errors.extend(recon_error.cpu().numpy())
    
    threshold = np.percentile(reconstruction_errors, percentile)
    return threshold

def test_model(model, test_dataloader, writer, epoch, threshold):
    model.eval()
    all_labels = []
    all_predictions = []
    all_recon_errors = []

    with torch.no_grad():
        for inputs, labels in test_dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs, _ = model(inputs)
            recon_errors = model.compute_batch_error((outputs, None), inputs)
            
            predictions = (recon_errors > threshold).float()
            
            all_labels.extend(labels.cpu().numpy())
            all_predictions.extend(predictions.cpu().numpy())
            all_recon_errors.extend(recon_errors.cpu().numpy())

    # Calculate metrics
    all_labels = np.array(all_labels)
    all_predictions = np.array(all_predictions)
    all_recon_errors = np.array(all_recon_errors)

    accuracy = accuracy_score(all_labels, all_predictions)
    precision = precision_score(all_labels, all_predictions)
    recall = recall_score(all_labels, all_predictions)
    f1 = f1_score(all_labels, all_predictions)

    # Log metrics to TensorBoard
    writer.add_scalar('Test/Accuracy', accuracy, epoch)
    writer.add_scalar('Test/Precision', precision, epoch)
    writer.add_scalar('Test/Recall', recall, epoch)
    writer.add_scalar('Test/F1-Score', f1, epoch)

    return accuracy, precision, recall, f1, all_recon_errors

def print_sample_reconstruction_errors(recon_errors, labels, num_samples=5):
    normal_errors = recon_errors[labels == 0]
    malicious_errors = recon_errors[labels == 1]

    print("\nSample Reconstruction Errors for Normal Data:")
    print(normal_errors[:num_samples])
    print(f"Average Reconstruction Error for Normal Data: {np.mean(normal_errors):.4f}")

    print("\nSample Reconstruction Errors for Malicious Data:")
    print(malicious_errors[:num_samples])
    print(f"Average Reconstruction Error for Malicious Data: {np.mean(malicious_errors):.4f}")


# Main training and testing loop
def train_and_test(model, train_dataloader, test_dataloader, num_epochs, learning_rate, weight_decay):
    writer = SummaryWriter()
    
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5, verbose=True)

    for epoch in range(num_epochs):
        model.train()
        epoch_loss = 0
        
        for batch_idx, (inputs, _) in enumerate(train_dataloader):
            inputs = inputs.to(device)
            optimizer.zero_grad()
            
            # Data augmentation
            inputs_noisy = inputs + 0.1 * torch.randn_like(inputs)
            
            outputs, _ = model(inputs_noisy)
            loss = model.compute_loss((outputs, None), inputs)
            loss.backward()
            
            # Gradient clipping
            clip_grad_norm_(model.parameters(), max_norm=1.0)
            
            optimizer.step()
            epoch_loss += loss.item()

        avg_epoch_loss = epoch_loss / len(train_dataloader)
        print(f"Epoch {epoch} loss: {avg_epoch_loss}")
        writer.add_scalar('Train/Loss', avg_epoch_loss, epoch)
        
        # Learning rate scheduling
        scheduler.step(avg_epoch_loss)
        
        # Set threshold based on training data
        if epoch % 5 == 0 or epoch == num_epochs - 1:
            threshold = set_threshold(model, train_dataloader)
            
            # Test the model
            accuracy, precision, recall, f1, recon_errors = test_model(model, test_dataloader, writer, epoch, threshold)
    print_sample_reconstruction_errors(recon_errors, np.array([label for _, label in test_dataloader.dataset]), num_samples=5)
    writer.close()
    return model, threshold

# Usage example
sequence_length = 16
feature_dim = 12
num_epochs = 5
learning_rate = 1e-3
weight_decay = 1e-5

# Initialize model and move it to GPU
model = TransformerAutoencoder(input_size=12, sequence_length=sequence_length).to(device)

normal_pcap_file = 'C:/capture/Monday-WorkingHours.pcap'
malicious_pcap_file = 'C:/capture/xenorat_1hr.pcap'

train_dataloader = create_dataloader(normal_pcap_file, sequence_length=16, feature_dim=feature_dim, batch_size=4, label=0)
test_dataloader = create_combined_dataloader(normal_pcap_file, malicious_pcap_file, sequence_length=16, feature_dim=feature_dim, batch_size=4)

# Train and test the model
trained_model, final_threshold = train_and_test(model, train_dataloader, test_dataloader, num_epochs, learning_rate, weight_decay)
writer = SummaryWriter()

# Final evaluation
final_accuracy, final_precision, final_recall, final_f1, final_recon_errors = test_model(trained_model, test_dataloader, writer, num_epochs, final_threshold)

print(f"Final Results - Accuracy: {final_accuracy:.4f}, Precision: {final_precision:.4f}, Recall: {final_recall:.4f}, F1-Score: {final_f1:.4f}")
print(f"Final Threshold: {final_threshold}")
print(f"Final Reconstruction Errors: {final_recon_errors}")
print_sample_reconstruction_errors(final_recon_errors, np.array([label for _, label in test_dataloader.dataset]), num_samples=5)





Using device: cuda


2024-08-11 20:34:45,744 - __main__ - INFO - Parsed 50000 packets into 3125 samples with 12 features each
2024-08-11 20:34:45,754 - __main__ - INFO - Feature 0 - Mean: 2655889408.0000, Std: 1071169536.0000
2024-08-11 20:34:45,755 - __main__ - INFO - Feature 1 - Mean: 2757160960.0000, Std: 1072995520.0000
2024-08-11 20:34:45,756 - __main__ - INFO - Feature 2 - Mean: 549.8768, Std: 896.5219
2024-08-11 20:34:45,757 - __main__ - INFO - Feature 3 - Mean: 1.2742, Std: 0.9615
2024-08-11 20:34:45,757 - __main__ - INFO - Feature 4 - Mean: 91.3721, Std: 60.2832
2024-08-11 20:34:45,758 - __main__ - INFO - Feature 5 - Mean: 8.6800, Std: 5.5182
2024-08-11 20:34:45,758 - __main__ - INFO - Feature 6 - Mean: 13521.8740, Std: 21714.3242
2024-08-11 20:34:45,760 - __main__ - INFO - Feature 7 - Mean: 15104.0566, Std: 22016.7422
2024-08-11 20:34:45,760 - __main__ - INFO - Feature 8 - Mean: 245.0459, Std: 1160.1272
2024-08-11 20:34:45,761 - __main__ - INFO - Feature 9 - Mean: 2525.9277, Std: 9968.3281
2024-0

Epoch 0 loss: 0.4850595642033864
Epoch 1 loss: 0.26190549611243946
Epoch 2 loss: 0.19400056725947445
Epoch 3 loss: 0.16201424087122293
Epoch 4 loss: 0.1414582475300797

Sample Reconstruction Errors for Normal Data:
[0.15065774 0.08738014 0.11125164 0.07739394 0.10140427]
Average Reconstruction Error for Normal Data: 0.1792

Sample Reconstruction Errors for Malicious Data:
[0.03546462 0.09013946 0.113087   0.42690906 0.08937418]
Average Reconstruction Error for Malicious Data: 0.1819
Final Results - Accuracy: 0.6069, Precision: 0.7581, Recall: 0.3139, F1-Score: 0.4440
Final Threshold: 0.19744550585746767
Final Reconstruction Errors: [0.40206116 0.43076062 0.13544607 ... 0.14696589 0.02732018 0.11753799]

Sample Reconstruction Errors for Normal Data:
[0.40206116 0.43076062 0.13544607 0.09480257 0.04179009]
Average Reconstruction Error for Normal Data: 0.1789

Sample Reconstruction Errors for Malicious Data:
[0.18110159 0.21393716 0.42636985 0.1959219  0.06174812]
Average Reconstruction E

In [3]:
for batch in train_dataloader:
    print(batch[0][0])

tensor([[-2.3395,  0.4421, -0.5687, -1.3255, -0.6035, -0.4856, -0.6191,  1.5481,
         -0.1966, -0.2204, -0.5182, -0.1323],
        [-2.3395,  0.4421, -0.5687, -1.3255, -0.6035, -0.4856, -0.6191,  1.5481,
         -0.1966, -0.2204, -0.5182, -0.1323],
        [-2.3395,  0.4421, -0.5687, -1.3255, -0.6035, -0.4856, -0.6191,  1.5481,
         -0.1966, -0.2204, -0.5182, -0.1323],
        [-2.3395,  0.4421, -0.5687, -1.3255, -0.6035, -0.4856, -0.6191,  1.5481,
         -0.1966, -0.2204, -0.5182, -0.1323],
        [-2.3395,  0.4421, -0.5687, -1.3255, -0.6035, -0.4856, -0.6191,  1.5481,
         -0.1966, -0.2204, -0.5182, -0.1323],
        [-2.3395,  0.4421, -0.5687, -1.3255, -0.6035, -0.4856, -0.6191,  1.5481,
         -0.1966, -0.2204, -0.5182, -0.1323],
        [-2.3395,  0.4421, -0.5687, -1.3255, -0.6035, -0.4856, -0.6191,  1.5481,
         -0.1966, -0.2204, -0.5182, -0.1323],
        [-2.3395,  0.4421, -0.5687, -1.3255, -0.6035, -0.4856, -0.6191,  1.5481,
         -0.1966, -0.2204, -0.