In [1]:
import os
import sys
import yaml

# Make the helper modules under ../scripts importable from this notebook.
# The membership test keeps a re-run of this cell from appending the same
# directory to sys.path a second time.
scripts_dir = os.path.abspath(os.path.join('..', 'scripts'))
if scripts_dir not in sys.path:
    sys.path.append(scripts_dir)

# Define the path to the config.yaml file
config_path = os.path.abspath(os.path.join('..', 'configs', 'config.yaml'))

# Load the YAML file
# The encoding is spelled out so decoding does not depend on the platform's
# default text encoding.
with open(config_path, 'r', encoding='utf-8') as file:
    config = yaml.safe_load(file)



In [2]:
# NOTE(review): capture_data is presumably found via the ../scripts directory
# appended to sys.path in the first cell — confirm.
from capture_data import NetworkTrafficCollector

# File path for the first capture, read from the loaded configuration.
raw_path = config['data']['raw_path']
print(raw_path)
# NOTE(review): "Wi-Fi 2" looks like a network interface name specific to the
# authoring machine — confirm before running elsewhere.
collection = NetworkTrafficCollector(collection_point="Wi-Fi 2", file_path=raw_path)
# The capture call is disabled; uncomment to run it.
#collection.start_capture(timeout=60)



C:\Users\theob\Code\Refactored MLSec\data\raw\data.txt


In [3]:
# Re-imported so this cell does not rely on the previous one having run.
from capture_data import NetworkTrafficCollector

# File path for the second ("new") capture, read from the loaded configuration.
new_raw_path = config['data']['new_raw_path']
print(new_raw_path)
# NOTE(review): "Wi-Fi 2" looks like a network interface name specific to the
# authoring machine — confirm before running elsewhere.
new_collection = NetworkTrafficCollector(collection_point="Wi-Fi 2", file_path=new_raw_path)
# The capture call is disabled; uncomment to run it.
#new_collection.start_capture(timeout=60)

C:\Users\theob\Code\Refactored MLSec\data\new_raw\new_data.txt


In [4]:
# NOTE(review): preprocess_data is presumably found via the ../scripts
# directory appended to sys.path in the first cell — confirm.
from preprocess_data import process_packets

# Raw and processed locations for the first capture, read from the config.
raw_path = config['data']['raw_path']
processed_path = config['data']['processed_path']
# Disabled; uncomment to run preprocessing for the first capture.
#process_packets(raw_path, processed_path)

# The same pair of locations for the second ("new") capture.
new_raw_path = config['data']['new_raw_path']
new_processed_path = config['data']['new_processed_path']
# Disabled; uncomment to run preprocessing for the new capture.
#process_packets(new_raw_path, new_processed_path)

DEBUG:h5py._conv:Creating converter from 7 to 5
DEBUG:h5py._conv:Creating converter from 5 to 7
DEBUG:h5py._conv:Creating converter from 7 to 5
DEBUG:h5py._conv:Creating converter from 5 to 7


In [None]:
# Train the autoencoder
from autoencoder import evaluate_autoencoder, store_autoencoder, train_autoencoder

# new_processed_path was read from config['data']['new_processed_path'] in the
# preprocessing cell, so that cell has to run before this one.
autoencoder = train_autoencoder(new_processed_path)

In [None]:

# Evaluate the autoencoder
# NOTE(review): this passes the same new_processed_path that was given to
# train_autoencoder — confirm whether a held-out file was intended.
evaluate_autoencoder(autoencoder, new_processed_path)

In [None]:

# Store the trained autoencoder
# The destination comes from config['model']['trained_model_path']; the
# inference cell further down reads the same key to load the model.
model_path = config['model']['trained_model_path']
store_autoencoder(autoencoder, model_path)

In [27]:
import re
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import torch
from autoencoder import Autoencoder
from torch import nn

def load_model(model_path, input_dim):
    """Build an Autoencoder, restore saved weights into it and switch it to eval mode.

    Args:
        model_path: Path to a file that torch.load can read and that holds a
            state dict accepted by Autoencoder.load_state_dict.
        input_dim: Value forwarded to the Autoencoder constructor
            (presumably the number of feature columns — confirm).

    Returns:
        The Autoencoder instance with the loaded weights, in eval mode.
    """
    model = Autoencoder(input_dim)
    # Map stored tensors to the CPU so a checkpoint written on a GPU machine
    # still loads where no GPU is available; load_state_dict then copies the
    # values into the freshly built model's parameters.
    state_dict = torch.load(model_path, map_location='cpu')
    model.load_state_dict(state_dict)
    model.eval()
    return model

# Function to predict anomalies in new packets
def predict_anomalies(model, new_packets, threshold=0.1):
    """Flag rows whose reconstruction error exceeds a threshold.

    Args:
        model: Callable that maps a float32 tensor to a tensor of the same
            shape (here, the trained autoencoder).
        new_packets: 2-D array-like of numeric features, one row per packet.
        threshold: Rows with a mean squared reconstruction error strictly
            greater than this value are flagged.

    Returns:
        A NumPy boolean array with one entry per row; True marks an anomaly.
    """
    # as_tensor accepts lists, NumPy arrays and tensors without the copy
    # warning torch.tensor emits when handed an existing tensor.
    new_packets = torch.as_tensor(new_packets, dtype=torch.float32)
    # Inference only: disabling autograd avoids building a graph that is
    # never used for a backward pass.
    with torch.no_grad():
        outputs = model(new_packets)
        loss = nn.functional.mse_loss(outputs, new_packets, reduction='none')
        # Average the element-wise errors across features to get one score per row.
        loss = loss.mean(dim=1)
    anomalies = loss > threshold
    # Per-row scores are printed so the threshold can be judged against them.
    print(loss)
    return anomalies.numpy()

def parse_packet(packet):
    """Extract the endpoint fields from one textual packet summary line.

    The line is stripped of surrounding whitespace and must begin with
    'Ether / IP / TCP' or 'Ether / IP / UDP', followed by 'ip:port > ip:port'
    and one further word.

    Returns:
        A tuple (src_ip, src_port, dst_ip, dst_port, protocol, flags) of
        strings, or None when the line does not match.
    """
    pattern = r'Ether / IP / (TCP|UDP) ([\d.]+):(\w+) > ([\d.]+):(\w+) (\w+)'
    found = re.match(pattern, packet.strip())
    if found is None:
        return None
    # The regex captures the protocol first; callers expect it second to last.
    proto, source_ip, source_port, dest_ip, dest_port, trailing_word = found.groups()
    return source_ip, source_port, dest_ip, dest_port, proto, trailing_word

def preprocess_new_packets(file_path):
    """Read a packet capture text file and turn it into a scaled feature matrix.

    Each line is passed to parse_packet; lines it rejects are dropped. The
    remaining fields are converted to numbers and min-max scaled.

    Args:
        file_path: Path to a text file with one packet summary per line.

    Returns:
        A tuple (scaled_packets, count) where scaled_packets is the output of
        MinMaxScaler.fit_transform over the six feature columns and count is
        the number of lines that parsed successfully.

    NOTE(review): the scaler and the category codes are fitted on this file
    alone. Confirm this matches how the training data was prepared, otherwise
    the same packet can map to different feature values.
    """
    with open(file_path, 'r') as file:
        # Parse every line exactly once; rejected lines come back as None.
        parsed_lines = [parse_packet(line) for line in file]

    packets = [fields for fields in parsed_lines if fields]
    df = pd.DataFrame(packets, columns=['src_ip', 'src_port', 'dst_ip', 'dst_port', 'protocol', 'flags'])

    def ip_to_numeric(ip):
        # Concatenates the octets' digits into one integer.
        # NOTE(review): this is lossy ('1.23.4.5' and '12.3.4.5' both give
        # 12345); left as is so features stay comparable with existing data.
        return int(ip.replace('.', ''))

    def port_to_numeric(port):
        # Ports may be decimal, hex ('0x...') or the service name 'https';
        # anything else that int() cannot read falls back to 1.
        try:
            if port.lower() == 'https':
                return 443
            return int(port, 16) if '0x' in port else int(port)
        except ValueError:
            # Only conversion failures are absorbed, so interrupts and
            # unrelated errors still propagate.
            return 1

    df['src_ip'] = df['src_ip'].apply(ip_to_numeric)
    df['dst_ip'] = df['dst_ip'].apply(ip_to_numeric)
    # Replace each distinct protocol / flag string with an integer code.
    df['protocol'] = df['protocol'].astype('category').cat.codes
    df['flags'] = df['flags'].astype('category').cat.codes
    df['src_port'] = df['src_port'].apply(port_to_numeric)
    df['dst_port'] = df['dst_port'].apply(port_to_numeric)

    scaler = MinMaxScaler()
    scaled_packets = scaler.fit_transform(df)

    return scaled_packets, len(packets)

In [28]:
# Location of the stored weights, as recorded in the configuration.
trained_model_path = config['model']['trained_model_path']

# Parse and scale the new capture; the second value is the number of lines
# that parsed successfully.
new_packets, num_valid_packets = preprocess_new_packets(new_raw_path)

# The model's input size is the number of feature columns.
input_dim = new_packets.shape[1]
model = load_model(trained_model_path, input_dim)

# One boolean per parsed packet.
anomalies = predict_anomalies(model, new_packets)

# Re-read the capture so each verdict can be shown next to its original line.
with open(new_raw_path, 'r') as file:
    new_raw_packets = file.readlines()

# anomalies only has entries for lines that parsed, so unparsable lines are
# skipped here and the index advances only on parsed ones.
valid_packet_idx = 0
for packet in new_raw_packets:
    if not parse_packet(packet):
        continue
    is_anomalous = anomalies[valid_packet_idx]
    status = "Anomalous" if is_anomalous else "Normal"
    print(f"Packet: {packet.strip()}, Status: {status}")
    valid_packet_idx += 1

tensor([0.0952, 0.0590, 0.0590, 0.0590, 0.0590, 0.0952, 0.0757, 0.0757, 0.0757,
        0.0949, 0.0590, 0.0909, 0.0899, 0.0703, 0.0382, 0.0557, 0.0600, 0.0768,
        0.1545, 0.1170, 0.1545, 0.1170, 0.1545, 0.1170, 0.1899, 0.0593, 0.0761,
        0.0507, 0.0671, 0.1545, 0.1170, 0.1545, 0.1170, 0.1545, 0.1170, 0.1545,
        0.1170, 0.1545, 0.1170, 0.1545, 0.1170, 0.0601, 0.0768, 0.0600, 0.0768,
        0.0519, 0.0684, 0.1524, 0.1545, 0.0599, 0.0767, 0.1170, 0.0599, 0.0766,
        0.0526, 0.0690, 0.0601, 0.0769, 0.0519, 0.0684, 0.0519, 0.0684, 0.0519,
        0.0684, 0.0601, 0.0768, 0.0519, 0.0684, 0.1545, 0.0519, 0.1170, 0.0684,
        0.0602, 0.0769, 0.0567, 0.0757, 0.1545, 0.1170, 0.0519, 0.0684, 0.0451,
        0.0633, 0.0598, 0.0766, 0.0445, 0.0625, 0.0590, 0.0757, 0.1545, 0.1170,
        0.0529, 0.0703, 0.0382, 0.0557, 0.0600, 0.0768, 0.1545, 0.1170, 0.0825,
        0.0625, 0.0845, 0.1545, 0.0445, 0.1170], grad_fn=<MeanBackward1>)
Packet: Ether / IP / TCP 10.3.32.21:59375 > 20