In [1]:
import sys, os
sys.path.append(os.path.abspath('../'))

In [2]:
from src.utils.flow import *
from src.utils.flow_calculations import *
from src.utils.restoration import *
from src.utils.truncated_packet import *
from src.operations.size_perturbation_logic import *
from src.operations.timing_perturbation_logic import *
from src.operations.calculate_fitness import *

modified_pcap_path = "../data/interim/testing_small_perturbed.pcapng"
pcap_file_path = '../data/raw/botnet-capture-20110816-donbot.pcap'



In [14]:
truncated_packets = create_truncated_packets_from_pcap(pcap_file_path)
truncated_packets = assign_flow_ids_to_packets(truncated_packets)
flow_id = 2

In [None]:
# SIZES

In [4]:
def generate_weights(length, focus_point='middle', scaling_factor=1.0):
    """
    Generates weights for adjusting sizes or timestamps based on a normal distribution,
    focused on a specific part of the sequence with a scaling factor.
    """
    if length == 0:
        return np.ones(1)
    x = np.linspace(0, length, num=length)
    if focus_point == 'start':
        mean = length * 0.25
    elif focus_point == 'end':
        mean = length * 0.75
    else:  # 'middle'
        mean = length / 2
    std_dev = length / 10  # Control the spread of the influence
    weights = norm.pdf(x, loc=mean, scale=std_dev)
    if np.max(weights) == 0:
        # If the maximum weight is 0, avoid division by zero.
        return np.full(length, scaling_factor)
    weights /= np.max(weights)  # Normalize
    weights = weights * (scaling_factor - 1) + 1  # Adjust scaling
    return weights

In [10]:
def adjust_packet_size_approx(flow_stats, direction=0, scaling_factor=1.0, focus_point='middle'):
    """
    Modyfikuje wybrane kierunki przepływu danych (src->dst, dst->src, lub oba),
    a następnie zwraca pełne statystyki sumaryczne dla przepływu.
    """
    # Inicjalizacja statystyk przepływu
    modified_stats = flow_stats.copy()

    # Obliczenie wag dla każdego kierunku, jeśli potrzebne
    if direction in [0, 1]:  # src->dst lub oba
        total_fwd_packets = flow_stats['tot_fwd_pkts']
        if total_fwd_packets > 0:
            fwd_weights = generate_weights(total_fwd_packets, focus_point=focus_point, scaling_factor=scaling_factor)
            modified_stats['fwd_pkt_len_mean'] = np.mean(fwd_weights) * flow_stats['fwd_pkt_len_mean']
            modified_stats['fwd_pkt_len_min'] = np.min(fwd_weights) * flow_stats['fwd_pkt_len_mean']
            modified_stats['fwd_pkt_len_max'] = np.max(fwd_weights) * flow_stats['fwd_pkt_len_mean']
            modified_stats['fwd_pkt_len_std'] = np.std(fwd_weights) * flow_stats['fwd_pkt_len_std'] if flow_stats['fwd_pkt_len_std'] > 0 else 0
            modified_stats['totlen_fwd_pkts'] = np.sum(fwd_weights) * flow_stats['fwd_pkt_len_mean']

    if direction in [0, 2]:  # dst->src lub oba
        total_bwd_packets = flow_stats['tot_bwd_pkts']
        if total_bwd_packets > 0:
            bwd_weights = generate_weights(total_bwd_packets, focus_point=focus_point, scaling_factor=scaling_factor)
            modified_stats['bwd_pkt_len_mean'] = np.mean(bwd_weights) * flow_stats['bwd_pkt_len_mean']
            modified_stats['bwd_pkt_len_min'] = np.min(bwd_weights) * flow_stats['bwd_pkt_len_mean']
            modified_stats['bwd_pkt_len_max'] = np.max(bwd_weights) * flow_stats['bwd_pkt_len_mean']
            modified_stats['bwd_pkt_len_std'] = np.std(bwd_weights) * flow_stats['bwd_pkt_len_std'] if flow_stats['bwd_pkt_len_std'] > 0 else 0
            modified_stats['totlen_bwd_pkts'] = np.sum(bwd_weights) * flow_stats['bwd_pkt_len_mean']

    # Aktualizacja sumarycznych statystyk przepływu
    total_packets = modified_stats['tot_fwd_pkts'] + modified_stats['tot_bwd_pkts']
    total_len = modified_stats['totlen_fwd_pkts'] + modified_stats['totlen_bwd_pkts']
    modified_stats['pkt_len_mean'] = total_len / total_packets if total_packets > 0 else 0
    pkt_lens = [modified_stats['fwd_pkt_len_mean']] * modified_stats['tot_fwd_pkts'] + \
               [modified_stats['bwd_pkt_len_mean']] * modified_stats['tot_bwd_pkts']
    modified_stats['pkt_len_std'] = np.std(pkt_lens) if pkt_lens else 0

    return modified_stats


In [20]:
flow_id = 1

In [21]:
prepare_size_stats(truncated_packets, flow_id)

{'tot_fwd_pkts': 1090,
 'tot_bwd_pkts': 635,
 'totlen_fwd_pkts': 65646,
 'totlen_bwd_pkts': 1209780,
 'fwd_pkt_len_max': 174,
 'fwd_pkt_len_min': 60,
 'fwd_pkt_len_mean': 60.225688073394494,
 'fwd_pkt_len_std': 4.885710369123273,
 'bwd_pkt_len_max': 4434,
 'bwd_pkt_len_min': 60,
 'bwd_pkt_len_mean': 1905.1653543307086,
 'bwd_pkt_len_std': 701.5260190007999,
 'pkt_len_mean': 739.3773913043478,
 'pkt_len_std': 986.3701563824255}

In [22]:
adjust_packet_size_approx(prepare_size_stats(truncated_packets, flow_id), direction = 0, scaling_factor= 1.7, focus_point='middle')

{'tot_fwd_pkts': 1090,
 'tot_bwd_pkts': 635,
 'totlen_fwd_pkts': 77154.05579771542,
 'totlen_bwd_pkts': 1421718.408291247,
 'fwd_pkt_len_max': 102.38366972477064,
 'fwd_pkt_len_min': 60.225845183226525,
 'fwd_pkt_len_mean': 70.78353742909673,
 'fwd_pkt_len_std': 1.1565894641793688,
 'bwd_pkt_len_max': 3238.7811023622044,
 'bwd_pkt_len_min': 1905.1703242540664,
 'bwd_pkt_len_mean': 2238.9266272303103,
 'bwd_pkt_len_std': 166.04512531519813,
 'pkt_len_mean': 868.9115733849058,
 'pkt_len_std': 1045.6803642392474}

In [27]:
def adjust_packet_size_and_calculate_stats(truncated_packets, flow_id, direction=0, method='uniform', scaling_factor=1.0, focus_point='middle'):
    """
    Przyjmuje listę obiektów pakietów oraz flow_id, filtruje pakiety na podstawie flow_id,
    modyfikuje ich rozmiary na podstawie wybranej metody perturbacji i kierunku,
    i zwraca zaktualizowane i kompleksowe statystyki przepływu dla wszystkich pakietów.
    """
    # Filtracja pakietów na podstawie flow_id
    packets = [pkt for pkt in truncated_packets if pkt.flow_id == flow_id]
    
    # Przygotowanie do obliczenia wag
    if direction == 0:
        direction_packets = packets
    else:
        direction_packets = [pkt for pkt in packets if pkt.direction == direction]
    total_direction_packets = len(direction_packets)

    # Obliczenie wag
    if total_direction_packets > 0:
        if method == 'normal':
            weights = generate_weights(total_direction_packets, focus_point=focus_point, scaling_factor=scaling_factor)
        else:  # 'uniform'
            weights = np.full(total_direction_packets, scaling_factor)

        # Modyfikacja rozmiarów pakietów w wybranym kierunku
        for i, pkt in enumerate(direction_packets):
            pkt.size = int(pkt.size * weights[i])  # Zaaktualizuj rozmiar pakietu

    # Obliczanie statystyk dla wszystkich pakietów po modyfikacji
    fwd_packets = [pkt for pkt in packets if pkt.direction == 1]
    bwd_packets = [pkt for pkt in packets if pkt.direction == 2]

    def calculate_stats(packets):
        if packets:
            sizes = [pkt.size for pkt in packets]
            return {
                'mean': np.mean(sizes),
                'min': np.min(sizes),
                'max': np.max(sizes),
                'std': np.std(sizes),
                'total_len': np.sum(sizes),
                'count': len(sizes),
            }
        return {'mean': 0, 'min': 0, 'max': 0, 'std': 0, 'total_len': 0, 'count': 0}

    fwd_stats = calculate_stats(fwd_packets)
    bwd_stats = calculate_stats(bwd_packets)
    all_stats = calculate_stats(packets)

    updated_stats = {
        'tot_fwd_pkts': fwd_stats['count'],
        'tot_bwd_pkts': bwd_stats['count'],
        'totlen_fwd_pkts': fwd_stats['total_len'],
        'totlen_bwd_pkts': bwd_stats['total_len'],
        'fwd_pkt_len_max': fwd_stats['max'],
        'fwd_pkt_len_min': fwd_stats['min'],
        'fwd_pkt_len_mean': fwd_stats['mean'],
        'fwd_pkt_len_std': fwd_stats['std'],
        'bwd_pkt_len_max': bwd_stats['max'],
        'bwd_pkt_len_min': bwd_stats['min'],
        'bwd_pkt_len_mean': bwd_stats['mean'],
        'bwd_pkt_len_std': bwd_stats['std'],
        'pkt_len_mean': all_stats['mean'],
        'pkt_len_min': min(fwd_stats['min'], bwd_stats['min']) if fwd_stats['min'] and bwd_stats['min'] else max(fwd_stats['min'], bwd_stats['min']),
        'pkt_len_max': max(fwd_stats['max'], bwd_stats['max']),
        'pkt_len_std': all_stats['std'],
    }

    return updated_stats


In [32]:
flow_id = 3

In [68]:
adjust_packet_size_and_calculate_stats(truncated_packets, flow_id,direction = 0, method='uniform', scaling_factor= 1.7, focus_point='middle')

{'tot_fwd_pkts': 2,
 'tot_bwd_pkts': 1,
 'totlen_fwd_pkts': 33908494036.969864,
 'totlen_bwd_pkts': 35518673921.40699,
 'fwd_pkt_len_max': 16954247018.484932,
 'fwd_pkt_len_min': 16954247018.484932,
 'fwd_pkt_len_mean': 16954247018.484932,
 'fwd_pkt_len_std': 0.0,
 'bwd_pkt_len_max': 35518673921.40699,
 'bwd_pkt_len_min': 35518673921.40699,
 'bwd_pkt_len_mean': 35518673921.40699,
 'bwd_pkt_len_std': 0.0,
 'pkt_len_mean': 23142389319.458954,
 'pkt_len_std': 8751354767.93211}

In [36]:
adjust_packet_size(truncated_packets, flow_id, direction = 0, method='uniform', scaling_factor= 1.7, focus_point='middle')

prepare_size_stats(truncated_packets, flow_id)

{'tot_fwd_pkts': 2,
 'tot_bwd_pkts': 1,
 'totlen_fwd_pkts': 1432,
 'totlen_bwd_pkts': 1500,
 'fwd_pkt_len_max': 716,
 'fwd_pkt_len_min': 716,
 'fwd_pkt_len_mean': 716.0,
 'fwd_pkt_len_std': 0.0,
 'bwd_pkt_len_max': 1500,
 'bwd_pkt_len_min': 1500,
 'bwd_pkt_len_mean': 1500.0,
 'bwd_pkt_len_std': 0.0,
 'pkt_len_mean': 977.3333333333334,
 'pkt_len_std': 369.58114430016883}