## Prepare the data for the ML algorithm

In [None]:

%%capture
from scapy.utils import rdpcap

from helper.csv_annotation_utils import add_predictions
from helper.csv_annotation_utils import get_frame_numbers

%run -i ./i2p_network_traffic_patterns_all.ipynb

## Prepare the data for extraction

In [None]:
from scapy.layers.inet import IP


def remove_packets(packets, frame_nums_all, frame_nums_exclude, target_ips):
    """
    Drop packets for which it is unclear whether the connection is a jump.

    A packet is dropped only when all of the following hold:

    * its ``old_frame_number`` is listed in ``frame_nums_all`` but not in
      ``frame_nums_exclude``,
    * it carries an IP layer,
    * its IP destination address is one of ``target_ips``.

    Packets are matched through their ``old_frame_number`` attribute, never
    through their position in ``packets``. Positions are 0-based while frame
    numbers need not be, so indexing ``packets`` with a frame number would
    inspect the wrong packet and fail for frame numbers past the end of the
    list.

    :param packets: Iterable of packets carrying an ``old_frame_number`` attribute
    :param frame_nums_all: Set or list of frame numbers to consider
    :param frame_nums_exclude: Set or list of frame numbers that must be kept
    :param target_ips: Collection of destination IP addresses to match
    :return: A new list of packets without the removed ones
    """
    # Built once as a set so the per-packet membership test is O(1).
    candidate_frames = set(frame_nums_all) - set(frame_nums_exclude)
    return [
        pkt
        for pkt in packets
        # The layer checks only run for candidate frames (short-circuit).
        if not (
            pkt.old_frame_number in candidate_frames
            and IP in pkt
            and pkt[IP].dst in target_ips
        )
    ]


# Load the capture file
packets = rdpcap('traffic_with_filter.pcap')

# Tag every packet with its 1-based position in the capture so that it can
# still be connected to the original data later on
for frame_no, packet in enumerate(packets, start=1):
    packet.old_frame_number = frame_no

# Collect the frame numbers of each connection group
frame_all_connections = get_frame_numbers(all_connections)
frame_nums_start_to_all_targets = get_frame_numbers(all_start_connections)
# Only the first entry of each of these collections is used
first_unique_target = list(start_node_unique_connections)[0]
frame_nums_start_to_single_unique_target = get_frame_numbers(first_unique_target[1])
first_other_node = list(track_other_connections)[0]
frame_nums_other_nodes_connections = get_frame_numbers(first_other_node[1])

# Add predictions: value 0 for the frames of all connections first, then
# value 1 for the frames going to the single unique target
add_predictions(packets, frame_all_connections, 0)
add_predictions(packets, frame_nums_start_to_single_unique_target, 1)

# Remove the frames that go from the start node to the jump node but are not
# part of the single unique target connection
jump_node_ip = "10.8.0.6"  # this can handle just one jump node
final_packet = remove_packets(
    packets,
    frame_nums_start_to_all_targets,
    frame_nums_start_to_single_unique_target,
    [jump_node_ip],
)

# Visual check for the user: show the remaining packets addressed to the jump
# node so they can be compared with the original data
for i, pkt in enumerate(final_packet):
    if not pkt.haslayer('IP'):
        continue
    if pkt['IP'].dst != jump_node_ip:
        continue
    # print(f'Index {i}, Frame {pkt.old_frame_number}') -- Off by default, as too much data is printed
    print(pkt)

# Only keep packets that have the prediction attribute set
final_packet = list(filter(lambda labelled: hasattr(labelled, 'prediction'), final_packet))

## Export data as CSV

In [None]:
%load_ext autoreload
%autoreload 2

import importlib
import helper.create_csv_utils

# Re-execute the helper module so that edits made to it after the first import
# are picked up without restarting the kernel
importlib.reload(helper.create_csv_utils)
# Import after the reload so that create_csv is bound to the reloaded definition
from helper.create_csv_utils import create_csv

# Export the prepared packets; presumably this writes them to "cnn.csv".
# NOTE(review): the meaning of the third positional argument (False) is defined
# in helper.create_csv_utils and is not visible here -- confirm before changing it
create_csv(final_packet, "cnn.csv", False)
