In [1]:
# Load library
import os
import subprocess
import shlex
import shutil

from scapy.all import *
from PIL import Image
import numpy as np

In [2]:
# Setup variables
# When True, the reset cell deletes the SPLIT_DATA directory.
IS_RESET = False
# Root directory of the source captures (read by the commented-out split cell).
ROOT_DATA = './data'
# Directory holding the split captures; scanned for pcap files when building images.
SPLIT_DATA = './splited_data'
# Minimum TCP/UDP payload length (in bytes) a packet needs in order to be processed.
PAYLOAD_MIN = 0
# Format spec applied to the running image number in file names (zero-padded, width 6).
FMT = '06d'
# Per-capture threshold: the stop filter fires once more than COUNT qualifying
# packets have been seen.
COUNT = 19
# Output directory for the generated images; its name embeds PAYLOAD_MIN and COUNT.
IMG_DATA = f'./img_{PAYLOAD_MIN}_f{COUNT}_'

In [3]:
# Reset step: the split captures are only discarded when IS_RESET is set,
# whereas the image output directory is always removed if it exists.
if IS_RESET and os.path.exists(SPLIT_DATA):
    shutil.rmtree(SPLIT_DATA)
if os.path.exists(IMG_DATA):
    shutil.rmtree(IMG_DATA)

In [4]:
def pkt2vec(pkt, rows=375, cols=4):
    """Turn the IP layer of a packet into a fixed-size image array.

    The raw bytes of ``pkt['IP']`` are truncated or zero-padded to exactly
    ``rows * cols`` bytes, laid out row by row, and replicated into three
    identical channels.

    Args:
        pkt: Object supporting ``pkt['IP']`` whose result ``raw()`` can
            serialise to bytes.
        rows: Number of image rows (default 375).
        cols: Number of bytes per row (default 4).

    Returns:
        ``numpy.ndarray`` of dtype ``uint8`` and shape ``(rows, cols, 3)``.
    """
    size = rows * cols
    # View the serialised bytes directly instead of round-tripping through a
    # hex string and parsing every byte back with int(..., 16).
    arr = np.frombuffer(raw(pkt['IP']), dtype=np.uint8)[:size]
    # Short packets are right-padded with zeros so every image has the same shape.
    arr = np.pad(arr, (0, size - len(arr)), 'constant', constant_values=0)
    fv = arr.reshape(-1, cols)
    # Copy the single channel three times along a new last axis.
    return np.stack((fv, fv, fv), axis=2)

In [5]:
# read pcap files
def read_pcap(root_dir, ext=('.pcap', '.pcapng')):
    """Walk ``root_dir`` and yield every capture file found beneath it.

    Entries whose name starts with ``.`` are skipped, both files and
    directories. Sub-directories are visited via an explicit stack.

    Args:
        root_dir: Directory to start scanning from.
        ext: Suffix (or tuple of suffixes) a file name must end with.

    Yields:
        ``(label, path)`` tuples, where ``label`` is the name of the
        directory that directly contains the file.
    """
    pending = [root_dir]
    while pending:
        current_dir = pending.pop()
        with os.scandir(current_dir) as entries:
            for item in entries:
                if item.name.startswith('.'):
                    continue  # hidden file or directory
                if item.is_file():
                    if item.name.endswith(ext):
                        # The parent directory name serves as the label.
                        parent = os.path.dirname(item.path)
                        yield os.path.basename(parent), item.path
                elif item.is_dir():
                    pending.append(item.path)

In [6]:
def split_pcap(src, dst):
    """Split one capture file into per-connection files with PcapSplitter.

    Args:
        src: Path of the capture file to split.
        dst: Output directory; created if it does not exist yet.

    The ``PcapSplitter`` executable must be resolvable by ``subprocess.run``.
    Its exit status is not checked.
    """
    os.makedirs(dst, exist_ok=True)
    # Build the argument vector directly: formatting the paths into a single
    # string and re-tokenising it with shlex would break paths that contain
    # spaces, quotes or backslashes.
    cmd = ['PcapSplitter', '-f', str(src), '-o', str(dst), '-m', 'connection']
    subprocess.run(cmd)

In [7]:
# # Do not rerun!
# ## Commented out for safety so the split is not re-run by accident.
# pcap_path = os.path.abspath(os.path.expanduser(ROOT_DATA))
# for label, path in read_pcap(pcap_path):
#     dst = os.path.abspath(os.path.expanduser(os.path.join(SPLIT_DATA, label)))
#     split_pcap(path, dst)

In [8]:
def pkt2img(base, label, cnt):
    """Build a per-packet callback that writes qualifying packets as PNG files.

    Args:
        base: Directory the images are written into.
        label: Key used in ``cnt`` and in the progress output.
        cnt: Dict mapping label -> number of images written so far; it is
            updated in place and provides the running file number.

    Returns:
        A function taking one packet. Packets without an IP layer, without
        TCP/UDP, or with a transport payload shorter than ``PAYLOAD_MIN`` are
        ignored; every other packet is converted with ``pkt2vec`` and saved
        as ``<number formatted with FMT>.png`` under ``base``.
    """
    def process_pkt(pkt):
        if not pkt.haslayer('IP'):
            return
        ip_layer = pkt['IP']
        # TCP takes precedence over UDP when picking the transport layer.
        transport = next(
            (name for name in ('TCP', 'UDP') if ip_layer.haslayer(name)),
            None,
        )
        if transport is None:
            return
        payload_len = len(raw(ip_layer[transport].payload))
        if payload_len < PAYLOAD_MIN:
            return
        pixels = pkt2vec(pkt)
        index = cnt.get(label, 0)
        target = os.path.join(base, f'{index:{FMT}}.png')
        # The counter is advanced before the image is written.
        cnt[label] = index + 1
        Image.fromarray(pixels).save(target)
        # Keep one line per hundred images; otherwise overwrite the same line.
        line_end = '\n' if index % 100 == 1 else '\r'
        print(f'{label}: {index} Processed', end=line_end)
    return process_pkt

def stop_filter():
    """Build a predicate that reports when enough packets have been seen.

    The returned function applies the filter on IP layer, TCP/UDP layer and
    minimum payload length (``PAYLOAD_MIN``). For each packet that passes, it
    increments the module-level ``current`` counter and returns ``True`` once
    that counter exceeds ``COUNT``; in every other case it returns ``False``.

    The counter is a global, so the caller has to reset ``current`` before
    starting on a new capture.

    NOTE(review): if ``sniff`` invokes ``prn`` before ``stop_filter`` for each
    packet, the packet that triggers the stop has already been handled by
    ``prn``, i.e. COUNT + 1 packets per capture - confirm this is intended.
    """
    def process_pkt(pkt):
        global current
        if not pkt.haslayer('IP'):
            return False
        ip_layer = pkt['IP']
        # TCP takes precedence over UDP when picking the transport layer.
        for name in ('TCP', 'UDP'):
            if ip_layer.haslayer(name):
                transport = name
                break
        else:
            return False
        if len(raw(ip_layer[transport].payload)) < PAYLOAD_MIN:
            return False
        current += 1
        return current > COUNT
    return process_pkt


# Convert up to the stop-filter limit of packets from every split capture
# into images, grouped by label directory.
splited_path = os.path.abspath(os.path.expanduser(SPLIT_DATA))
cnt = {}      # label -> number of images written so far
current = 0   # qualifying packets seen in the capture being read

for label, path in read_pcap(splited_path):
    base = os.path.abspath(os.path.expanduser(os.path.join(IMG_DATA, label)))
    os.makedirs(base, exist_ok=True)
    # The stop filter counts through the global, so reset it for every capture.
    current = 0
    sniff(
        offline=path,
        prn=pkt2img(base, label, cnt),
        store=False,
        stop_filter=stop_filter(),
    )
print(cnt)

vimeo: 1 Processed
vimeo: 101 Processed
vimeo: 201 Processed
vimeo: 301 Processed
vimeo: 401 Processed
vimeo: 501 Processed
vimeo: 601 Processed
vimeo: 701 Processed
vimeo: 801 Processed
vimeo: 901 Processed
vimeo: 1001 Processed
vimeo: 1101 Processed
vimeo: 1201 Processed
vimeo: 1301 Processed
vimeo: 1401 Processed
vimeo: 1501 Processed
vimeo: 1601 Processed
vimeo: 1701 Processed
vimeo: 1801 Processed
vimeo: 1901 Processed
vimeo: 2001 Processed
vimeo: 2101 Processed
vimeo: 2201 Processed
vimeo: 2301 Processed
vimeo: 2401 Processed
vimeo: 2501 Processed
vimeo: 2601 Processed
vimeo: 2701 Processed
vimeo: 2801 Processed
vimeo: 2901 Processed
vimeo: 3001 Processed
vimeo: 3101 Processed
vimeo: 3201 Processed
vimeo: 3301 Processed
vimeo: 3401 Processed
vimeo: 3501 Processed
vimeo: 3601 Processed
vimeo: 3701 Processed
vimeo: 3801 Processed
vimeo: 3901 Processed
vimeo: 4001 Processed
vimeo: 4101 Processed
vimeo: 4201 Processed
vimeo: 4301 Processed
vimeo: 4401 Processed
vimeo: 4501 Processed
