In [10]:
import logging
import sys
from collections import defaultdict

from scapy.all import sniff
from scapy.layers.inet import ICMP
from scapy.layers.tls.record import TLS, TLSApplicationData

# NOTE(review): hard-coded absolute path to one user's local DoHlyzer checkout,
# placed first on sys.path -- presumably so the `meter` imports that follow
# resolve. Adjust for the machine the notebook runs on.
sys.path.insert(0, "/home/Mohammad/Workspace/InfoLeak/DoHlyzer")

from meter.flow import Flow
from meter.features.context.packet_direction import PacketDirection 

from meter.time_series.processor import Processor

In [11]:
def _packet_direction(packet):
    """Map a packet's ``pkttype`` field to a ``PacketDirection``.

    ``pkttype == 4`` yields ``FORWARD``; every other value yields ``REVERSE``.
    NOTE(review): 4 is presumably the Linux cooked-capture "sent by us"
    value, i.e. traffic leaving the capturing host -- confirm against the
    link type of the captures.
    """
    if packet.pkttype == 4:
        return PacketDirection.FORWARD
    return PacketDirection.REVERSE


def process_pcap(path, outdir):
    """Split a capture into per-host flows and write each one as JSON.

    The capture is read offline with the BPF filter ``icmp or port 443``.
    ICMP echo replies (type 0) act as in-band markers: the decoded payload
    of a reply becomes the host name under which the following packets are
    grouped, and a payload equal to ``DONE`` stops the scan. Packets that
    carry a ``TLSApplicationData`` layer of length >= 40 are collected under
    the current host name. For every host name a ``Flow`` is built, handed to
    ``Processor`` and written to ``{outdir}/{host_name}.json``.

    Echo replies whose payload is missing or cannot be decoded are skipped
    with a warning instead of aborting the whole capture; the current host
    name is left unchanged in that case.

    Args:
        path: Path of the pcap file to read.
        outdir: Directory that receives the JSON files. It is not created
            here.

    Returns:
        None.
    """
    packets = sniff(offline=path, filter='icmp or port 443')

    doh_traffic = defaultdict(list)

    host_name = None
    for p in packets:
        if ICMP in p and p[ICMP].type == 0:  # Echo response
            # An echo reply without any payload has no ``load`` attribute;
            # getattr() turns that into None instead of an AttributeError.
            marker_bytes = getattr(p[ICMP].payload, 'load', None)
            if marker_bytes is None:
                logging.warning('Skipping ICMP echo reply without payload')
                continue
            try:
                marker = marker_bytes.decode()
            except UnicodeDecodeError:
                logging.warning(
                    'Skipping ICMP echo reply with undecodable payload %r',
                    marker_bytes,
                )
                continue

            host_name = marker
            if host_name == 'DONE':
                logging.info('Yay!')
                break
            logging.info('Analyzing %s', host_name)

        # Packets seen before the first marker have no host to belong to.
        if host_name is not None:
            # NOTE(review): the 40-byte minimum presumably filters out small
            # non-query TLS records -- confirm the intended threshold.
            if TLS in p and TLSApplicationData in p and len(p[TLSApplicationData]) >= 40:
                doh_traffic[host_name].append(p)

    for idx, (domain, domain_packets) in enumerate(doh_traffic.items()):
        logging.warning('#%d domain=%s', idx, domain)

        # The first packet is passed to the Flow constructor and then, like
        # every other packet, to add_packet().
        # NOTE(review): assumes Flow() does not itself record the packet --
        # confirm to rule out double counting.
        first_packet = domain_packets[0]
        flow = Flow(first_packet, _packet_direction(first_packet))

        for pkt in domain_packets:
            flow.add_packet(pkt, _packet_direction(pkt))

        flow_clumps = Processor(flow).create_flow_clumps_container()
        # NOTE(review): the host name comes straight from packet data and is
        # used verbatim in the output path.
        flow_clumps.to_json_single(f'{outdir}/{domain}.json')

In [17]:
import os

def run(i, dataset_root='./dataset-top1000-padded', results_root='./results/padded'):
    """Process capture number ``i`` and write its results.

    Reads ``{dataset_root}/{i}/doh.pcap`` and passes it to ``process_pcap``
    together with the output directory ``{results_root}/{i}``, which is
    created first if it does not exist yet.

    Args:
        i: Name of the capture sub-directory (the notebook uses integers).
        dataset_root: Directory holding one sub-directory per capture.
            Defaults to the location the notebook originally hard-coded.
        results_root: Directory under which per-capture result directories
            are created. Defaults to the originally hard-coded location.

    Returns:
        None.
    """
    path = f'{dataset_root}/{i}/doh.pcap'
    outdir = f'{results_root}/{i}'

    # Lazy %-style arguments, matching the logging calls in process_pcap.
    logging.warning('Processing %s', path)
    os.makedirs(outdir, exist_ok=True)
    process_pcap(path, outdir)
 

In [20]:
import gc

# Process capture directories 1, 2 and 3 in order. A garbage-collection pass
# is forced before each one -- presumably to release the packets held over
# from the previous capture.
for i in (1, 2, 3):
    gc.collect()
    run(i)

