In [None]:
%pip install numpy matplotlib scapy[basic] tqdm

In [None]:
import datetime

def parse_date(state_string):
    """Convert a ``'YYYY-MM-DD HH:MM'`` string into a naive ``datetime``.

    Args:
        state_string: Timestamp text in the exact form ``%Y-%m-%d %H:%M``.

    Returns:
        The corresponding ``datetime.datetime`` (no timezone attached).

    Raises:
        ValueError: If the text does not match the expected format
            (raised by ``strptime``).
    """
    timestamp_format = '%Y-%m-%d %H:%M'
    parsed = datetime.datetime.strptime(state_string, timestamp_format)
    return parsed

# Metadata for every capture file. Each row below is
# (pcap file name, route description, start time as 'YYYY-MM-DD HH:MM');
# the comprehension expands the rows into dicts with the keys
# 'filename', 'description' and 'date_start' (a datetime from parse_date).
commutes = [
    {'filename': filename, 'description': description, 'date_start': parse_date(started)}
    for filename, description, started in (
        ('capture-0-a4.pcap', 'trip from Kaufering to Garching Forschungszentrum', '2019-12-16 07:00'),
        ('capture-1-a4.pcap', 'trip from Kaufering to Garching Forschungszentrum', '2019-12-17 07:00'),
        ('capture-2-a4.pcap', 'trip from Kaufering to Garching Forschungszentrum', '2019-12-18 07:00'),
        ('capture-5-a4.pcap', 'trip from Kaufering to Garching Forschungszentrum', '2020-01-13 07:00'),
        ('capture-6-a4.pcap', 'trip from Kaufering to Garching Forschungszentrum', '2020-01-14 07:00'),
        ('capture-7-a4.pcap', 'trip from Kaufering to Garching Forschungszentrum', '2020-01-20 07:00'),
        ('capture-3-a4.pcap', 'U2 trip from Feldmoching to Messestadt Ost', '2019-12-19 12:07'),
        ('capture-4-a4.pcap', 'U2 trip from Messestadt Ost to Feldmoching', '2019-12-19 13:05'),
        ('capture-2-a1.pcap', 'U6 round trip Garching Forschungszentrum/Klinikum Großhadern', '2019-12-19 12:00'),
        ('capture-3-a1.pcap', 'train trip Munich Central Station to Dortmund Central Station', '2019-12-24 07:21'),
    )
]

In [None]:
from tqdm import tqdm
from scapy.all import PcapReader


def accumulate(file, *accumulators):
    """Feed every packet of a capture file to each accumulator.

    Args:
        file: Path of the capture; handed to ``PcapReader`` and also used as
            the progress-bar caption.
        *accumulators: Callables invoked as ``accumulator(packet)`` for every
            packet, in the order they were given.

    Returns:
        The tuple of accumulators that was passed in.
    """
    with PcapReader(file) as capture:
        with tqdm(unit='packets', desc=file) as progress:
            for pkt in capture:
                for consume in accumulators:
                    consume(pkt)
                # One tick per packet read, regardless of what the accumulators kept.
                progress.update()
    return accumulators

In [None]:
import random
import numpy as np
from scapy.layers.dot11 import Dot11ProbeReq


class ProbeReqAcc(list):
    """List-based accumulator that records every probe request it is shown.

    Each recorded entry is a ``(timestamp, mac)`` tuple taken from the
    packet's ``time`` and ``addr2`` attributes.
    """

    # Field layout used by ``as_numpy_array``. ``np.float64`` and ``np.bytes_``
    # replace the aliases ``np.float`` (removed in NumPy 1.24) and
    # ``np.string_`` (removed in NumPy 2.0); the resulting dtype is the same.
    # The 17-byte width matches a colon-separated MAC like 'aa:bb:cc:dd:ee:ff'.
    TYPE = ('timestamp', np.float64), ('mac', np.bytes_, 17)

    def __call__(self, packet):
        """Record ``(packet.time, packet.addr2)`` if ``packet`` has a probe-request layer."""
        if packet.haslayer(Dot11ProbeReq):
            values = packet.time, packet.addr2
            self.append(values)

    def as_numpy_array(self):
        """Return all recorded entries as a structured array with dtype ``TYPE``."""
        return np.array(self, dtype=np.dtype([*self.TYPE]))


class RandProbeReqSampler(list):
    """List that keeps a random subset of the probe requests it is called with."""

    # Chance for an individual probe request to be kept.
    PROBABILITY = 0.001

    def __call__(self, packet):
        """Store ``packet`` if it is a probe request and the random draw succeeds."""
        # Non-probe packets are rejected before drawing, so they never consume
        # a random number.
        if not packet.haslayer(Dot11ProbeReq):
            return
        if random.random() > self.PROBABILITY:
            return
        self.append(packet)

In [None]:
# Path prefix that is put in front of every capture file name.
directory = '../probes/'

# Read each capture once and attach the results to its metadata dict:
# 'probes' receives the ProbeReqAcc, 'samples' the RandProbeReqSampler.
for commute in commutes:
    path = directory + commute['filename']
    probes, samples = accumulate(path, ProbeReqAcc(), RandProbeReqSampler())
    commute.update(probes=probes, samples=samples)

In [None]:
%matplotlib inline
import math
import matplotlib
import matplotlib.pyplot as plt

In [None]:
def duration(data):
    """Return the span between the smallest and largest ``'timestamp'`` in ``data``.

    Args:
        data: Record collection indexable as ``data[:]['timestamp']``, such as
            the structured array returned by ``ProbeReqAcc.as_numpy_array``.

    Returns:
        Largest timestamp minus smallest timestamp.
    """
    stamps = data[:]['timestamp']
    latest = np.amax(stamps)
    earliest = np.amin(stamps)
    return latest - earliest
    
def plot_packets_per_time_interval(commute, sampling_interval=100):
    """Plot how many probe requests were captured per time interval.

    Timestamps are taken relative to the earliest probe request of the commute
    and counted in consecutive buckets of ``sampling_interval`` each. The
    counts are drawn with ``plt.plot`` as one labelled line.

    Args:
        commute: Dict with the keys ``'probes'`` (object whose
            ``as_numpy_array()`` result has a ``'timestamp'`` field),
            ``'description'`` (str) and ``'date_start'`` (datetime).
        sampling_interval: Bucket width in the unit of the timestamps.
            Defaults to 100, the value that was previously hard-coded.

    Raises:
        ValueError: If ``sampling_interval`` is not positive.
    """
    if sampling_interval <= 0:
        raise ValueError('sampling_interval must be positive')

    label = commute['description'] + ' on ' + commute['date_start'].strftime('%m/%d/%Y %H:%M')

    # Convert the accumulator only once; every conversion builds a new array.
    timestamps = commute['probes'].as_numpy_array()['timestamp']
    if timestamps.size == 0:
        # Nothing was captured: draw an empty series so the label still
        # appears in the legend instead of failing on min() of an empty array.
        plt.plot([], [], label=label)
        return

    # Bucket index of every packet, relative to the earliest packet.
    offsets = timestamps - np.amin(timestamps)
    bucket_indices = np.floor(offsets / sampling_interval).astype(int)

    # Derive the bucket count from the largest index. The previous
    # ceil(duration / interval) was one bucket short whenever the duration was
    # an exact multiple of the interval (including a single packet), which
    # raised IndexError.
    number_of_buckets = int(bucket_indices.max()) + 1
    result = np.bincount(bucket_indices, minlength=number_of_buckets).astype(float)

    # Left edge of every bucket.
    x_axis = np.arange(number_of_buckets) * sampling_interval

    plt.plot(x_axis, result, label=label)

# Draw one line per commute for the first three entries of `commutes`.
for i in range(3):
    plot_packets_per_time_interval(commutes[i])

# Axis labels and legend for the lines plotted above.
plt.xlabel('time (s)')
plt.ylabel('number of packets')
plt.legend(loc="upper left")

In [None]:
# Smoke test: read one capture on its own and display one randomly sampled
# probe request.

commute_16122019, commute_16122019_samples = accumulate('../probes/capture-0-a4.pcap', ProbeReqAcc(), RandProbeReqSampler())

# The sampler keeps each probe request only with a small probability, so the
# sample list can be empty; indexing it unguarded would raise IndexError.
if commute_16122019_samples:
    commute_16122019_samples[0].show()
else:
    print('no probe request was sampled from this capture')
