In [28]:
import json
import pyshark
import nest_asyncio
from ast import literal_eval as make_tuple
import statistics
import matplotlib.pyplot as plt
import numpy as np
nest_asyncio.apply()

In [29]:
def count_quic_packets(filename):
    """Print the number of QUIC layers and the number of packets in a capture.

    Every layer whose name is ``quic`` is counted, so a packet that carries
    several QUIC layers contributes several times to the QUIC count.

    Args:
        filename: path of the capture file handed to ``pyshark.FileCapture``.
    """
    pcap_data = pyshark.FileCapture(filename)
    count = 0
    packets_count = 0
    try:
        for packet in pcap_data:
            packets_count += 1
            count += sum(1 for layer in packet if layer.layer_name == 'quic')
    finally:
        # Release the capture even when parsing fails or is interrupted.
        pcap_data.close()

    print(f"quic packets count: {count}")
    print(f"packets count: {packets_count}")

In [30]:
def initial_resumption_statistics(filename):
    """Print Initial-packet and resumption-packet statistics of a capture.

    Only the first ``quic`` layer of each packet is inspected. A packet counts
    as an Initial packet when that layer's ``header_form`` is 1 and its
    ``long_packet_type`` is 0. It also counts as a resumption packet when its
    ``token_length`` is non-zero.

    Args:
        filename: path of the capture file handed to ``pyshark.FileCapture``.
    """
    pcap_data = pyshark.FileCapture(filename)
    count_initial_packets = 0
    count_resumption_packets = 0  # Initial packets that contain a token (token_length != 0)

    try:
        for packet in pcap_data:
            quic = next((layer for layer in packet if layer.layer_name == 'quic'), None)
            if quic is None:
                continue
            if int(quic.header_form) == 1 and int(quic.long_packet_type) == 0:
                count_initial_packets += 1
                if int(quic.token_length) != 0:
                    count_resumption_packets += 1
    finally:
        # Release the capture even when parsing fails or is interrupted.
        pcap_data.close()

    print(f"count Initial packets: {count_initial_packets}")
    print(f"count resumption packets: {count_resumption_packets}")
    if count_initial_packets:
        print(f"ratio of resumption packets:  {count_resumption_packets / count_initial_packets}")
    else:
        # Without Initial packets the ratio is undefined; avoid dividing by zero.
        print("ratio of resumption packets:  n/a (no Initial packets found)")

In [31]:
def get_tuple_string(ip, port):
    """Return the ``"<ip>-<port>"`` string that represents the given ip and port pair."""
    return "-".join(map(str, (ip, port)))

In [32]:
def get_four_tuple(packet):
    """Return both orderings of the packet's (source, destination) endpoint pair.

    Each endpoint is the string built by ``get_tuple_string`` from an address
    and a UDP port. The address is read from the packet's ``ip`` layer, or from
    its ``ipv6`` layer when the packet has no ``ip`` layer.

    Returns:
        ``((src, dst), (dst, src))`` -- the same pair in both directions.

    Raises:
        AttributeError: if the packet has neither address layer or no ``udp`` layer.
    """
    # IPv6 traffic exposes an `ipv6` layer instead of `ip`; fall back to it.
    network_layer = packet.ip if hasattr(packet, "ip") else packet.ipv6

    src_tuple = get_tuple_string(network_layer.src, packet.udp.srcport)
    dst_tuple = get_tuple_string(network_layer.dst, packet.udp.dstport)
    return (src_tuple, dst_tuple), (dst_tuple, src_tuple)

In [33]:
def append_to_dict(connections: dict, src_dst_tuple, dst_src_tuple, timestamp, token, initial):
    """Append one packet record to the entry of the connection it belongs to.

    The record is the comma-joined string of ``timestamp``, ``str(src_dst_tuple)``,
    ``token`` and ``initial``. The connection is looked up under the key
    ``(src_dst_tuple, dst_src_tuple)`` first and under the swapped key second.
    When neither key exists, a new entry is created under the first key.
    """
    record = ",".join([timestamp, str(src_dst_tuple), token, initial])
    forward_key = (src_dst_tuple, dst_src_tuple)
    reverse_key = (dst_src_tuple, src_dst_tuple)

    if forward_key in connections:
        existing_key = forward_key
    elif reverse_key in connections:
        existing_key = reverse_key
    else:
        connections[forward_key] = [record]
        return

    connections[existing_key].append(record)

In [34]:
def clients_dict(pcap_data, out_file):
    """Group the QUIC-over-UDP packets of a capture by connection and dump them as JSON.

    Packets that lack a ``quic`` or a ``udp`` layer are skipped. For every other
    packet one record is appended (via ``append_to_dict``) to the connection
    identified by the packet's endpoint pair. The record holds the sniff
    timestamp, the (source, destination) pair, ``token_available`` /
    ``no_token_available`` and ``initial`` / ``non_initial``. A token is only
    looked for on Initial packets.

    Args:
        pcap_data: iterable of packets; it is not closed by this function.
        out_file: path of the JSON file to write; keys are the stringified
            connection keys, values are the lists of records.
    """
    connections = {}
    for packet in pcap_data:
        layer_names = [layer.layer_name for layer in packet]
        if "quic" not in layer_names or "udp" not in layer_names:
            continue

        timestamp = packet.sniff_timestamp
        src_dst_tuple, dst_src_tuple = get_four_tuple(packet)

        quic = packet.quic
        if int(quic.header_form) == 1 and int(quic.long_packet_type) == 0:
            initial = "initial"
            token = "token_available" if int(quic.token_length) != 0 else "no_token_available"
        else:
            initial = "non_initial"
            token = "no_token_available"

        append_to_dict(connections, src_dst_tuple, dst_src_tuple, timestamp, token, initial)

    with open(out_file, 'w') as f:
        # Tuple keys are not valid JSON object keys, so they are stored as strings.
        serialisable = {str(key): records for key, records in connections.items()}
        json.dump(serialisable, f, indent=4)

In [35]:
# Relative directory prefix (with trailing slash) that the cells below prepend to every capture file name.
files_base_path = "../pcap-data/DATA/ALLOT/YouTube/"

In [36]:
# Capture video_qoe_1: overall QUIC counts, then Initial/resumption statistics.
filename1 = f"{files_base_path}1667395874.0_video_qoe_1_R9JNA0DNB9J_2022-11-02_15-31-14_00000.pcap"
count_quic_packets(filename1)
initial_resumption_statistics(filename1)

KeyboardInterrupt: 

In [None]:
# Capture video_qoe_2: overall QUIC counts, then Initial/resumption statistics.
filename2 = f"{files_base_path}1667395874.0_video_qoe_2_R9JNA0DNB9J_2022-11-02_15-31-14_00000.pcap"
count_quic_packets(filename2)
initial_resumption_statistics(filename2)

In [None]:
# Capture video_qoe_all: overall QUIC counts, then Initial/resumption statistics.
all_filename = f"{files_base_path}1660633522.0_video_qoe_all_2022-08-16_10-05-22_00000.pcap"
count_quic_packets(all_filename)
initial_resumption_statistics(all_filename)

In [None]:
# Capture video_qoe_2 again: only the Initial/resumption statistics this time.
filename2 = f"{files_base_path}1667395874.0_video_qoe_2_R9JNA0DNB9J_2022-11-02_15-31-14_00000.pcap"
initial_resumption_statistics(filename2)

In [None]:
# Capture video_qoe_3: overall QUIC counts, then Initial/resumption statistics.
filename3 = f"{files_base_path}1667395874.0_video_qoe_3_R9JNA0DNB9J_2022-11-02_15-31-14_00000.pcap"
count_quic_packets(filename3)
initial_resumption_statistics(filename3)

In [None]:
# Export the per-connection records of capture video_qoe_3 to a JSON file next to the capture.
# `filename3` holds the path WITHOUT extension so that ".pcap" and ".json" can both be derived from it.
filename3 = files_base_path + "1667395874.0_video_qoe_3_R9JNA0DNB9J_2022-11-02_15-31-14_00000"
suffix = ".pcap"
filename = filename3 + suffix
pcap_data = pyshark.FileCapture(filename)
try:
    clients_dict(pcap_data, out_file=filename3 + ".json")
finally:
    # clients_dict does not close the capture it is given, so release it here.
    pcap_data.close()

In [None]:
# Export the per-connection records of capture video_qoe_2 to a JSON file next to the capture.
# NOTE: despite its name, `filename3` is reused here for the video_qoe_2 capture (path WITHOUT extension).
filename3 = files_base_path + "1667395874.0_video_qoe_2_R9JNA0DNB9J_2022-11-02_15-31-14_00000"
suffix = ".pcap"
filename = filename3 + suffix
pcap_data = pyshark.FileCapture(filename)
try:
    clients_dict(pcap_data, out_file=filename3 + ".json")
finally:
    # clients_dict does not close the capture it is given, so release it here.
    pcap_data.close()

In [37]:
# Export the per-connection records of capture video_qoe_all to a JSON file next to the capture.
# NOTE: despite its name, `filename3` is reused here for the video_qoe_all capture (path WITHOUT extension).
filename3 = files_base_path + "1660633522.0_video_qoe_all_2022-08-16_10-05-22_00000"
suffix = ".pcap"
filename = filename3 + suffix
pcap_data = pyshark.FileCapture(filename)
try:
    clients_dict(pcap_data, out_file=filename3 + ".json")
finally:
    # clients_dict does not close the capture it is given, so release it here.
    pcap_data.close()

  attributes = dict(field.attrib)


In [None]:
def read_json_to_dict(filename):
    """Parse the JSON document stored in ``filename`` and return the resulting object."""
    with open(filename) as json_file:
        return json.load(json_file)

In [None]:
def connections_number_starting_with_token(connections):
    """Count the connections whose first record carries 'token_available'.

    Only the first record of every connection is examined: it is split on
    commas and its fourth piece (index 3) is compared with 'token_available'.
    """
    return sum(
        1
        for records in connections.values()
        if records[0].split(",")[3] == 'token_available'
    )

In [None]:
# Load the exported connections and collect, in ascending order, how many records each connection holds.
connections = read_json_to_dict(f"{filename3}.json")
packets_number_per_connection = sorted(len(records) for records in connections.values())

In [None]:
# Summary statistics of the loaded connections, followed by a box plot of packets per connection.
# Hard-coded capture duration reported below -- NOTE(review): confirm it matches the capture that was loaded.
SNIFFING_TIME_SECONDS = 6578.2

# Compute the counts once instead of re-evaluating them inside every f-string.
connections_count = len(connections)
connections_with_token = connections_number_starting_with_token(connections)

print(f"number of connections: {connections_count}")
print(f"number of connections using tokens in the first Initial packet of the connection: {connections_with_token}")
print(f"ratio of connections using tokens and overall connections: {connections_with_token / connections_count}")
print(f"number of packets per connection median: {statistics.median(packets_number_per_connection)}")
print(f"number of packets per connection mean: {statistics.mean(packets_number_per_connection)}")
print(f"number of packets overall: {sum(packets_number_per_connection)}")
print(f"sniffing time: {SNIFFING_TIME_SECONDS} seconds, {SNIFFING_TIME_SECONDS / 60} minutes")
print(f"number of packets per connection: {packets_number_per_connection}")

fig, ax = plt.subplots()
ax.boxplot(packets_number_per_connection)
ax.set_ylim(0, 200)  # zoom in: connections with more than 200 packets fall outside the visible range
ax.set_title("Packets per connection")
ax.set_ylabel("number of packets")
plt.show()

In [None]:
# Collect every endpoint string ("ip-port") that appears in the connection keys.
# Each key is the string form of ((src, dst), (dst, src)), so it is parsed back into tuples first.
four_tuples = []
for k in connections.keys():
    key_tuple = make_tuple(k)  # parse once per key instead of once per element
    four_tuples += [key_tuple[0][0], key_tuple[0][1], key_tuple[1][0], key_tuple[1][1]]

print(len(four_tuples))
# Drop duplicates and sort; from here on `four_tuples` holds the unique endpoints.
four_tuples = sorted(set(four_tuples))
print(len(four_tuples))
for t in four_tuples:
    print(t)
four_tuples[0]


# print(len(set(four_tuples)))
# print(set(four_tuples))
# print(('192.168.3.135-59561', '173.194.76.100-443') in four_tuples)