# This notebook performs middlebox inferences for RIPE Atlas v4 probes.

Methodology:
1. Select RIPE Atlas v4 probes that are connected AND have an entry for the public IPv4 address in the meta file.
Output scheduler:
Input file:  Atlas/20210528.json
Total probes in RIPE Atlas file (incl abandoned): 34210
Total probes in RIPE Atlas file (connected and v4)): 11878
Total probes selected (connected, v4 and ipv4 public ip entry): 10105
Selected probes:  10105

2. Load RIPE Atlas meta data
3. Download measurement results from RIPE Atlas and read them
4. Show probes that claim to have successfully reached PEERING
5. Use meta file to check for public IPv4 address of each probe
6. Read tcpdump and show IP stats
7. Confirm traces that claim to have reached PEERING using the server-side logs and the MUX info in the traces
8. Flag probes that did not actually reach PEERING (middlebox in between)

In [1]:
import json
import os
import ip_to_asn
from scapy.all import *

# Values of an earlier measurement run ("version1"), kept for reference only:
#msn_ids = [30458870, 30458871] - version1
#tcpdump = 'tcpdump_2021-05-29_135326.tap5' - version1

# RIPE Atlas measurement IDs whose results are downloaded and analysed below ("version2" run).
msn_ids = [30460770, 30460771] # - version2
# Base file name of the server-side capture; '.err' and '.pcap' are appended where it is read.
tcpdump = 'tcpdump_2021-05-29_165743.tap5' # - version2

# Address that traces and captured packets are matched against.
target_ip = '147.28.14.1'
# Prefix that the hop before the target is tested against via ip_to_asn.ip_in_prefix.
peering_mux = '184.164.255.0/24'
# Directory prefix shared by the measurement result files and the capture files.
folder_prefix = "Atlas/middlebox/"

# NOTE(review): load_probes_info lives in the project-local ip_to_asn module; its return value
# is not used by any cell visible here -- confirm whether later cells need it.
probes_to_asn = ip_to_asn.load_probes_info('Atlas/20210528.json')

Done reading probes file


In [2]:
# Load the raw RIPE Atlas probe metadata. Later cells read probes_file['objects'] to look up
# the 'address_v4' entry that belongs to a probe id.
# (The former loop over probes_file['objects'] only held commented-out debug prints and did
# nothing, so it was dropped.)
with open('Atlas/20210528.json', "r") as read_file:
    probes_file = json.load(read_file)

In [3]:
#Some permission problem with the ripe library!

#IDs: 30458870, 30458871
from ripe.atlas.cousteau import AtlasResultsRequest, Measurement, Probe

# Download the results of every measurement in msn_ids once and cache them as
# <folder_prefix><measurement id>.json; measurements that are already cached are skipped.
for msm_id in msn_ids:  # two measurement ids
    results_path = folder_prefix + str(msm_id) + ".json"

    if os.path.isfile(results_path):
        print('Files already downloaded')
        continue

    # Optional AtlasResultsRequest filters that are not used here: "start", "stop", "probe_ids".
    is_success, results = AtlasResultsRequest(msm_id=msm_id).create()

    if not is_success:
        print("ERROR fetching RIPE Atlas measurment: ", msm_id)
        continue

    measurement = Measurement(id=msm_id)

    print('Measurement: ', msm_id)
    print('Description: ', measurement.description)
    print('Measurement state: ', measurement.status)

    # Serialise fully before opening the file, so a serialisation error cannot leave a
    # truncated cache file behind that the isfile() check above would accept next time.
    dump = json.dumps(results)
    with open(results_path, 'w') as my_data_file:
        my_data_file.write(dump)

    print("SUCCESS fetching RIPE Atlas measurment: ", msm_id)

Files already downloaded
Files already downloaded


In [4]:
# Read the cached results of both measurements back into memory.
first_results_path = folder_prefix + str(msn_ids[0]) + ".json"
with open(first_results_path, "r") as results_handle:
    first_file = json.load(results_handle)

second_results_path = folder_prefix + str(msn_ids[1]) + ".json"
with open(second_results_path, "r") as results_handle:
    second_file = json.load(results_handle)

In [5]:
def _trace_has_reply_from(probe, address):
    """Return True if any reply of any hop in `probe['result']` has 'from' equal to `address`."""
    for hop in probe['result']:
        # Failed traces carry hops without a 'result' list, e.g.
        # {'error': 'name resolution failed: non-recoverable failure in name resolution (1)'}
        replies = hop.get('result') if isinstance(hop, dict) else None
        if not isinstance(replies, list):
            continue
        for reply in replies:
            # Timed-out replies look like {'x': '*'} and carry no 'from'.
            # Exact comparison on purpose: a substring test would also accept e.g. '147.28.14.10'.
            if isinstance(reply, dict) and reply.get('from') == address:
                return True
    return False


def collect_traces_that_reached_PEERING(file, successful_probes, successful_probes_data, total_probes, target=None):
    """Collect the traces in `file` in which some hop replied from the target address.

    Parameters:
        file: list of result dicts, each with a 'result' list of hops plus 'prb_id' and 'src_addr'.
        successful_probes: dict updated in place, prb_id -> [src_addr].
        successful_probes_data: dict updated in place, prb_id -> the complete result dict.
        total_probes: running count of result entries seen so far.
        target: address to look for; defaults to the notebook-level `target_ip`.

    Returns:
        (total_probes + len(file), successful_probes, successful_probes_data)
    """
    if target is None:
        target = target_ip  # notebook-level default

    total_probes = total_probes + len(file)
    for probe in file:
        if not _trace_has_reply_from(probe, target):
            continue
        try:
            probe_id, src_addr = probe['prb_id'], probe['src_addr']
        except KeyError:
            # Incomplete record: skipped, as the original best-effort handling did.
            continue
        successful_probes[probe_id] = [src_addr]
        successful_probes_data[probe_id] = probe
    return total_probes, successful_probes, successful_probes_data

# Step 1: search both result files for traces that claim to have reached the target.
# The accumulators are threaded through both calls, so the second pair of numbers
# covers the first and the second file together. total_probes grows by the number of
# result entries in each file.
successful_probes, successful_probes_data, total_probes = {}, {}, 0

(total_probes,
 successful_probes,
 successful_probes_data) = collect_traces_that_reached_PEERING(
    first_file, successful_probes, successful_probes_data, total_probes)
print('Total probes in first file: ', total_probes)
print('Successful probes in first file: ', len(successful_probes))
print()

(total_probes,
 successful_probes,
 successful_probes_data) = collect_traces_that_reached_PEERING(
    second_file, successful_probes, successful_probes_data, total_probes)
print('Total probes in first & second file: ', total_probes)
print('Successful probes in first & second file: ', len(successful_probes))

Total probes in first file:  5675
Successful probes in first file:  5638

Total probes in first & second file:  9530
Successful probes in first & second file:  9474


In [6]:
# Enrich the probes dict with the address from the probe metadata in addition to the source
# address from the result (many probes are behind a NAT). Sometimes the address in the
# RIPE Atlas result is already the outgoing one, so both are checked later on.
def find_probes_public_ip(probe_id, probes=None):
    """Return the 'address_v4' entry of the first probe whose 'id' equals `probe_id`.

    Parameters:
        probe_id: probe id to look up.
        probes: iterable of probe metadata dicts with 'id' and 'address_v4' keys;
            defaults to the notebook-level probes_file['objects'].

    Returns:
        The stored 'address_v4' value (whatever the metadata holds), or False when no
        probe with that id exists.
    """
    if probes is None:
        probes = probes_file['objects']  # notebook-level default
    for probe in probes:
        if probe['id'] == probe_id:
            return probe['address_v4']
    return False

# Index the probe metadata once instead of scanning the whole list for every probe.
# setdefault keeps the first entry per id, which is what a front-to-back scan would return.
public_ip_by_probe_id = {}
for probe_meta in probes_file['objects']:
    public_ip_by_probe_id.setdefault(probe_meta['id'], probe_meta['address_v4'])

for probe_id, addresses in successful_probes.items():
    # Slice assignment keeps index 0 (source address from the result) and (re)places the
    # metadata address at index 1, so re-running this cell does not keep growing the lists.
    # False marks probes without a metadata entry.
    addresses[1:] = [public_ip_by_probe_id.get(probe_id, False)]

In [7]:
# Print the capture's .err log to see whether the kernel dropped any packets.
capture_log_path = folder_prefix + tcpdump + '.err'
with open(capture_log_path) as capture_log:
    print(capture_log.read())

tcpdump: listening on tap5, link-type EN10MB (Ethernet), capture size 262144 bytes
58535 packets captured
58582 packets received by filter
0 packets dropped by kernel



In [8]:
#2. check with tcpdump
# Read the server-side capture (<folder_prefix><tcpdump>.pcap) with scapy's rdpcap;
# the cell that follows iterates over scapy_cap packet by packet.
scapy_cap = rdpcap(folder_prefix + tcpdump + '.pcap')   

In [9]:
# Collect every source address that sent a packet to the target according to the capture.
# Reminder: successful_probes[probe_id] = [src_addr from the result, address_v4 from the metadata]

ips_that_reached_peering = set()
for packet in scapy_cap:
    # Frames without an IP layer cannot be indexed with packet[IP]; skip them instead of
    # letting the lookup raise.
    if packet.haslayer(IP) and packet[IP].dst == target_ip:
        ips_that_reached_peering.add(packet[IP].src)

print('Any IP that reached peering: ', len(ips_that_reached_peering))
print('RIPE probes claimed to have reached PEERING: ', len(successful_probes))

Any IP that reached peering:  9341
RIPE probes claimed to have reached PEERING:  9474


In [10]:
# Check whether each probe that claims to have reached the target can be confirmed.
# A probe counts as confirmed when either
#   (a) one of its two known addresses (index 0: src_addr from the result, index 1: address_v4
#       from the probe metadata) appears as a source in the server-side capture, or
#   (b) the trace itself ends at target_ip and the hop right before it replies from inside
#       peering_mux.
# Every other probe is recorded as not confirmed, so each probe ends up in exactly one of
# counter_found / counter_not_found.

def _reply_sources(hop, max_replies=3):
    """Yield the 'from' address of each of the first `max_replies` replies in `hop` that has one.

    Hops without a 'result' list (e.g. {'error': ...}) and replies without a 'from'
    (e.g. {'x': '*'}) yield nothing.
    """
    replies = hop.get('result') if isinstance(hop, dict) else None
    if not isinstance(replies, list):
        return
    for reply in replies[:max_replies]:
        if isinstance(reply, dict) and 'from' in reply:
            yield reply['from']


def _trace_confirms_mux(trace):
    """Return True if the trace ends at target_ip and its penultimate hop is inside peering_mux.

    For both hops the first reply that carries a 'from' address decides.
    """
    hops = trace.get('result') if isinstance(trace, dict) else None
    if not isinstance(hops, list) or len(hops) < 2:
        return False

    # The last hop has to be answered by the target itself.
    if next(_reply_sources(hops[-1]), None) != target_ip:
        return False

    for address in _reply_sources(hops[-2]):
        try:
            return ip_to_asn.ip_in_prefix(address, peering_mux) == True
        except Exception:
            # NOTE(review): ip_in_prefix's failure modes are not visible from this notebook.
            # Moving on to the next reply mirrors the fall-through of the previous
            # nested try/except version -- confirm and narrow the exception type.
            continue
    return False


counter_server_side_check = set()
counter_found = set()
counter_not_found = set()
for probe_id, addresses in successful_probes.items():
    if any(address in ips_that_reached_peering for address in addresses[:2]):
        # confirmed via tcpdump on server-side
        counter_found.add(probe_id)
        counter_server_side_check.add(probe_id)
    elif _trace_confirms_mux(successful_probes_data[probe_id]):
        # not seen server-side, but the trace passes through the MUX right before the target
        counter_found.add(probe_id)
    else:
        counter_not_found.add(probe_id)


print('Probes confirmed via server-side: ', len(counter_server_side_check))
print('Probes confirmed (server-side + MUX-check): ', len(counter_found))
print('Probes NOT confirmed: ', len(counter_not_found))

Probes confirmed via server-side:  9045
Probes confirmed (server-side + MUX-check):  9367
Probes NOT confirmed:  107


In [11]:
# Show the traces of the probes that could not be confirmed (presumably behind a middlebox).
# The loop variable is probe_id rather than id, so the builtin id() is not shadowed.

for probe_id in counter_not_found:
    print('ProbeID: ', probe_id)
    print(successful_probes_data[probe_id])
    print()

ProbeID:  16896
{'fw': 5020, 'mver': '2.2.1', 'lts': 48, 'endtime': 1622307538, 'dst_name': '147.28.14.1', 'dst_addr': '147.28.14.1', 'src_addr': '172.16.0.107', 'proto': 'ICMP', 'af': 4, 'size': 48, 'paris_id': 1, 'result': [{'hop': 1, 'result': [{'from': '172.16.0.1', 'ttl': 64, 'size': 76, 'rtt': 1.561}, {'from': '172.16.0.1', 'ttl': 64, 'size': 76, 'rtt': 0.669}, {'from': '172.16.0.1', 'ttl': 64, 'size': 76, 'rtt': 0.626}]}, {'hop': 2, 'result': [{'from': '192.168.2.1', 'ttl': 63, 'size': 76, 'rtt': 2.953}, {'from': '192.168.2.1', 'ttl': 63, 'size': 76, 'rtt': 1.299}, {'from': '192.168.2.1', 'ttl': 63, 'size': 76, 'rtt': 1.149}]}, {'hop': 3, 'result': [{'from': '192.0.0.1', 'ttl': 253, 'size': 76, 'rtt': 6.374}, {'from': '192.0.0.1', 'ttl': 253, 'size': 76, 'rtt': 3.168}, {'from': '192.0.0.1', 'ttl': 253, 'size': 76, 'rtt': 3.109}]}, {'hop': 4, 'result': [{'x': '*'}, {'x': '*'}, {'x': '*'}]}, {'hop': 5, 'result': [{'x': '*'}, {'x': '*'}, {'x': '*'}]}, {'hop': 6, 'result': [{'x': '*