In [25]:
import os
import json
import folium
import requests
import pandas as pd
import pycountry_convert as pc
import numpy as np
from statistics import mean 
from pandas import json_normalize
from sklearn.cluster import KMeans
from datetime import datetime, timedelta
from ripe.atlas.cousteau import (
    Ping,
    Traceroute,
    AtlasSource,
    AtlasCreateRequest,
    AtlasResultsRequest
)
from dotenv import load_dotenv

# Load env variables
load_dotenv()

True

In [2]:
df = pd.read_csv('cloudregions.csv', names=["Endpoint", "Provider", "State", "City", "Country", "IP"])

In [3]:
df.head()

Unnamed: 0,Endpoint,Provider,State,City,Country,IP
0,https://ec2.us-east-1.amazonaws.com/ping,AWS,N. Virginia,Ashburn,United States,52.46.142.79;
1,https://ec2.us-east-2.amazonaws.com/ping,AWS,Ohio,Columbus,United States,99.78.176.246;
2,https://ec2.us-west-1.amazonaws.com/ping,AWS,N. California,San Francisco,United States,176.32.118.30;
3,,AWS,N. California,San Francisco,United States,0;
4,https://ec2.us-west-2.amazonaws.com/ping,AWS,Oregon,Portland,United States,52.94.214.88;


In [4]:
# Clean up the IP address
df["IP"] = df["IP"].str[:-1]

In [5]:
df.head()

Unnamed: 0,Endpoint,Provider,State,City,Country,IP
0,https://ec2.us-east-1.amazonaws.com/ping,AWS,N. Virginia,Ashburn,United States,52.46.142.79
1,https://ec2.us-east-2.amazonaws.com/ping,AWS,Ohio,Columbus,United States,99.78.176.246
2,https://ec2.us-west-1.amazonaws.com/ping,AWS,N. California,San Francisco,United States,176.32.118.30
3,,AWS,N. California,San Francisco,United States,0
4,https://ec2.us-west-2.amazonaws.com/ping,AWS,Oregon,Portland,United States,52.94.214.88


In [6]:
# Define a function to make HTTP requests
def make_request(row):
    if row['IP'] == "0":
        return json.dumps("{}")

    url = f"http://api.ipstack.com/{row['IP']}?access_key=af9d6f3b9d4984149040eaa8098938c5"

    try:
        response = requests.get(url)
        print(f"URL: {url}, Status Code: {response.status_code}")
        return response.json()
    except Exception as e:
        print(f"Error for URL {url}: {e}")
        return None
    
def geolocate_ip(df):
    # Make IP geolocation requests for the cloud data centers
    df['geo_response'] = df.apply(make_request, axis=1)
    
    # Explode the JSON response into separate columns
    df_normalized = json_normalize(df['geo_response'])
    
    # Concat with the original frame
    df_result = pd.concat([df, df_normalized], axis=1)
    
    # Filter out those centers for which geolocation failed
    df_result_nonna = df_result[df_result['latitude'].notna()]
    
    return df_result_nonna

In [7]:
# Originally: df_result_nonna = geolocate_ip(df); df_result_nonna.to_csv('cloud_regions_geocoded.csv')
df_result_nonna = pd.read_csv('cloud_regions_geocoded.csv')

In [8]:
df_result_nonna.head()

Unnamed: 0.1,Unnamed: 0,Endpoint,Provider,State,City,Country,IP,geo_response,ip,type,...,location.languages,location.country_flag,location.country_flag_emoji,location.country_flag_emoji_unicode,location.calling_code,location.is_eu,success,error.code,error.type,error.info
0,0,https://ec2.us-east-1.amazonaws.com/ping,AWS,N. Virginia,Ashburn,United States,52.46.142.79,"{'ip': '52.46.142.79', 'type': 'ipv4', 'contin...",52.46.142.79,ipv4,...,"[{'code': 'en', 'name': 'English', 'native': '...",https://assets.ipstack.com/flags/us.svg,🇺🇸,U+1F1FA U+1F1F8,1,False,,,,
1,1,https://ec2.us-east-2.amazonaws.com/ping,AWS,Ohio,Columbus,United States,99.78.176.246,"{'ip': '99.78.176.246', 'type': 'ipv4', 'conti...",99.78.176.246,ipv4,...,"[{'code': 'en', 'name': 'English', 'native': '...",https://assets.ipstack.com/flags/us.svg,🇺🇸,U+1F1FA U+1F1F8,1,False,,,,
2,2,https://ec2.us-west-1.amazonaws.com/ping,AWS,N. California,San Francisco,United States,176.32.118.30,"{'ip': '176.32.118.30', 'type': 'ipv4', 'conti...",176.32.118.30,ipv4,...,"[{'code': 'en', 'name': 'English', 'native': '...",https://assets.ipstack.com/flags/us.svg,🇺🇸,U+1F1FA U+1F1F8,1,False,,,,
3,4,https://ec2.us-west-2.amazonaws.com/ping,AWS,Oregon,Portland,United States,52.94.214.88,"{'ip': '52.94.214.88', 'type': 'ipv4', 'contin...",52.94.214.88,ipv4,...,"[{'code': 'en', 'name': 'English', 'native': '...",https://assets.ipstack.com/flags/us.svg,🇺🇸,U+1F1FA U+1F1F8,1,False,,,,
4,5,https://ec2.ap-east-1.amazonaws.com/ping,AWS,Hong Kong,Hong Kong,Hong Kong,13.248.32.123,"{'ip': '13.248.32.123', 'type': 'ipv4', 'conti...",13.248.32.123,ipv4,...,"[{'code': 'zh', 'name': 'Chinese', 'native': '...",https://assets.ipstack.com/flags/hk.svg,🇭🇰,U+1F1ED U+1F1F0,852,False,,,,


In [9]:
# Prepare a map
map_center = [df_result_nonna['latitude'].mean(), df_result_nonna['longitude'].mean()]
map_object = folium.Map(location=map_center, zoom_start=5)

In [10]:
# Add markers for each row in the DataFrame
for index, row in df_result_nonna.iterrows():
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=row['Endpoint']
    ).add_to(map_object)

map_object

In [11]:
# Extract latitude and longitude columns for clustering
coordinates = df_result_nonna[['latitude', 'longitude']]

# Choose the number of clusters (k)
k = 30

# Perform k-means clustering
kmeans = KMeans(n_clusters=k, random_state=42)
df_result_nonna['cluster'] = kmeans.fit_predict(coordinates)

In [13]:
colors = [
    'red',
    'blue',
    'gray',
    'darkred',
    'lightred',
    'orange',
    'beige',
    'green',
    'darkgreen',
    'lightgreen',
    'darkblue',
    'lightblue',
    'purple',
    'darkpurple',
    'pink',
    'cadetblue',
    'lightgray',
    'black'
]

map_center = [df_result_nonna['latitude'].mean(), df_result_nonna['longitude'].mean()]
map_object = folium.Map(location=map_center, zoom_start=5)

# Add markers for each row in the DataFrame
for index, row in df_result_nonna.iterrows():
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=row['Endpoint'],
        icon=folium.Icon(color=colors[row['cluster'] % len(colors)])
    ).add_to(map_object)

map_object

In [14]:
# Group by cluster and provider, and select unique rows for each group
df_data_centers_by_cluster_provider = df_result_nonna.groupby(['cluster', 'Provider']).first().reset_index()
# Pick at most 2 data centers from each group from above
df_data_centers_by_cluster = df_data_centers_by_cluster_provider.groupby(['cluster']).first().reset_index()

In [15]:
df_data_centers_by_cluster.shape

(30, 33)

In [16]:
map_center = [df_data_centers_by_cluster['latitude'].mean(), df_data_centers_by_cluster['longitude'].mean()]
map_object = folium.Map(location=map_center, zoom_start=5)

for index, row in df_data_centers_by_cluster.iterrows():
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=row['Endpoint']
    ).add_to(map_object)

map_object

In [17]:
df_data_centers_by_cluster.to_csv('cloud_endpoints_final.csv')

In [18]:
# Here we work with unpacked .json file from the latest probe dataset (09-02-2024)
file_path = '2024/02/20240209.json'

with open(file_path, 'r') as f:
    data = json.load(f)
    df_probes = pd.DataFrame(data['objects'])

In [19]:
df_probes.head()

Unnamed: 0,id,address_v4,address_v6,asn_v4,asn_v6,prefix_v4,prefix_v6,is_anchor,is_public,status,status_since,first_connected,total_uptime,tags,country_code,latitude,longitude,day,probe,status_name
0,1,45.138.229.91,2a10:3781:e22:1:220:4aff:fec8:23d7,206238.0,206238.0,45.138.228.0/22,2a10:3780::/29,False,True,1,1707269398,1288368000.0,407264862,"[system-ipv4-stable-1d, system-resolves-aaaa-c...",NL,52.3475,4.9275,20240209,https://atlas.ripe.net/api/v2/probes/1/,Connected
1,2,,,1136.0,1136.0,77.160.0.0/13,2a02:a400::/25,False,False,3,1640571508,1288385000.0,347389948,"[system-no-controller-connection, system-ipv6-...",GU,42.6585,21.1575,20240209,https://atlas.ripe.net/api/v2/probes/2/,Abandoned
2,3,77.174.76.85,2a02:a467:f500:1:220:4aff:fec8:2532,1136.0,1136.0,77.174.0.0/16,2a02:a400::/25,False,True,1,1706618589,1288373000.0,403591652,"[system-ipv6-stable-1d, system-firewall-proble...",NL,52.3685,4.9375,20240209,https://atlas.ripe.net/api/v2/probes/3/,Connected
3,4,83.163.50.165,2001:980:57a4:1:220:4aff:fec8:244a,3265.0,3265.0,83.160.0.0/14,2001:980::/32,False,True,3,1568319241,1288599000.0,248928764,"[dsl, home, system-v1, iwantbcp38compliancetes...",NL,52.3895,4.6375,20240209,https://atlas.ripe.net/api/v2/probes/4/,Abandoned
4,5,83.163.239.181,2001:981:602b:1:220:4aff:fec8:2355,3265.0,3265.0,83.160.0.0/14,2001:980::/30,False,True,3,1513671789,1288600000.0,185731720,"[home, nat, system-v1, system-ipv4-capable, sy...",ES,36.8295,-2.4625,20240209,https://atlas.ripe.net/api/v2/probes/5/,Abandoned


In [20]:
df_probes_filtered = df_probes
df_probes_filtered['tags'] = df_probes['tags'].apply(set)
# Only pick those probes with 1-day stable IP to improve reliability of our measurement
df_probes_filtered = df_probes_filtered[df_probes_filtered['tags'].apply(lambda tags: 'system-ipv4-stable-1d' in tags)]
# Only pick probes with status equal 1
df_probes_filtered = df_probes_filtered[df_probes_filtered['status'] == 1]

In [21]:
df_probes_filtered.shape

(7095, 20)

In [22]:
# Total number of probes we want for our measurement
total_probes = 150

# Share of population by continent
continent_population_share = {
    'NA': 7.5,
    'SA': 5.5,
    'AS': 59.4,
    'OC': 0.6,
    'AF': 17.6,
    'EU': 9.4
}


# Pick samples from a probe dataframe respecting the population
# share by continent
def pick_samples_by_continent(df, population_distribution):
    samples = []

    for continent, share in population_distribution.items():
        # Check if the continent has entries in the DataFrame
        if df[df['continent'] == continent].empty:
            continue

        # Calculate the number of samples based on the share
        num_samples = int(np.ceil(len(df) * share / 100))

        # Select samples for the current continent
        samples_continent = df[df['continent'] == continent].sample(n=num_samples, replace=True)

        # Append the selected samples to the result list
        samples.append(samples_continent)

    # Concatenate the list of DataFrames into a single one
    result_df = pd.concat(samples, ignore_index=True)

    return result_df


# Convert country code to continent
def country_to_continent(country_code):
    try:
        return pc.country_alpha2_to_continent_code(country_code)
    except KeyError:
        return None

In [23]:
df_probes_filtered['continent'] = df_probes_filtered['country_code'].apply(country_to_continent)

In [24]:
df_probes_filtered.head()

Unnamed: 0,id,address_v4,address_v6,asn_v4,asn_v6,prefix_v4,prefix_v6,is_anchor,is_public,status,...,first_connected,total_uptime,tags,country_code,latitude,longitude,day,probe,status_name,continent
0,1,45.138.229.91,2a10:3781:e22:1:220:4aff:fec8:23d7,206238.0,206238.0,45.138.228.0/22,2a10:3780::/29,False,True,1,...,1288368000.0,407264862,"{xs4all, dsl, system-ipv4-stable-1d, nat, syst...",NL,52.3475,4.9275,20240209,https://atlas.ripe.net/api/v2/probes/1/,Connected,EU
7,8,83.81.83.145,2001:1c05:2011:fa00:220:4aff:fec8:2464,33915.0,33915.0,83.80.0.0/14,2001:1c00::/24,False,True,1,...,1288619000.0,405485386,"{ziggo, system-ipv4-stable-1d, nat, cable, hom...",NL,51.1915,5.9975,20240209,https://atlas.ripe.net/api/v2/probes/8/,Connected,EU
13,14,79.55.209.251,,3269.0,,79.55.0.0/16,,False,True,1,...,1289551000.0,352983358,"{system-ipv4-stable-1d, system-ipv4-stable-90d...",IT,41.8995,12.4375,20240209,https://atlas.ripe.net/api/v2/probes/14/,Connected,EU
29,30,,,20115.0,,75.142.96.0/19,,False,False,1,...,1290200000.0,170845189,"{system-ipv4-stable-1d, nat, system-ipv4-stabl...",US,33.8175,-118.0615,20240209,https://atlas.ripe.net/api/v2/probes/30/,Connected,
31,32,76.82.152.84,2603:8001:5000:2aa0:220:4aff:fec8:25ed,20001.0,20001.0,76.80.0.0/14,2603:8000::/28,False,True,1,...,1289868000.0,406671689,"{system-ipv4-stable-1d, nat, cable, home, syst...",US,32.8885,-117.1815,20240209,https://atlas.ripe.net/api/v2/probes/32/,Connected,


In [25]:
# Define filters for different probe classes
filters = {
    "radio (wi-fi)": lambda row: 'home' in row['tags'] and any(
        x in row['tags'] for x in {"wi-fi", "wifi", "wlan", "wireless", "wireless-isp", "wireless-link"}),
    "radio (mobile)": lambda row: any(x in row['tags'] for x in {"lte", "5g", "4g", "3g", "cellular"}),
    "satellite": lambda row: row["asn_v4"] == 14593 or row["asn_v6"] == 14593,
    "ethernet": lambda row: 'home' in row['tags'] and not any(x in row['tags'] for x in {"wi-fi", "wifi", "wlan"}) and  any(
        x in row['tags'] for x in {"dsl", "adsl", "fibre", "fiber", "cable", "ftth"})
}

In [26]:
filtered_probe_frames = {}

for filter_name, filter_func in filters.items():
    df_filtered_temp = df_probes_filtered[df_probes_filtered.apply(filter_func, axis=1)]
    selected_samples = pick_samples_by_continent(df_filtered_temp, continent_population_share)

    if filter_name == "satellite":
        filtered_probe_frames[filter_name] = df_filtered_temp
    elif filter_name == "ethernet":
        filtered_probe_frames[filter_name] = selected_samples.sample(n=520, replace=True)
    else:
        filtered_probe_frames[filter_name] = selected_samples

    filtered_probe_frames[filter_name] = filtered_probe_frames[filter_name].sample(frac=1.0, replace=True)

In [27]:
# We verify we have ~170 unique probes after final sampling
c = 0
for filter_name, probe_df in filtered_probe_frames.items():
    c += probe_df.shape[0]
    print(filter_name, ": ", probe_df.shape[0])
print("All probes:", c)

all_filtered_probes = pd.concat(filtered_probe_frames.values(), ignore_index=True)

print("Unique probes:", len(set(all_filtered_probes['id'].tolist())))

radio (wi-fi) :  51
radio (mobile) :  47
satellite :  12
ethernet :  520
All probes: 630
Unique probes: 171


In [28]:
all_filtered_probes.to_csv('all_filtered_probes.csv')

In [29]:
df_data_centers_by_cluster.head()

Unnamed: 0.1,cluster,Provider,Unnamed: 0,Endpoint,State,City,Country,IP,geo_response,ip,...,location.languages,location.country_flag,location.country_flag_emoji,location.country_flag_emoji_unicode,location.calling_code,location.is_eu,success,error.code,error.type,error.info
0,0,AWS,4,https://ec2.us-west-2.amazonaws.com/ping,Oregon,Portland,United States,52.94.214.88,"{'ip': '52.94.214.88', 'type': 'ipv4', 'contin...",52.94.214.88,...,"[{'code': 'en', 'name': 'English', 'native': '...",https://assets.ipstack.com/flags/us.svg,🇺🇸,U+1F1FA U+1F1F8,1,False,,,,
1,1,UCloud,103,https://feitsui-bjs.cn-bj.ufileos.com/ping.html,cn-bj,Beijing,China,117.50.123.29,"{'ip': '117.50.123.29', 'type': 'ipv4', 'conti...",117.50.123.29,...,"[{'code': 'zh', 'name': 'Chinese', 'native': '...",https://assets.ipstack.com/flags/cn.svg,🇨🇳,U+1F1E8 U+1F1F3,86,False,,,,
2,2,AWS,16,https://ec2.eu-south-1.amazonaws.com/ping,Milan,Milan,Italy,52.119.132.74,"{'ip': '52.119.132.74', 'type': 'ipv4', 'conti...",52.119.132.74,...,"[{'code': 'it', 'name': 'Italian', 'native': '...",https://assets.ipstack.com/flags/it.svg,🇮🇹,U+1F1EE U+1F1F9,39,True,,,,
3,3,AWS,26,https://ec2.sa-east-1.amazonaws.com/ping,São Paulo,São Paulo,Brazil,177.72.245.165,"{'ip': '177.72.245.165', 'type': 'ipv4', 'cont...",177.72.245.165,...,"[{'code': 'pt', 'name': 'Portuguese', 'native'...",https://assets.ipstack.com/flags/br.svg,🇧🇷,U+1F1E7 U+1F1F7,55,False,,,,
4,4,AWS,13,https://ec2.ap-southeast-3.amazonaws.com/ping,Jakarta,Jakarta,Indonesia,99.78.244.123,"{'ip': '99.78.244.123', 'type': 'ipv4', 'conti...",99.78.244.123,...,"[{'code': 'id', 'name': 'Indonesian', 'native'...",https://assets.ipstack.com/flags/id.svg,🇮🇩,U+1F1EE U+1F1E9,62,False,,,,


In [30]:
def map_function_on_keys_and_values(obj, keys_to_map, func):
    if isinstance(obj, dict):
        for key, value in obj.items():
            if key in keys_to_map:
                obj[key] = func(value)
            else:
                obj[key] = map_function_on_keys_and_values(value, keys_to_map, func)
        return obj
    elif isinstance(obj, list):
        return [map_function_on_keys_and_values(item, keys_to_map, func) for item in obj]
    else:
        return obj


def add_measurement_interval(definitions):
    for definition in definitions:
        definition["interval"] = 14400
    return definitions

In [35]:
def create_traceroute_measurements(probe_df, data_center_df):
    ATLAS_API_KEY = os.getenv('ATLAS_API_KEY')
    ATLAS_API_URL = "https://atlas.ripe.net/api/v2/measurements"
    HEADERS = {"Content-Type": "application/json", "Accept": "application/json"}
    CALL_PARAMS = {"key": ATLAS_API_KEY}

    probe_ids = list(set(probe_df["id"].tolist()))

    measurement_source = AtlasSource(
        type="probes",
        value=",".join(map(str, probe_ids)),
        requested=len(probe_ids)
    )

    responses = []

    start_time_utc = datetime.utcnow() + timedelta(minutes=5)

    for index, row in data_center_df.iterrows():
        traceroute = Traceroute(af=4, target=row["IP"], protocol="ICMP",
                    description=f"{row['Provider']} data center endpoint {row['Endpoint']} in {row['City']}, {row['State']}, {row['Country']}")

        atlas_request = AtlasCreateRequest(
            start_time=start_time_utc,
            key=ATLAS_API_KEY,
            measurements=[traceroute],
            sources=[measurement_source],
            is_oneoff=True
        )

        atlas_request._construct_post_data()

        response = requests.post(ATLAS_API_URL, headers=HEADERS, params=CALL_PARAMS, json=atlas_request.post_data)
        responses.append(response)
        print(response.status_code, ": ", response.text)

    return responses

In [36]:
def create_ping_measurements(probe_df, data_center_df):
    ATLAS_API_KEY = os.getenv('ATLAS_API_KEY')
    ATLAS_API_URL = "https://atlas.ripe.net/api/v2/measurements"
    HEADERS = {"Content-Type": "application/json", "Accept": "application/json"}
    CALL_PARAMS = {"key": ATLAS_API_KEY}

    probe_ids = probe_df["id"].tolist()

    measurement_source = AtlasSource(
        type="probes",
        value=",".join(map(str, probe_ids)),
        requested=len(probe_ids)
    )

    responses = []

    start_time_utc = datetime.utcnow() + timedelta(minutes=5)
    end_time_utc = start_time_utc + timedelta(weeks=1)

    for index, row in data_center_df.iterrows():
        ping = Ping(af=4, target=row["IP"],
                    description=f"{row['Provider']} data center endpoint {row['Endpoint']} in {row['City']}, {row['State']}, {row['Country']}")

        atlas_request = AtlasCreateRequest(
            start_time=start_time_utc,
            stop_time=end_time_utc,
            key=ATLAS_API_KEY,
            measurements=[ping],
            sources=[measurement_source],
            is_oneoff=False
        )

        atlas_request._construct_post_data()
        atlas_request.post_data = map_function_on_keys_and_values(atlas_request.post_data, 'definitions',
                                                                  add_measurement_interval)
        
        response = requests.post(ATLAS_API_URL, headers=HEADERS, params=CALL_PARAMS, json=atlas_request.post_data)
        responses.append(response)
        print(response.status_code, ": ", response.text)

    return responses

In [417]:
ping_responses = create_ping_measurements(all_filtered_probes, df_data_centers_by_cluster)

201 :  {"measurements":[67652477]}
201 :  {"measurements":[67652478]}
201 :  {"measurements":[67652479]}
201 :  {"measurements":[67652480]}
201 :  {"measurements":[67652481]}
201 :  {"measurements":[67652482]}
201 :  {"measurements":[67652483]}
201 :  {"measurements":[67652484]}
201 :  {"measurements":[67652485]}
201 :  {"measurements":[67652487]}
201 :  {"measurements":[67652488]}
201 :  {"measurements":[67652489]}
201 :  {"measurements":[67652490]}
201 :  {"measurements":[67652491]}
201 :  {"measurements":[67652492]}
201 :  {"measurements":[67652493]}
201 :  {"measurements":[67652494]}
201 :  {"measurements":[67652495]}
201 :  {"measurements":[67652496]}
201 :  {"measurements":[67652498]}
201 :  {"measurements":[67652499]}
201 :  {"measurements":[67652500]}
201 :  {"measurements":[67652501]}
201 :  {"measurements":[67652502]}
201 :  {"measurements":[67652503]}
201 :  {"measurements":[67652504]}
201 :  {"measurements":[67652505]}
201 :  {"measurements":[67652506]}
201 :  {"measurement

In [418]:
len(ping_responses)

30

In [429]:
traceroute_measurements = create_traceroute_measurements(all_filtered_probes, df_data_centers_by_cluster)

201 :  {"measurements":[67663401]}
201 :  {"measurements":[67663402]}
201 :  {"measurements":[67663404]}
201 :  {"measurements":[67663406]}
201 :  {"measurements":[67663408]}
201 :  {"measurements":[67663410]}
201 :  {"measurements":[67663411]}
201 :  {"measurements":[67663413]}
201 :  {"measurements":[67663414]}
201 :  {"measurements":[67663416]}
201 :  {"measurements":[67663419]}
201 :  {"measurements":[67663420]}
201 :  {"measurements":[67663422]}
201 :  {"measurements":[67663424]}
201 :  {"measurements":[67663426]}
201 :  {"measurements":[67663427]}
201 :  {"measurements":[67663429]}
201 :  {"measurements":[67663431]}
201 :  {"measurements":[67663433]}
201 :  {"measurements":[67663434]}
201 :  {"measurements":[67663436]}
201 :  {"measurements":[67663439]}
201 :  {"measurements":[67663440]}
201 :  {"measurements":[67663443]}
201 :  {"measurements":[67663445]}
201 :  {"measurements":[67663447]}
201 :  {"measurements":[67663448]}
201 :  {"measurements":[67663450]}
201 :  {"measurement

In [4]:
ping_measurements_ids = [67652477,
                         67652478,
                         67652479,
                         67652480,
                         67652481,
                         67652482,
                         67652483,
                         67652484,
                         67652485,
                         67652487,
                         67652488,
                         67652489,
                         67652490,
                         67652491,
                         67652492,
                         67652493,
                         67652494,
                         67652495,
                         67652496,
                         67652498,
                         67652499,
                         67652500,
                         67652501,
                         67652502,
                         67652503,
                         67652504,
                         67652505,
                         67652506,
                         67652507,
                         67652508]

In [5]:
traceroute_measurement_ids = [67663401,
                              67663402,
                              67663404,
                              67663406,
                              67663408,
                              67663410,
                              67663411,
                              67663413,
                              67663414,
                              67663416,
                              67663419,
                              67663420,
                              67663422,
                              67663424,
                              67663426,
                              67663427,
                              67663429,
                              67663431,
                              67663433,
                              67663434,
                              67663436,
                              67663439,
                              67663440,
                              67663443,
                              67663445,
                              67663447,
                              67663448,
                              67663450,
                              67663452,
                              67663454]

In [19]:
def flatten_list(nested_list):
    return [item for sublist in nested_list for item in sublist]

def fetch_measurement_results(ids):
    ATLAS_API_KEY = os.getenv('ATLAS_API_KEY')

    results = []

    for id in ids:
        request_params = {
            "msm_id": id,
        }

        atlas_request = AtlasResultsRequest(is_authenticated=True, key=ATLAS_API_KEY, **request_params)

        for result in atlas_request.create():
            results.append(result)

    results = [x for x in results if isinstance(x, list)]
    results = flatten_list(results)
    
    return pd.DataFrame(results) 

In [20]:
ping_results = fetch_measurement_results(ping_measurements_ids)

In [30]:
ping_results.head(100)

Unnamed: 0,fw,mver,lts,dst_name,af,dst_addr,src_addr,proto,ttl,size,...,avg,msm_id,prb_id,timestamp,msm_name,from,type,group_id,step,stored_timestamp
0,5080,2.6.2,4,52.94.214.88,4,52.94.214.88,192.168.100.100,ICMP,239.0,64,...,265.449459,67652477,1000245,1707656935,Ping,94.201.115.148,ping,67652477,14400,1707656979
1,5010,2.2.0,24,52.94.214.88,4,52.94.214.88,192.168.99.18,ICMP,229.0,64,...,212.986177,67652477,1000465,1707656932,Ping,85.64.211.119,ping,67652477,14400,1707656973
2,5080,2.6.2,2,52.94.214.88,4,52.94.214.88,192.168.96.2,ICMP,239.0,64,...,239.769113,67652477,1002179,1707656939,Ping,2.134.22.170,ping,67652477,14400,1707656998
3,5040,2.4.1,6,52.94.214.88,4,52.94.214.88,172.16.2.128,ICMP,241.0,64,...,320.277881,67652477,1003912,1707656937,Ping,2.51.60.117,ping,67652477,14400,1707657062
4,5080,2.6.2,77,52.94.214.88,4,52.94.214.88,172.17.0.2,ICMP,238.0,64,...,130.307804,67652477,1005043,1707656933,Ping,121.138.25.4,ping,67652477,14400,1707657054
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,5080,2.6.2,32,52.94.214.88,4,52.94.214.88,192.168.1.21,ICMP,231.0,64,...,195.636243,67652477,13308,1707657139,Ping,31.136.193.196,ping,67652477,14400,1707657270
96,5080,2.6.2,44,52.94.214.88,4,52.94.214.88,192.168.5.22,ICMP,234.0,64,...,314.521598,67652477,14691,1707657128,Ping,169.1.145.175,ping,67652477,14400,1707657145
97,5080,2.6.2,60,52.94.214.88,4,52.94.214.88,192.168.0.162,ICMP,237.0,64,...,176.285159,67652477,15794,1707657134,Ping,5.132.87.115,ping,67652477,14400,1707657263
98,5080,2.6.2,45,52.94.214.88,4,52.94.214.88,192.168.3.106,ICMP,236.0,64,...,310.654188,67652477,21120,1707657138,Ping,197.234.182.101,ping,67652477,14400,1707657270


In [34]:
ping_results = ping_results[ping_results['result'].apply(lambda x: len([y for y in x if 'rtt' in y])) > 0]
ping_results['average_rtt'] = ping_results['result'].apply(lambda x : mean([y['rtt'] for y in x if 'rtt' in y]))

In [36]:
ping_results.head(10)

Unnamed: 0,fw,mver,lts,dst_name,af,dst_addr,src_addr,proto,ttl,size,...,msm_id,prb_id,timestamp,msm_name,from,type,group_id,step,stored_timestamp,average_rtt
0,5080,2.6.2,4,52.94.214.88,4,52.94.214.88,192.168.100.100,ICMP,239.0,64,...,67652477,1000245,1707656935,Ping,94.201.115.148,ping,67652477,14400,1707656979,265.449459
1,5010,2.2.0,24,52.94.214.88,4,52.94.214.88,192.168.99.18,ICMP,229.0,64,...,67652477,1000465,1707656932,Ping,85.64.211.119,ping,67652477,14400,1707656973,212.986177
2,5080,2.6.2,2,52.94.214.88,4,52.94.214.88,192.168.96.2,ICMP,239.0,64,...,67652477,1002179,1707656939,Ping,2.134.22.170,ping,67652477,14400,1707656998,239.769113
3,5040,2.4.1,6,52.94.214.88,4,52.94.214.88,172.16.2.128,ICMP,241.0,64,...,67652477,1003912,1707656937,Ping,2.51.60.117,ping,67652477,14400,1707657062,320.277881
4,5080,2.6.2,77,52.94.214.88,4,52.94.214.88,172.17.0.2,ICMP,238.0,64,...,67652477,1005043,1707656933,Ping,121.138.25.4,ping,67652477,14400,1707657054,130.307804
5,5080,2.6.3,232,52.94.214.88,4,52.94.214.88,192.168.0.254,ICMP,239.0,64,...,67652477,1005958,1707656935,Ping,211.114.5.242,ping,67652477,14400,1707656946,114.686065
6,5080,2.6.2,7,52.94.214.88,4,52.94.214.88,192.168.0.233,ICMP,238.0,64,...,67652477,1006365,1707656937,Ping,81.248.236.201,ping,67652477,14400,1707656977,317.673494
7,5080,2.6.2,31,52.94.214.88,4,52.94.214.88,192.168.178.95,ICMP,237.0,64,...,67652477,1006886,1707656930,Ping,80.89.215.220,ping,67652477,14400,1707656976,167.890884
8,5080,2.6.2,74,52.94.214.88,4,52.94.214.88,10.30.1.199,ICMP,230.0,64,...,67652477,11900,1707656938,Ping,213.219.167.143,ping,67652477,14400,1707657057,184.351307
9,5080,2.6.2,17,52.94.214.88,4,52.94.214.88,103.96.67.52,ICMP,239.0,64,...,67652477,13821,1707656920,Ping,103.96.67.52,ping,67652477,14400,1707656992,142.152177


In [38]:
# Load probes dataframe
all_filtered_probes_df = pd.read_csv('all_filtered_probes.csv')

In [41]:
all_filtered_probes_df.head()

# Join the ping results with probes
ping_results_joined_df = pd.merge(ping_results, all_filtered_probes_df, left_on='prb_id', right_on='id')

In [42]:
ping_results_joined_df.head()

Unnamed: 0,fw,mver,lts,dst_name,af,dst_addr,src_addr,proto,ttl,size,...,first_connected,total_uptime,tags,country_code,latitude,longitude,day,probe,status_name,continent
0,5080,2.6.2,4,52.94.214.88,4,52.94.214.88,192.168.100.100,ICMP,239.0,64,...,1582532000.0,101337867,"{'raspberry-pi', 'system-ipv4-stable-30d', 'ho...",AE,25.0895,55.2595,20240209,https://atlas.ripe.net/api/v2/probes/1000245/,Connected,AS
1,5080,2.6.2,4,52.94.214.88,4,52.94.214.88,192.168.100.100,ICMP,239.0,64,...,1582532000.0,101337867,"{'raspberry-pi', 'system-ipv4-stable-30d', 'ho...",AE,25.0895,55.2595,20240209,https://atlas.ripe.net/api/v2/probes/1000245/,Connected,AS
2,5080,2.6.2,4,52.94.214.88,4,52.94.214.88,192.168.100.100,ICMP,239.0,64,...,1582532000.0,101337867,"{'raspberry-pi', 'system-ipv4-stable-30d', 'ho...",AE,25.0895,55.2595,20240209,https://atlas.ripe.net/api/v2/probes/1000245/,Connected,AS
3,5080,2.6.2,4,52.94.214.88,4,52.94.214.88,192.168.100.100,ICMP,239.0,64,...,1582532000.0,101337867,"{'raspberry-pi', 'system-ipv4-stable-30d', 'ho...",AE,25.0895,55.2595,20240209,https://atlas.ripe.net/api/v2/probes/1000245/,Connected,AS
4,5080,2.6.2,4,52.94.214.88,4,52.94.214.88,192.168.100.100,ICMP,239.0,64,...,1582532000.0,101337867,"{'raspberry-pi', 'system-ipv4-stable-30d', 'ho...",AE,25.0895,55.2595,20240209,https://atlas.ripe.net/api/v2/probes/1000245/,Connected,AS


In [43]:
traceroute_results = fetch_measurement_results(traceroute_measurement_ids)

In [45]:
# Join the traceroute results with probes
traceroute_results_df = pd.merge(traceroute_results, all_filtered_probes_df, left_on='prb_id', right_on='id')

In [46]:
traceroute_results_df.head()

Unnamed: 0,fw,mver,lts,endtime,dst_name,dst_addr,src_addr,proto,af,size,...,first_connected,total_uptime,tags,country_code,latitude,longitude,day,probe,status_name,continent
0,5080,2.6.2,76,1707692065,52.94.214.88,52.94.214.88,192.168.2.100,ICMP,4,48,...,1572645000.0,132694981,"{'system-ipv4-stable-30d', 'home', 'system-ipv...",TR,39.9885,32.8185,20240209,https://atlas.ripe.net/api/v2/probes/1000023/,Connected,AS
1,5080,2.6.2,76,1707692065,52.94.214.88,52.94.214.88,192.168.2.100,ICMP,4,48,...,1572645000.0,132694981,"{'system-ipv4-stable-30d', 'home', 'system-ipv...",TR,39.9885,32.8185,20240209,https://atlas.ripe.net/api/v2/probes/1000023/,Connected,AS
2,5080,2.6.2,43,1707692060,52.94.214.88,52.94.214.88,172.17.0.2,ICMP,4,48,...,1581687000.0,83813321,"{'raspberry-pi', 'system-ipv4-stable-30d', 'ho...",JP,35.6715,139.6805,20240209,https://atlas.ripe.net/api/v2/probes/1000179/,Connected,AS
3,5080,2.6.2,36,1707692065,52.94.214.88,52.94.214.88,192.168.100.100,ICMP,4,48,...,1582532000.0,101337867,"{'raspberry-pi', 'system-ipv4-stable-30d', 'ho...",AE,25.0895,55.2595,20240209,https://atlas.ripe.net/api/v2/probes/1000245/,Connected,AS
4,5080,2.6.2,36,1707692065,52.94.214.88,52.94.214.88,192.168.100.100,ICMP,4,48,...,1582532000.0,101337867,"{'raspberry-pi', 'system-ipv4-stable-30d', 'ho...",AE,25.0895,55.2595,20240209,https://atlas.ripe.net/api/v2/probes/1000245/,Connected,AS
