In [304]:
import os
import json
import folium
import requests
import pycountry
import pandas as pd
import pycountry_convert as pc
import numpy as np
from pandas import json_normalize
from sklearn.cluster import KMeans
from datetime import datetime, timedelta
from ripe.atlas.cousteau import (
    Ping,
    Traceroute,
    AtlasSource,
    AtlasCreateRequest
)

In [11]:
df = pd.read_csv('cloudregions.csv', names=["Endpoint", "Provider", "State", "City", "Country", "IP"])

In [12]:
df.head()

Unnamed: 0,Endpoint,Provider,State,City,Country,IP
0,https://ec2.us-east-1.amazonaws.com/ping,AWS,N. Virginia,Ashburn,United States,52.46.142.79;
1,https://ec2.us-east-2.amazonaws.com/ping,AWS,Ohio,Columbus,United States,99.78.176.246;
2,https://ec2.us-west-1.amazonaws.com/ping,AWS,N. California,San Francisco,United States,176.32.118.30;
3,,AWS,N. California,San Francisco,United States,0;
4,https://ec2.us-west-2.amazonaws.com/ping,AWS,Oregon,Portland,United States,52.94.214.88;


In [13]:
# Clean up the IP address
df["IP"] = df["IP"].str[:-1]

In [9]:
df.head()

Unnamed: 0,Endpoint,Provider,State,City,Country,IP
0,https://ec2.us-east-1.amazonaws.com/ping,AWS,N. Virginia,Ashburn,United States,52.46.142.79
1,https://ec2.us-east-2.amazonaws.com/ping,AWS,Ohio,Columbus,United States,99.78.176.246
2,https://ec2.us-west-1.amazonaws.com/ping,AWS,N. California,San Francisco,United States,176.32.118.30
3,,AWS,N. California,San Francisco,United States,0
4,https://ec2.us-west-2.amazonaws.com/ping,AWS,Oregon,Portland,United States,52.94.214.88


In [18]:
# Define a function to make HTTP requests
def make_request(row):
    if row['IP'] == "0":
        return json.dumps("{}")

    url = f"http://api.ipstack.com/{row['IP']}?access_key=af9d6f3b9d4984149040eaa8098938c5"

    try:
        response = requests.get(url)
        print(f"URL: {url}, Status Code: {response.status_code}")
        return response.json()
    except Exception as e:
        print(f"Error for URL {url}: {e}")
        return None

In [19]:
# Retrieve geolocation data for each IP address (only first 100 will be successful due to API limits)
df['geo_response'] = df.apply(make_request, axis=1)

URL: http://api.ipstack.com/52.46.142.79?access_key=af9d6f3b9d4984149040eaa8098938c5, Status Code: 200
URL: http://api.ipstack.com/99.78.176.246?access_key=af9d6f3b9d4984149040eaa8098938c5, Status Code: 200
URL: http://api.ipstack.com/176.32.118.30?access_key=af9d6f3b9d4984149040eaa8098938c5, Status Code: 200
URL: http://api.ipstack.com/52.94.214.88?access_key=af9d6f3b9d4984149040eaa8098938c5, Status Code: 200
URL: http://api.ipstack.com/13.248.32.123?access_key=af9d6f3b9d4984149040eaa8098938c5, Status Code: 200
URL: http://api.ipstack.com/52.95.88.14?access_key=af9d6f3b9d4984149040eaa8098938c5, Status Code: 200
URL: http://api.ipstack.com/13.248.4.70?access_key=af9d6f3b9d4984149040eaa8098938c5, Status Code: 200
URL: http://api.ipstack.com/52.95.193.80?access_key=af9d6f3b9d4984149040eaa8098938c5, Status Code: 200
URL: http://api.ipstack.com/15.221.8.221?access_key=af9d6f3b9d4984149040eaa8098938c5, Status Code: 200
URL: http://api.ipstack.com/99.83.82.12?access_key=af9d6f3b9d4984149040e

In [27]:
# Explode the JSON response into separate columns
df_normalized = json_normalize(df['geo_response'])

In [28]:
df_normalized.head()

Unnamed: 0,ip,type,continent_code,continent_name,country_code,country_name,region_code,region_name,city,zip,...,location.languages,location.country_flag,location.country_flag_emoji,location.country_flag_emoji_unicode,location.calling_code,location.is_eu,success,error.code,error.type,error.info
0,52.46.142.79,ipv4,,North America,US,United States,VA,Virginia,Ashburn,20147.0,...,"[{'code': 'en', 'name': 'English', 'native': '...",https://assets.ipstack.com/flags/us.svg,🇺🇸,U+1F1FA U+1F1F8,1.0,False,,,,
1,99.78.176.246,ipv4,,North America,US,United States,OH,Ohio,Columbus,43201.0,...,"[{'code': 'en', 'name': 'English', 'native': '...",https://assets.ipstack.com/flags/us.svg,🇺🇸,U+1F1FA U+1F1F8,1.0,False,,,,
2,176.32.118.30,ipv4,,North America,US,United States,CA,California,San Jose,95122.0,...,"[{'code': 'en', 'name': 'English', 'native': '...",https://assets.ipstack.com/flags/us.svg,🇺🇸,U+1F1FA U+1F1F8,1.0,False,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,52.94.214.88,ipv4,,North America,US,United States,OR,Oregon,Boardman,97818.0,...,"[{'code': 'en', 'name': 'English', 'native': '...",https://assets.ipstack.com/flags/us.svg,🇺🇸,U+1F1FA U+1F1F8,1.0,False,,,,


In [29]:
df_result = pd.concat([df, df_normalized], axis=1)

In [30]:
df_result.head()

Unnamed: 0,Endpoint,Provider,State,City,Country,IP,geo_response,ip,type,continent_code,...,location.languages,location.country_flag,location.country_flag_emoji,location.country_flag_emoji_unicode,location.calling_code,location.is_eu,success,error.code,error.type,error.info
0,https://ec2.us-east-1.amazonaws.com/ping,AWS,N. Virginia,Ashburn,United States,52.46.142.79,"{'ip': '52.46.142.79', 'type': 'ipv4', 'contin...",52.46.142.79,ipv4,,...,"[{'code': 'en', 'name': 'English', 'native': '...",https://assets.ipstack.com/flags/us.svg,🇺🇸,U+1F1FA U+1F1F8,1.0,False,,,,
1,https://ec2.us-east-2.amazonaws.com/ping,AWS,Ohio,Columbus,United States,99.78.176.246,"{'ip': '99.78.176.246', 'type': 'ipv4', 'conti...",99.78.176.246,ipv4,,...,"[{'code': 'en', 'name': 'English', 'native': '...",https://assets.ipstack.com/flags/us.svg,🇺🇸,U+1F1FA U+1F1F8,1.0,False,,,,
2,https://ec2.us-west-1.amazonaws.com/ping,AWS,N. California,San Francisco,United States,176.32.118.30,"{'ip': '176.32.118.30', 'type': 'ipv4', 'conti...",176.32.118.30,ipv4,,...,"[{'code': 'en', 'name': 'English', 'native': '...",https://assets.ipstack.com/flags/us.svg,🇺🇸,U+1F1FA U+1F1F8,1.0,False,,,,
3,,AWS,N. California,San Francisco,United States,0,"""{}""",,,,...,,,,,,,,,,
4,https://ec2.us-west-2.amazonaws.com/ping,AWS,Oregon,Portland,United States,52.94.214.88,"{'ip': '52.94.214.88', 'type': 'ipv4', 'contin...",52.94.214.88,ipv4,,...,"[{'code': 'en', 'name': 'English', 'native': '...",https://assets.ipstack.com/flags/us.svg,🇺🇸,U+1F1FA U+1F1F8,1.0,False,,,,


In [36]:
# Only consider successfully geo-resolved datacenter endpoints
df_result_nonna = df_result[df_result['latitude'].notna()]

In [37]:
df_result_nonna.head()

Unnamed: 0,Endpoint,Provider,State,City,Country,IP,geo_response,ip,type,continent_code,...,location.languages,location.country_flag,location.country_flag_emoji,location.country_flag_emoji_unicode,location.calling_code,location.is_eu,success,error.code,error.type,error.info
0,https://ec2.us-east-1.amazonaws.com/ping,AWS,N. Virginia,Ashburn,United States,52.46.142.79,"{'ip': '52.46.142.79', 'type': 'ipv4', 'contin...",52.46.142.79,ipv4,,...,"[{'code': 'en', 'name': 'English', 'native': '...",https://assets.ipstack.com/flags/us.svg,🇺🇸,U+1F1FA U+1F1F8,1,False,,,,
1,https://ec2.us-east-2.amazonaws.com/ping,AWS,Ohio,Columbus,United States,99.78.176.246,"{'ip': '99.78.176.246', 'type': 'ipv4', 'conti...",99.78.176.246,ipv4,,...,"[{'code': 'en', 'name': 'English', 'native': '...",https://assets.ipstack.com/flags/us.svg,🇺🇸,U+1F1FA U+1F1F8,1,False,,,,
2,https://ec2.us-west-1.amazonaws.com/ping,AWS,N. California,San Francisco,United States,176.32.118.30,"{'ip': '176.32.118.30', 'type': 'ipv4', 'conti...",176.32.118.30,ipv4,,...,"[{'code': 'en', 'name': 'English', 'native': '...",https://assets.ipstack.com/flags/us.svg,🇺🇸,U+1F1FA U+1F1F8,1,False,,,,
4,https://ec2.us-west-2.amazonaws.com/ping,AWS,Oregon,Portland,United States,52.94.214.88,"{'ip': '52.94.214.88', 'type': 'ipv4', 'contin...",52.94.214.88,ipv4,,...,"[{'code': 'en', 'name': 'English', 'native': '...",https://assets.ipstack.com/flags/us.svg,🇺🇸,U+1F1FA U+1F1F8,1,False,,,,
5,https://ec2.ap-east-1.amazonaws.com/ping,AWS,Hong Kong,Hong Kong,Hong Kong,13.248.32.123,"{'ip': '13.248.32.123', 'type': 'ipv4', 'conti...",13.248.32.123,ipv4,AS,...,"[{'code': 'zh', 'name': 'Chinese', 'native': '...",https://assets.ipstack.com/flags/hk.svg,🇭🇰,U+1F1ED U+1F1F0,852,False,,,,


In [38]:
# Prepare a map
map_center = [df_result_nonna['latitude'].mean(), df_result_nonna['longitude'].mean()]
map_object = folium.Map(location=map_center, zoom_start=5)

In [39]:
# Add markers for each row in the DataFrame
for index, row in df_result_nonna.iterrows():
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=row['Endpoint']
    ).add_to(map_object)

map_object

In [40]:
df_result_nonna.to_csv('cloud_regions_geocoded.csv')

In [166]:
# Extract latitude and longitude columns for clustering
coordinates = df_result_nonna[['latitude', 'longitude']]
# Choose the number of clusters (k)
k = 30
# Perform k-means clustering
kmeans = KMeans(n_clusters=k, random_state=42)
df_result_nonna['cluster'] = kmeans.fit_predict(coordinates)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_result_nonna['cluster'] = kmeans.fit_predict(coordinates)


In [167]:
df_result_nonna.head()

Unnamed: 0,Endpoint,Provider,State,City,Country,IP,geo_response,ip,type,continent_code,...,location.country_flag,location.country_flag_emoji,location.country_flag_emoji_unicode,location.calling_code,location.is_eu,success,error.code,error.type,error.info,cluster
0,https://ec2.us-east-1.amazonaws.com/ping,AWS,N. Virginia,Ashburn,United States,52.46.142.79,"{'ip': '52.46.142.79', 'type': 'ipv4', 'contin...",52.46.142.79,ipv4,,...,https://assets.ipstack.com/flags/us.svg,🇺🇸,U+1F1FA U+1F1F8,1,False,,,,,26
1,https://ec2.us-east-2.amazonaws.com/ping,AWS,Ohio,Columbus,United States,99.78.176.246,"{'ip': '99.78.176.246', 'type': 'ipv4', 'conti...",99.78.176.246,ipv4,,...,https://assets.ipstack.com/flags/us.svg,🇺🇸,U+1F1FA U+1F1F8,1,False,,,,,13
2,https://ec2.us-west-1.amazonaws.com/ping,AWS,N. California,San Francisco,United States,176.32.118.30,"{'ip': '176.32.118.30', 'type': 'ipv4', 'conti...",176.32.118.30,ipv4,,...,https://assets.ipstack.com/flags/us.svg,🇺🇸,U+1F1FA U+1F1F8,1,False,,,,,22
4,https://ec2.us-west-2.amazonaws.com/ping,AWS,Oregon,Portland,United States,52.94.214.88,"{'ip': '52.94.214.88', 'type': 'ipv4', 'contin...",52.94.214.88,ipv4,,...,https://assets.ipstack.com/flags/us.svg,🇺🇸,U+1F1FA U+1F1F8,1,False,,,,,0
5,https://ec2.ap-east-1.amazonaws.com/ping,AWS,Hong Kong,Hong Kong,Hong Kong,13.248.32.123,"{'ip': '13.248.32.123', 'type': 'ipv4', 'conti...",13.248.32.123,ipv4,AS,...,https://assets.ipstack.com/flags/hk.svg,🇭🇰,U+1F1ED U+1F1F0,852,False,,,,,9


In [168]:
colors = [
    'red',
    'blue',
    'gray',
    'darkred',
    'lightred',
    'orange',
    'beige',
    'green',
    'darkgreen',
    'lightgreen',
    'darkblue',
    'lightblue',
    'purple',
    'darkpurple',
    'pink',
    'cadetblue',
    'lightgray',
    'black'
]

map_center = [df_result_nonna['latitude'].mean(), df_result_nonna['longitude'].mean()]
map_object = folium.Map(location=map_center, zoom_start=5)

# Add markers for each row in the DataFrame
for index, row in df_result_nonna.iterrows():
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=row['Endpoint'],
        icon=folium.Icon(color=colors[row['cluster'] % len(colors)])
    ).add_to(map_object)

map_object

In [190]:
# Group by cluster and provider, and select unique rows for each group
df_data_centers_by_cluster_provider = df_result_nonna.groupby(['cluster', 'Provider']).first().reset_index()
# Pick at most 2 data centers from each group from above
df_data_centers_by_cluster = df_data_centers_by_cluster_provider.groupby(['cluster']).first().reset_index()

In [192]:
df_data_centers_by_cluster.shape

(30, 32)

In [193]:
map_center = [df_data_centers_by_cluster['latitude'].mean(), df_data_centers_by_cluster['longitude'].mean()]
map_object = folium.Map(location=map_center, zoom_start=5)

for index, row in df_data_centers_by_cluster.iterrows():
    folium.Marker(
        location=[row['latitude'], row['longitude']],
        popup=row['Endpoint']
    ).add_to(map_object)

map_object

In [194]:
df_data_centers_by_cluster.to_csv('cloud_endpoints_final.csv')

In [195]:
# Here we work with unpacked .json file from the latest probe dataset (09-02-2024)
file_path = '2024/02/20240209.json'

with open(file_path, 'r') as f:
    data = json.load(f)
    df_probes = pd.DataFrame(data['objects'])

In [196]:
df_probes.head()

Unnamed: 0,id,address_v4,address_v6,asn_v4,asn_v6,prefix_v4,prefix_v6,is_anchor,is_public,status,status_since,first_connected,total_uptime,tags,country_code,latitude,longitude,day,probe,status_name
0,1,45.138.229.91,2a10:3781:e22:1:220:4aff:fec8:23d7,206238.0,206238.0,45.138.228.0/22,2a10:3780::/29,False,True,1,1707269398,1288368000.0,407264862,"[system-ipv4-stable-1d, system-resolves-aaaa-c...",NL,52.3475,4.9275,20240209,https://atlas.ripe.net/api/v2/probes/1/,Connected
1,2,,,1136.0,1136.0,77.160.0.0/13,2a02:a400::/25,False,False,3,1640571508,1288385000.0,347389948,"[system-no-controller-connection, system-ipv6-...",GU,42.6585,21.1575,20240209,https://atlas.ripe.net/api/v2/probes/2/,Abandoned
2,3,77.174.76.85,2a02:a467:f500:1:220:4aff:fec8:2532,1136.0,1136.0,77.174.0.0/16,2a02:a400::/25,False,True,1,1706618589,1288373000.0,403591652,"[system-ipv6-stable-1d, system-firewall-proble...",NL,52.3685,4.9375,20240209,https://atlas.ripe.net/api/v2/probes/3/,Connected
3,4,83.163.50.165,2001:980:57a4:1:220:4aff:fec8:244a,3265.0,3265.0,83.160.0.0/14,2001:980::/32,False,True,3,1568319241,1288599000.0,248928764,"[dsl, home, system-v1, iwantbcp38compliancetes...",NL,52.3895,4.6375,20240209,https://atlas.ripe.net/api/v2/probes/4/,Abandoned
4,5,83.163.239.181,2001:981:602b:1:220:4aff:fec8:2355,3265.0,3265.0,83.160.0.0/14,2001:980::/30,False,True,3,1513671789,1288600000.0,185731720,"[home, nat, system-v1, system-ipv4-capable, sy...",ES,36.8295,-2.4625,20240209,https://atlas.ripe.net/api/v2/probes/5/,Abandoned


In [230]:
df_probes_filtered = df_probes
df_probes_filtered['tags'] = df_probes['tags'].apply(set)
# Only pick those probes with 1-day stable IP to improve reliability of our measurement
df_probes_filtered = df_probes_filtered[df_probes_filtered['tags'].apply(lambda tags: 'system-ipv4-stable-1d' in tags)]
# Only pick probes with status equal 1
df_probes_filtered = df_probes_filtered[df_probes_filtered['status'] == 1]

In [231]:
df_probes_filtered.shape

(7095, 20)

In [232]:
# Total number of probes we want for our measurement
total_probes = 150

# Share of population by continent
continent_population_share = {
    'NA': 7.5,
    'SA': 5.5,
    'AS': 59.4,
    'OC': 0.6,
    'AF': 17.6,
    'EU': 9.4
}


# Pick samples from a probe dataframe respecting the population
# share by continent
def pick_samples_by_continent(df, population_distribution):
    samples = []

    for continent, share in population_distribution.items():
        # Check if the continent has entries in the DataFrame
        if df[df['continent'] == continent].empty:
            continue

        # Calculate the number of samples based on the share
        num_samples = int(np.ceil(len(df) * share / 100))

        # Select samples for the current continent
        samples_continent = df[df['continent'] == continent].sample(n=num_samples, replace=True)

        # Append the selected samples to the result list
        samples.append(samples_continent)

    # Concatenate the list of DataFrames into a single one
    result_df = pd.concat(samples, ignore_index=True)

    return result_df


# Convert country code to continent
def country_to_continent(country_code):
    try:
        return pc.country_alpha2_to_continent_code(country_code)
    except KeyError:
        return None

In [233]:
df_probes_filtered['continent'] = df_probes_filtered['country_code'].apply(country_to_continent)

In [234]:
df_probes_filtered.head()

Unnamed: 0,id,address_v4,address_v6,asn_v4,asn_v6,prefix_v4,prefix_v6,is_anchor,is_public,status,...,first_connected,total_uptime,tags,country_code,latitude,longitude,day,probe,status_name,continent
0,1,45.138.229.91,2a10:3781:e22:1:220:4aff:fec8:23d7,206238.0,206238.0,45.138.228.0/22,2a10:3780::/29,False,True,1,...,1288368000.0,407264862,"{system-ipv4-stable-30d, home, system-ipv4-rfc...",NL,52.3475,4.9275,20240209,https://atlas.ripe.net/api/v2/probes/1/,Connected,EU
7,8,83.81.83.145,2001:1c05:2011:fa00:220:4aff:fec8:2464,33915.0,33915.0,83.80.0.0/14,2001:1c00::/24,False,True,1,...,1288619000.0,405485386,"{ziggo, system-ipv4-stable-30d, home, system-i...",NL,51.1915,5.9975,20240209,https://atlas.ripe.net/api/v2/probes/8/,Connected,EU
13,14,79.55.209.251,,3269.0,,79.55.0.0/16,,False,True,1,...,1289551000.0,352983358,"{system-v1, system-ipv4-stable-30d, system-ipv...",IT,41.8995,12.4375,20240209,https://atlas.ripe.net/api/v2/probes/14/,Connected,EU
29,30,,,20115.0,,75.142.96.0/19,,False,False,1,...,1290200000.0,170845189,"{system-v1, system-ipv4-stable-30d, home, syst...",US,33.8175,-118.0615,20240209,https://atlas.ripe.net/api/v2/probes/30/,Connected,
31,32,76.82.152.84,2603:8001:5000:2aa0:220:4aff:fec8:25ed,20001.0,20001.0,76.80.0.0/14,2603:8000::/28,False,True,1,...,1289868000.0,406671689,"{system-ipv4-stable-30d, home, system-ipv4-rfc...",US,32.8885,-117.1815,20240209,https://atlas.ripe.net/api/v2/probes/32/,Connected,


In [393]:
# Define filters for different probe classes
filters = {
    "radio (wi-fi)": lambda row: 'home' in row['tags'] and any(
        x in row['tags'] for x in {"wi-fi", "wifi", "wlan", "wireless", "wireless-isp", "wireless-link"}),
    "radio (mobile)": lambda row: any(x in row['tags'] for x in {"lte", "5g", "4g", "3g", "cellular"}),
    "satellite": lambda row: row["asn_v4"] == 14593 or row["asn_v6"] == 14593,
    "ethernet": lambda row: 'home' in row['tags'] and not any(x in row['tags'] for x in {"wi-fi", "wifi", "wlan"}) and  any(
        x in row['tags'] for x in {"dsl", "adsl", "fibre", "fiber", "cable", "ftth"})
}

In [409]:
filtered_probe_frames = {}

for filter_name, filter_func in filters.items():
    df_filtered_temp = df_probes_filtered[df_probes_filtered.apply(filter_func, axis=1)]
    selected_samples = pick_samples_by_continent(df_filtered_temp, continent_population_share)

    if filter_name == "satellite":
        filtered_probe_frames[filter_name] = df_filtered_temp
    elif filter_name == "ethernet":
        filtered_probe_frames[filter_name] = selected_samples.sample(n=520, replace=True)
    else:
        filtered_probe_frames[filter_name] = selected_samples

    filtered_probe_frames[filter_name] = filtered_probe_frames[filter_name].sample(frac=1.0, replace=True)

In [410]:
# We verify we have ~170 unique probes after final sampling
c = 0
for filter_name, probe_df in filtered_probe_frames.items():
    c += probe_df.shape[0]
    print(filter_name, ": ", probe_df.shape[0])
print("All probes:", c)

all_filtered_probes = pd.concat(filtered_probe_frames.values(), ignore_index=True)

print("Unique probes:", len(set(all_filtered_probes['id'].tolist())))

radio (wi-fi) :  51
radio (mobile) :  47
satellite :  12
ethernet :  520
All probes: 630
Unique probes: 167


In [411]:
all_filtered_probes.to_csv('all_filtered_probes')

In [412]:
df_data_centers_by_cluster.head()

Unnamed: 0,cluster,Provider,Endpoint,State,City,Country,IP,geo_response,ip,type,...,location.languages,location.country_flag,location.country_flag_emoji,location.country_flag_emoji_unicode,location.calling_code,location.is_eu,success,error.code,error.type,error.info
0,0,AWS,https://ec2.us-west-2.amazonaws.com/ping,Oregon,Portland,United States,52.94.214.88,"{'ip': '52.94.214.88', 'type': 'ipv4', 'contin...",52.94.214.88,ipv4,...,"[{'code': 'en', 'name': 'English', 'native': '...",https://assets.ipstack.com/flags/us.svg,🇺🇸,U+1F1FA U+1F1F8,1,False,,,,
1,1,UCloud,https://feitsui-bjs.cn-bj.ufileos.com/ping.html,cn-bj,Beijing,China,117.50.123.29,"{'ip': '117.50.123.29', 'type': 'ipv4', 'conti...",117.50.123.29,ipv4,...,"[{'code': 'zh', 'name': 'Chinese', 'native': '...",https://assets.ipstack.com/flags/cn.svg,🇨🇳,U+1F1E8 U+1F1F3,86,False,,,,
2,2,AWS,https://ec2.eu-south-1.amazonaws.com/ping,Milan,Milan,Italy,52.119.132.74,"{'ip': '52.119.132.74', 'type': 'ipv4', 'conti...",52.119.132.74,ipv4,...,"[{'code': 'it', 'name': 'Italian', 'native': '...",https://assets.ipstack.com/flags/it.svg,🇮🇹,U+1F1EE U+1F1F9,39,True,,,,
3,3,AWS,https://ec2.sa-east-1.amazonaws.com/ping,São Paulo,São Paulo,Brazil,177.72.245.165,"{'ip': '177.72.245.165', 'type': 'ipv4', 'cont...",177.72.245.165,ipv4,...,"[{'code': 'pt', 'name': 'Portuguese', 'native'...",https://assets.ipstack.com/flags/br.svg,🇧🇷,U+1F1E7 U+1F1F7,55,False,,,,
4,4,AWS,https://ec2.ap-southeast-3.amazonaws.com/ping,Jakarta,Jakarta,Indonesia,99.78.244.123,"{'ip': '99.78.244.123', 'type': 'ipv4', 'conti...",99.78.244.123,ipv4,...,"[{'code': 'id', 'name': 'Indonesian', 'native'...",https://assets.ipstack.com/flags/id.svg,🇮🇩,U+1F1EE U+1F1E9,62,False,,,,


In [413]:
def map_function_on_keys_and_values(obj, keys_to_map, func):
    if isinstance(obj, dict):
        for key, value in obj.items():
            if key in keys_to_map:
                obj[key] = func(value)
            else:
                obj[key] = map_function_on_keys_and_values(value, keys_to_map, func)
        return obj
    elif isinstance(obj, list):
        return [map_function_on_keys_and_values(item, keys_to_map, func) for item in obj]
    else:
        return obj


def add_measurement_interval(definitions):
    for definition in definitions:
        definition["interval"] = 14400
    return definitions

In [416]:
def create_ping_measurements(probe_df, data_center_df):
    ATLAS_API_KEY = ""
    ATLAS_API_URL = "https://atlas.ripe.net/api/v2/measurements"
    HEADERS = {"Content-Type": "application/json", "Accept": "application/json"}
    CALL_PARAMS = {"key": ATLAS_API_KEY}

    probe_ids = probe_df["id"].tolist()

    measurement_source = AtlasSource(
        type="probes",
        value=",".join(map(str, probe_ids)),
        requested=len(probe_ids)
    )

    responses = []

    start_time_utc = datetime.utcnow() + timedelta(minutes=5)
    end_time_utc = start_time_utc + timedelta(weeks=1)

    for index, row in data_center_df.iterrows():
        ping = Ping(af=4, target=row["IP"],
                    description=f"{row['Provider']} data center endpoint {row['Endpoint']} in {row['City']}, {row['State']}, {row['Country']}")

        atlas_request = AtlasCreateRequest(
            start_time=start_time_utc,
            stop_time=end_time_utc,
            key=ATLAS_API_KEY,
            measurements=[ping],
            sources=[measurement_source],
            is_oneoff=False
        )

        atlas_request._construct_post_data()
        atlas_request.post_data = map_function_on_keys_and_values(atlas_request.post_data, 'definitions',
                                                                  add_measurement_interval)
        
        response = requests.post(ATLAS_API_URL, headers=HEADERS, params=CALL_PARAMS, json=atlas_request.post_data)
        responses.append(response)
        print(response.status_code, ": ", response.text)

    return responses

In [417]:
ping_responses = create_ping_measurements(all_filtered_probes, df_data_centers_by_cluster)

201 :  {"measurements":[67652477]}
201 :  {"measurements":[67652478]}
201 :  {"measurements":[67652479]}
201 :  {"measurements":[67652480]}
201 :  {"measurements":[67652481]}
201 :  {"measurements":[67652482]}
201 :  {"measurements":[67652483]}
201 :  {"measurements":[67652484]}
201 :  {"measurements":[67652485]}
201 :  {"measurements":[67652487]}
201 :  {"measurements":[67652488]}
201 :  {"measurements":[67652489]}
201 :  {"measurements":[67652490]}
201 :  {"measurements":[67652491]}
201 :  {"measurements":[67652492]}
201 :  {"measurements":[67652493]}
201 :  {"measurements":[67652494]}
201 :  {"measurements":[67652495]}
201 :  {"measurements":[67652496]}
201 :  {"measurements":[67652498]}
201 :  {"measurements":[67652499]}
201 :  {"measurements":[67652500]}
201 :  {"measurements":[67652501]}
201 :  {"measurements":[67652502]}
201 :  {"measurements":[67652503]}
201 :  {"measurements":[67652504]}
201 :  {"measurements":[67652505]}
201 :  {"measurements":[67652506]}
201 :  {"measurement

In [418]:
len(ping_responses)

30