# RIPE Atlas CDN Server Measurements

This notebook sets up and launches RIPE Atlas measurements to CDN DNS resolvers.

**Objectives:**
- Test connectivity to 7 major CDN DNS resolvers
- Collect RTT, packet loss, and availability metrics
- Run 24-hour measurement campaign with 50 probes
- Analyze geographic and temporal patterns

**CDN Targets:**
- Cloudflare: 1.1.1.1, 1.0.0.1
- Google Public DNS: 8.8.8.8, 8.8.4.4
- Quad9: 9.9.9.9
- OpenDNS: 208.67.222.222, 208.67.220.220

In [1]:
# Import required libraries
import os
import json
import time
from datetime import datetime, timedelta
from dotenv import load_dotenv

from ripe.atlas.cousteau import (
    Ping,
    AtlasSource,
    AtlasCreateRequest,
    Probe,
    AtlasResultsRequest
)

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load environment variables
load_dotenv()
print()




In [2]:
# Load RIPE Atlas API keys
PRIMARY_KEY = os.getenv('RIPE_ATLAS_PRIMARY_KEY')

if not PRIMARY_KEY:
    raise ValueError()

print()




# RIPE Atlas CDN Server Measurements

This notebook sets up and launches RIPE Atlas measurements to CDN DNS resolvers.

**Objectives:**
- Test connectivity to 7 major CDN DNS resolvers
- Collect RTT, packet loss, and availability metrics
- Run 24-hour measurement campaign with 50 probes
- Analyze geographic and temporal patterns

**CDN Targets:**
- Cloudflare: 1.1.1.1, 1.0.0.1
- Google Public DNS: 8.8.8.8, 8.8.4.4
- Quad9: 9.9.9.9
- OpenDNS: 208.67.222.222, 208.67.220.220

In [1]:
# Import required libraries
import os
import json
import time
from datetime import datetime, timedelta
from dotenv import load_dotenv

from ripe.atlas.cousteau import (
    Ping,
    AtlasSource,
    AtlasCreateRequest,
    Probe,
    AtlasResultsRequest
)

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load environment variables
load_dotenv()

print("‚úì Libraries imported successfully")

‚úì Libraries imported successfully


In [2]:
# Load RIPE Atlas API keys
PRIMARY_KEY = os.getenv('RIPE_ATLAS_PRIMARY_KEY')

if not PRIMARY_KEY:
    raise ValueError("PRIMARY_KEY not found in .env file")

print(f"‚úì API key loaded: {PRIMARY_KEY[:10]}...")

‚úì API key loaded: 7e76f32e-0...


In [3]:
# Define CDN targets
CDN_TARGETS = {
    'Cloudflare_Primary': '1.1.1.1',
    'Cloudflare_Secondary': '1.0.0.1',
    'Google_Primary': '8.8.8.8',
    'Google_Secondary': '8.8.4.4',
    'Quad9': '9.9.9.9',
    'OpenDNS_Primary': '208.67.222.222',
    'OpenDNS_Secondary': '208.67.220.220'
}

print("CDN Targets Configured:")
for name, ip in CDN_TARGETS.items():
    print(f"  {name}: {ip}")

CDN Targets Configured:
  Cloudflare_Primary: 1.1.1.1
  Cloudflare_Secondary: 1.0.0.1
  Google_Primary: 8.8.8.8
  Google_Secondary: 8.8.4.4
  Quad9: 9.9.9.9
  OpenDNS_Primary: 208.67.222.222
  OpenDNS_Secondary: 208.67.220.220


## Probe Discovery

Find active RIPE Atlas probes to use for measurements.

In [4]:
# Query available probes
from ripe.atlas.cousteau import ProbeRequest

filters = {
    'status': 1,  # Connected probes only
    'tags': 'system-ipv4-works',
}

probes = ProbeRequest(**filters)

# Get first 100 probes
probe_list = []
for probe in probes:
    probe_list.append({
        'id': probe['id'],
        'country': probe.get('country_code', 'Unknown'),
        'asn': probe.get('asn_v4', 'Unknown')
    })
    if len(probe_list) >= 100:
        break

df_probes = pd.DataFrame(probe_list)
print(f"‚úì Found {len(df_probes)} active probes")
print(f"Country distribution:")
print(df_probes['country'].value_counts().head(10))

‚úì Found 100 active probes
Country distribution:
country
DE    15
NL     9
GB     9
US     7
SE     7
IT     7
CH     3
AT     3
UA     3
RU     3
Name: count, dtype: int64


## Test Measurement

Run a 1-hour test measurement with 5 probes to validate setup before launching full campaign.

In [7]:
# Configuration
TEST_MEASUREMENT = True  # Set to False for full 24-hour campaign

if TEST_MEASUREMENT:
    NUM_PROBES = 5
    DURATION = 3600  # 1 hour
    INTERVAL = 300  # 5 minutes
    TARGET = '8.8.8.8'  # Google DNS for test (1.1.1.1 has too many concurrent measurements)
    print("TEST MODE: 1-hour measurement with 5 probes")
else:
    NUM_PROBES = 50
    DURATION = 86400  # 24 hours
    INTERVAL = 300  # 5 minutes
    TARGET = None  # Will loop through all CDN_TARGETS
    print("PRODUCTION MODE: 24-hour measurement with 50 probes")

TEST MODE: 1-hour measurement with 5 probes


In [8]:
# Launch measurement
def create_measurement(target_ip, description, num_probes=5):
    """
    Create a ping measurement to target IP.
    
    Args:
        target_ip: IP address to ping
        description: Measurement description
        num_probes: Number of probes to use
    
    Returns:
        Measurement ID if successful, None otherwise
    """
    # Define ping measurement
    ping = Ping(
        af=4,
        target=target_ip,
        description=description,
    )
    
    # Define probe source
    source = AtlasSource(
        type="probes",
        value=f"{num_probes}",
        requested=num_probes,
        tags={"include": ["system-ipv4-works"]}
    )
    
    # Create request
    atlas_request = AtlasCreateRequest(
        start_time=datetime.now(),
        stop_time=datetime.now() + timedelta(seconds=DURATION),
        key=PRIMARY_KEY,
        measurements=[ping],
        sources=[source],
        is_oneoff=False
    )
    
    # Submit
    (is_success, response) = atlas_request.create()
    
    if is_success:
        measurement_id = response['measurements'][0]
        print(f"‚úì Created measurement {measurement_id} for {target_ip}")
        print(f"  View at: https://atlas.ripe.net/measurements/{measurement_id}")
        return measurement_id
    else:
        print(f"‚úó Failed to create measurement: {response}")
        return None

# Launch measurement(s)
measurement_ids = {}

if TEST_MEASUREMENT:
    # Test with Cloudflare only
    mid = create_measurement(
        TARGET,
        "CDN Selection Test - Cloudflare",
        NUM_PROBES
    )
    if mid:
        measurement_ids['Cloudflare_Test'] = mid
else:
    # Full campaign to all CDN targets
    for cdn_name, cdn_ip in CDN_TARGETS.items():
        mid = create_measurement(
            cdn_ip,
            f"CDN Selection - {cdn_name}",
            NUM_PROBES
        )
        if mid:
            measurement_ids[cdn_name] = mid
        time.sleep(2)  # Rate limiting

print(f"‚úì Launched {len(measurement_ids)} measurement(s)")
print("Measurement IDs:")
for name, mid in measurement_ids.items():
    print(f"  {name}: {mid}")

‚úó Failed to create measurement: {'error': {'detail': 'There was a problem with your request', 'status': 400, 'title': 'Bad Request', 'code': 102, 'errors': [{'source': {'pointer': ''}, 'detail': 'We do not allow more than 25 concurrent measurements to the same target: 8.8.8.8.'}]}}
‚úì Launched 0 measurement(s)
Measurement IDs:


## Next Steps

After measurements complete:

1. **Monitor progress** at https://atlas.ripe.net/measurements/[measurement_id]
2. **Download results** using the measurement IDs above
3. **Analyze data** in a separate notebook:
   - Parse JSON results
   - Calculate RTT statistics (min, avg, max, jitter)
   - Identify packet loss
   - Compare CDN performance by geography
   - Correlate with time-of-day patterns

**Save measurement IDs for later retrieval:**

In [9]:
# Save measurement IDs to file
import json
from pathlib import Path

output_dir = Path('../../data/raw')
output_dir.mkdir(parents=True, exist_ok=True)

output_file = output_dir / f'ripe_measurements_{datetime.now().strftime("%Y%m%d_%H%M%S")}.json'

metadata = {
    'created': datetime.now().isoformat(),
    'test_mode': TEST_MEASUREMENT,
    'num_probes': NUM_PROBES,
    'duration_seconds': DURATION,
    'interval_seconds': INTERVAL,
    'measurement_ids': measurement_ids
}

with open(output_file, 'w') as f:
    json.dump(metadata, f, indent=2)

print(f"‚úì Saved measurement metadata to {output_file}")

‚úì Saved measurement metadata to ../../data/raw/ripe_measurements_20251110_204810.json


# RIPE Atlas CDN Measurement Campaign

## Overview
This notebook designs and launches a measurement campaign to collect RTT, traceroute, and performance data for major CDN providers using RIPE Atlas probes.

## Objectives
1. Measure RTT to major CDN edge servers
2. Collect traceroute paths for network analysis
3. Compare CDN performance across geographic regions
4. Generate data for multi-metric CDN selection algorithm

## Target CDNs
- **Cloudflare**: 1.1.1.1, 1.0.0.1
- **Google Cloud CDN**: 8.8.8.8, 8.8.4.4
- **Quad9**: 9.9.9.9
- **OpenDNS**: 208.67.222.222, 208.67.220.220

## Measurement Strategy
- **Probes**: 50-100 globally distributed
- **Duration**: 24-48 hours
- **Frequency**: Every 5 minutes (300 seconds)
- **Metrics**: RTT, packet loss, jitter, path characteristics

In [10]:
# Import libraries
import os
import json
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
from pathlib import Path
from dotenv import load_dotenv

# RIPE Atlas imports
from ripe.atlas.cousteau import (
    Ping,
    Traceroute,
    AtlasSource,
    AtlasCreateRequest,
    AtlasResultsRequest,
    Probe,
    ProbeRequest
)

# Load environment variables
load_dotenv()

# Create directories
Path('../data/raw').mkdir(parents=True, exist_ok=True)
Path('../data/processed').mkdir(parents=True, exist_ok=True)
Path('../results/figures').mkdir(parents=True, exist_ok=True)

print("‚úì Libraries imported successfully")
print(f"‚úì Data directories ready")

‚úì Libraries imported successfully
‚úì Data directories ready


## Load RIPE Atlas API Keys

In [11]:
# Load all API keys
api_keys = {
    'NPM': os.getenv('RIPE_ATLAS_API_KEY_NPM'),  # Get results from non-public measurement
    'LYM': os.getenv('RIPE_ATLAS_API_KEY_LYM'),  # List your measurements
    'SNM': os.getenv('RIPE_ATLAS_API_KEY_SNM'),  # Schedule new measurement (PRIMARY)
    'SRM': os.getenv('RIPE_ATLAS_API_KEY_SRM'),  # Stop running measurement
    'UEM': os.getenv('RIPE_ATLAS_API_KEY_UEM'),  # Update existing measurement
    'NPMP': os.getenv('RIPE_ATLAS_API_KEY_NPMP'), # Get non-public results from probes
    'GRIP': os.getenv('RIPE_ATLAS_API_KEY_GRIP'), # Get restricted probe info
    'SPP': os.getenv('RIPE_ATLAS_API_KEY_SPP'),  # Set probe parameters
    'SIP': os.getenv('RIPE_ATLAS_API_KEY_SIP'),  # Show probe information
}

print("RIPE Atlas API Keys Status:")
for key_name, key_value in api_keys.items():
    status = "‚úì" if key_value else "‚úó"
    masked = f"{key_value[:8]}...{key_value[-4:]}" if key_value else "NOT FOUND"
    print(f"  {status} {key_name:6s}: {masked}")

# Primary key for creating measurements
PRIMARY_KEY = api_keys['SNM']
print(f"‚úì Using SNM key for creating measurements")

RIPE Atlas API Keys Status:
  ‚úì NPM   : b7907981...3062
  ‚úì LYM   : 6d54c539...e737
  ‚úì SNM   : 7e76f32e...8589
  ‚úì SRM   : d14d389d...f3bb
  ‚úì UEM   : 6ba40cd6...7801
  ‚úì NPMP  : 6f6e9faf...f013
  ‚úì GRIP  : 0fde8760...095f
  ‚úì SPP   : c863dea0...ecf4
  ‚úì SIP   : c05ece2b...4d42
‚úì Using SNM key for creating measurements


## Define CDN Targets

In [12]:
# Major CDN targets for measurement
cdn_targets = {
    'Cloudflare': [
        {'ip': '1.1.1.1', 'name': 'Cloudflare Primary DNS'},
        {'ip': '1.0.0.1', 'name': 'Cloudflare Secondary DNS'},
    ],
    'Google': [
        {'ip': '8.8.8.8', 'name': 'Google Primary DNS'},
        {'ip': '8.8.4.4', 'name': 'Google Secondary DNS'},
    ],
    'Quad9': [
        {'ip': '9.9.9.9', 'name': 'Quad9 DNS'},
    ],
    'OpenDNS': [
        {'ip': '208.67.222.222', 'name': 'OpenDNS Primary'},
        {'ip': '208.67.220.220', 'name': 'OpenDNS Secondary'},
    ]
}

# Flatten for easy iteration
all_targets = []
for cdn, targets in cdn_targets.items():
    for target in targets:
        all_targets.append({
            'cdn': cdn,
            'ip': target['ip'],
            'name': target['name']
        })

print(f"CDN Measurement Targets: {len(all_targets)} total")
for target in all_targets:
    print(f"  ‚Ä¢ {target['cdn']:12s} ‚Üí {target['ip']:15s} ({target['name']})")

CDN Measurement Targets: 7 total
  ‚Ä¢ Cloudflare   ‚Üí 1.1.1.1         (Cloudflare Primary DNS)
  ‚Ä¢ Cloudflare   ‚Üí 1.0.0.1         (Cloudflare Secondary DNS)
  ‚Ä¢ Google       ‚Üí 8.8.8.8         (Google Primary DNS)
  ‚Ä¢ Google       ‚Üí 8.8.4.4         (Google Secondary DNS)
  ‚Ä¢ Quad9        ‚Üí 9.9.9.9         (Quad9 DNS)
  ‚Ä¢ OpenDNS      ‚Üí 208.67.222.222  (OpenDNS Primary)
  ‚Ä¢ OpenDNS      ‚Üí 208.67.220.220  (OpenDNS Secondary)


## Explore Available RIPE Atlas Probes

In [13]:
# Query available probes
print("Querying RIPE Atlas probe network...")

# Get sample of active probes from different regions
filters = {
    "status": 1,  # Connected probes only
    "tags": "system-ipv4-works",  # IPv4 capable
}

probes = ProbeRequest(**filters)
probe_list = list(probes)

print(f"‚úì Found {len(probe_list):,} active IPv4-capable probes")

# Sample probe distribution by country
probe_countries = {}
for probe in probe_list[:1000]:  # Sample first 1000
    country = probe.get('country_code', 'Unknown')
    probe_countries[country] = probe_countries.get(country, 0) + 1

print(f"Top 10 countries by probe count (sample of 1000):")
for country, count in sorted(probe_countries.items(), key=lambda x: x[1], reverse=True)[:10]:
    print(f"  {country:3s}: {count:3d} probes")

Querying RIPE Atlas probe network...
‚úì Found 13,086 active IPv4-capable probes
Top 10 countries by probe count (sample of 1000):
  DE : 151 probes
  US : 116 probes
  GB :  65 probes
  FR :  60 probes
  NL :  56 probes
  RU :  54 probes
  CZ :  29 probes
  IT :  27 probes
  CH :  27 probes
  AU :  27 probes


## Test Measurement (Small Scale)

Let's create a small test measurement first to verify everything works.

In [14]:
# Test with single target and 5 probes
TEST_MEASUREMENT = True  # Set to True to launch test

if TEST_MEASUREMENT:
    print("Launching TEST measurement...")
    print("Target: Cloudflare 1.1.1.1")
    print("Probes: 5 worldwide")
    print("Duration: 1 hour")
    print("Interval: 5 minutes (12 measurements)")
    
    # Test ping
    test_ping = Ping(
        af=4,
        target="1.1.1.1",
        description="CDN Test - Cloudflare 1.1.1.1",
        packets=3
    )
    
    test_source = AtlasSource(
        type="area",
        value="WW",
        requested=5
    )
    
    test_request = AtlasCreateRequest(
        key=PRIMARY_KEY,
        measurements=[test_ping],
        sources=[test_source],
        is_oneoff=False,
        interval=300,
        stop_time=int(time.time()) + 3600  # 1 hour
    )
    
    try:
        is_success, response = test_request.create()
        
        if is_success:
            test_id = response['measurements'][0]
            print(f"‚úì Test measurement created!")
            print(f"  Measurement ID: {test_id}")
            print(f"  URL: https://atlas.ripe.net/measurements/{test_id}/")
            print(f"‚è±Ô∏è  Wait 5-10 minutes, then run the results collection cell below")
            
            # Save test ID
            with open('../data/raw/test_measurement_id.txt', 'w') as f:
                f.write(str(test_id))
        else:
            print(f"‚úó Failed: {response}")
    
    except Exception as e:
        print(f"‚úó Error: {e}")
else:
    print("‚ö†Ô∏è  Test measurement NOT launched")
    print("Set TEST_MEASUREMENT = True to launch a small test first")

Launching TEST measurement...
Target: Cloudflare 1.1.1.1
Probes: 5 worldwide
Duration: 1 hour
Interval: 5 minutes (12 measurements)
‚úó Failed: {'error': {'detail': 'There was a problem with your request', 'status': 400, 'title': 'Bad Request', 'code': 102, 'errors': [{'source': {'pointer': ''}, 'detail': 'We do not allow more than 25 concurrent measurements to the same target: 1.1.1.1.'}]}}


## Next Steps

1. Set `TEST_MEASUREMENT = True` and run the cell above
2. Wait 10-15 minutes for data to collect
3. Continue to Phase 5: Feature Engineering

### Important Notes:
- Test measurement runs for 1 hour with 5 probes
- Full campaign would use 50 probes for 24 hours
- You can monitor measurements at https://atlas.ripe.net/
- Save measurement IDs to collect results later

# RIPE Atlas CDN Measurement Campaign

## Overview
This notebook designs and launches a measurement campaign to collect RTT, traceroute, and performance data for major CDN providers using RIPE Atlas probes.

## Objectives
1. Measure RTT to major CDN edge servers
2. Collect traceroute paths for network analysis
3. Compare CDN performance across geographic regions
4. Generate data for multi-metric CDN selection algorithm

## Target CDNs
- **Cloudflare**: 1.1.1.1, 1.0.0.1
- **Google Cloud CDN**: 8.8.8.8, 8.8.4.4
- **AWS CloudFront**: Multiple edge IPs
- **Akamai**: Multiple edge IPs
- **Fastly**: Multiple edge IPs

## Measurement Strategy
- **Probes**: 50-100 globally distributed
- **Duration**: 24-48 hours
- **Frequency**: Every 5 minutes (300 seconds)
- **Metrics**: RTT, packet loss, jitter, path characteristics

In [15]:
# Import libraries
import os
import json
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
from pathlib import Path
from dotenv import load_dotenv

# RIPE Atlas imports
from ripe.atlas.cousteau import (
    Ping,
    Traceroute,
    AtlasSource,
    AtlasCreateRequest,
    AtlasResultsRequest,
    Probe,
    ProbeRequest
)

# Load environment variables
load_dotenv()

# Create directories
Path('../data/raw').mkdir(parents=True, exist_ok=True)
Path('../data/processed').mkdir(parents=True, exist_ok=True)
Path('../results/figures').mkdir(parents=True, exist_ok=True)

print("‚úì Libraries imported successfully")
print(f"‚úì Data directories ready")

‚úì Libraries imported successfully
‚úì Data directories ready


## Load RIPE Atlas API Keys

In [16]:
# Load all API keys
api_keys = {
    'NPM': os.getenv('RIPE_ATLAS_API_KEY_NPM'),  # Get results from non-public measurement
    'LYM': os.getenv('RIPE_ATLAS_API_KEY_LYM'),  # List your measurements
    'SNM': os.getenv('RIPE_ATLAS_API_KEY_SNM'),  # Schedule new measurement (PRIMARY)
    'SRM': os.getenv('RIPE_ATLAS_API_KEY_SRM'),  # Stop running measurement
    'UEM': os.getenv('RIPE_ATLAS_API_KEY_UEM'),  # Update existing measurement
    'NPMP': os.getenv('RIPE_ATLAS_API_KEY_NPMP'), # Get non-public results from probes
    'GRIP': os.getenv('RIPE_ATLAS_API_KEY_GRIP'), # Get restricted probe info
    'SPP': os.getenv('RIPE_ATLAS_API_KEY_SPP'),  # Set probe parameters
    'SIP': os.getenv('RIPE_ATLAS_API_KEY_SIP'),  # Show probe information
}

print("RIPE Atlas API Keys Status:")
for key_name, key_value in api_keys.items():
    status = "‚úì" if key_value else "‚úó"
    masked = f"{key_value[:8]}...{key_value[-4:]}" if key_value else "NOT FOUND"
    print(f"  {status} {key_name:6s}: {masked}")

# Primary key for creating measurements
PRIMARY_KEY = api_keys['SNM']
print(f"‚úì Using SNM key for creating measurements")

RIPE Atlas API Keys Status:
  ‚úì NPM   : b7907981...3062
  ‚úì LYM   : 6d54c539...e737
  ‚úì SNM   : 7e76f32e...8589
  ‚úì SRM   : d14d389d...f3bb
  ‚úì UEM   : 6ba40cd6...7801
  ‚úì NPMP  : 6f6e9faf...f013
  ‚úì GRIP  : 0fde8760...095f
  ‚úì SPP   : c863dea0...ecf4
  ‚úì SIP   : c05ece2b...4d42
‚úì Using SNM key for creating measurements


## Define CDN Targets

In [17]:
# Major CDN targets for measurement
cdn_targets = {
    'Cloudflare': [
        {'ip': '1.1.1.1', 'name': 'Cloudflare Primary DNS'},
        {'ip': '1.0.0.1', 'name': 'Cloudflare Secondary DNS'},
    ],
    'Google': [
        {'ip': '8.8.8.8', 'name': 'Google Primary DNS'},
        {'ip': '8.8.4.4', 'name': 'Google Secondary DNS'},
    ],
    'Quad9': [
        {'ip': '9.9.9.9', 'name': 'Quad9 DNS'},
    ],
    'OpenDNS': [
        {'ip': '208.67.222.222', 'name': 'OpenDNS Primary'},
        {'ip': '208.67.220.220', 'name': 'OpenDNS Secondary'},
    ]
}

# Flatten for easy iteration
all_targets = []
for cdn, targets in cdn_targets.items():
    for target in targets:
        all_targets.append({
            'cdn': cdn,
            'ip': target['ip'],
            'name': target['name']
        })

print(f"CDN Measurement Targets: {len(all_targets)} total")
for target in all_targets:
    print(f"  ‚Ä¢ {target['cdn']:12s} ‚Üí {target['ip']:15s} ({target['name']})")

CDN Measurement Targets: 7 total
  ‚Ä¢ Cloudflare   ‚Üí 1.1.1.1         (Cloudflare Primary DNS)
  ‚Ä¢ Cloudflare   ‚Üí 1.0.0.1         (Cloudflare Secondary DNS)
  ‚Ä¢ Google       ‚Üí 8.8.8.8         (Google Primary DNS)
  ‚Ä¢ Google       ‚Üí 8.8.4.4         (Google Secondary DNS)
  ‚Ä¢ Quad9        ‚Üí 9.9.9.9         (Quad9 DNS)
  ‚Ä¢ OpenDNS      ‚Üí 208.67.222.222  (OpenDNS Primary)
  ‚Ä¢ OpenDNS      ‚Üí 208.67.220.220  (OpenDNS Secondary)


## Explore Available RIPE Atlas Probes

In [18]:
# Query available probes
print("Querying RIPE Atlas probe network...")

# Get sample of active probes from different regions
filters = {
    "status": 1,  # Connected probes only
    "tags": "system-ipv4-works",  # IPv4 capable
}

probes = ProbeRequest(**filters)
probe_list = list(probes)

print(f"‚úì Found {len(probe_list):,} active IPv4-capable probes")

# Sample probe distribution by country
probe_countries = {}
for probe in probe_list[:1000]:  # Sample first 1000
    country = probe.get('country_code', 'Unknown')
    probe_countries[country] = probe_countries.get(country, 0) + 1

print(f"Top 10 countries by probe count (sample of 1000):")
for country, count in sorted(probe_countries.items(), key=lambda x: x[1], reverse=True)[:10]:
    print(f"  {country:3s}: {count:3d} probes")

Querying RIPE Atlas probe network...
‚úì Found 13,086 active IPv4-capable probes
Top 10 countries by probe count (sample of 1000):
  DE : 151 probes
  US : 116 probes
  GB :  65 probes
  FR :  60 probes
  NL :  56 probes
  RU :  54 probes
  CZ :  29 probes
  IT :  27 probes
  CH :  27 probes
  AU :  27 probes


## Design Measurement Campaign

### Measurement Types:
1. **Ping**: RTT measurements (every 5 minutes for 24 hours)
2. **Traceroute**: Path analysis (every 30 minutes for 24 hours)

### Probe Selection Strategy:
- 50 probes globally distributed
- Focus on North America and Europe
- Ensure geographic diversity

### Cost Estimation:
- Each ping measurement: ~1 credit per probe per measurement
- 50 probes √ó 288 measurements/day (every 5 min) = 14,400 credits/day
- Free tier: 1M credits (plenty for our needs!)

In [19]:
# Configuration for measurements
MEASUREMENT_CONFIG = {
    'num_probes': 50,
    'probe_regions': ['WW'],  # Worldwide
    'duration_hours': 24,
    'ping_interval': 300,  # 5 minutes
    'traceroute_interval': 1800,  # 30 minutes
    'packets_per_ping': 3,
}

print("Measurement Campaign Configuration:")
print("=" * 50)
for key, value in MEASUREMENT_CONFIG.items():
    print(f"  {key:20s}: {value}")

# Calculate expected measurements
measurements_per_day = (24 * 3600) // MEASUREMENT_CONFIG['ping_interval']
total_measurements = measurements_per_day * MEASUREMENT_CONFIG['num_probes'] * len(all_targets)

print(f"Expected Data Collection:")
print(f"  Measurements per probe per day: {measurements_per_day}")
print(f"  Total measurements (all probes, all targets): {total_measurements:,}")
print(f"  Estimated credits: ~{total_measurements:,}")

Measurement Campaign Configuration:
  num_probes          : 50
  probe_regions       : ['WW']
  duration_hours      : 24
  ping_interval       : 300
  traceroute_interval : 1800
  packets_per_ping    : 3
Expected Data Collection:
  Measurements per probe per day: 288
  Total measurements (all probes, all targets): 100,800
  Estimated credits: ~100,800


## Create Ping Measurements

In [20]:
# Function to create ping measurement
def create_ping_measurement(target_ip, target_name, cdn_name, api_key, num_probes=50):
    """
    Create a ping measurement for a specific CDN target.
    """
    # Define ping measurement
    ping = Ping(
        af=4,  # IPv4
        target=target_ip,
        description=f"CDN RTT Measurement - {cdn_name} - {target_name}",
        packets=3,  # Send 3 packets per measurement
    )
    
    # Define probe source
    source = AtlasSource(
        type="area",
        value="WW",  # Worldwide
        requested=num_probes,  # Number of probes
        tags={"include": ["system-ipv4-works"]}
    )
    
    # Create measurement request
    atlas_request = AtlasCreateRequest(
        key=api_key,
        measurements=[ping],
        sources=[source],
        is_oneoff=False,  # Recurring measurement
        interval=300,  # Every 5 minutes
        stop_time=int(time.time()) + (24 * 3600)  # Run for 24 hours
    )
    
    return atlas_request

print("‚úì Ping measurement function ready")

‚úì Ping measurement function ready


In [21]:
# Create measurements for all targets
# WARNING: This will launch real measurements!
# Uncomment the following lines when ready to start

LAUNCH_MEASUREMENTS = False  # Set to True when ready

measurement_ids = []

if LAUNCH_MEASUREMENTS:
    print("Launching RIPE Atlas measurements...")
    
    for target in all_targets:
        print(f"Creating measurement for {target['cdn']} - {target['name']}...")
        
        # Create measurement request
        request = create_ping_measurement(
            target_ip=target['ip'],
            target_name=target['name'],
            cdn_name=target['cdn'],
            api_key=PRIMARY_KEY,
            num_probes=MEASUREMENT_CONFIG['num_probes']
        )
        
        # Submit request
        try:
            is_success, response = request.create()
            
            if is_success:
                measurement_id = response['measurements'][0]
                measurement_ids.append({
                    'id': measurement_id,
                    'cdn': target['cdn'],
                    'target': target['ip'],
                    'name': target['name'],
                    'created_at': datetime.now().isoformat()
                })
                print(f"  ‚úì Measurement ID: {measurement_id}")
            else:
                print(f"  ‚úó Failed: {response}")
        
        except Exception as e:
            print(f"  ‚úó Error: {e}")
        
        # Small delay between requests
        time.sleep(2)
    
    # Save measurement IDs
    measurement_df = pd.DataFrame(measurement_ids)
    measurement_df.to_csv('../data/raw/ripe_measurement_ids.csv', index=False)
    
    print(f"‚úì Launched {len(measurement_ids)} measurements")
    print(f"‚úì Measurement IDs saved to ../data/raw/ripe_measurement_ids.csv")
    print(f"Measurements will run for 24 hours.")
    print(f"Check back in a few hours to collect results!")
    
else:
    print("‚ö†Ô∏è  Measurements NOT launched (LAUNCH_MEASUREMENTS = False)")
    print("When ready to launch:")
    print("1. Review the configuration above")
    print("2. Set LAUNCH_MEASUREMENTS = True")
    print("3. Re-run this cell")
    print("üí° Tip: Start with a small test first (e.g., 2-3 targets, 10 probes)")

‚ö†Ô∏è  Measurements NOT launched (LAUNCH_MEASUREMENTS = False)
When ready to launch:
1. Review the configuration above
2. Set LAUNCH_MEASUREMENTS = True
3. Re-run this cell
üí° Tip: Start with a small test first (e.g., 2-3 targets, 10 probes)


## Test Measurement (Small Scale)

Let's create a small test measurement first to verify everything works.

In [22]:
# Test with single target and 5 probes
TEST_MEASUREMENT = True  # Set to True to launch test

if TEST_MEASUREMENT:
    print("Launching TEST measurement...")
    print("Target: Google DNS 8.8.8.8")
    print("Probes: 5 worldwide")
    print("Duration: 1 hour")
    print("Interval: 5 minutes (12 measurements)")
    
    # Test ping
    test_ping = Ping(
        af=4,
        target="8.8.8.8",
        description="CDN Test - Google DNS 8.8.8.8",
        packets=3
    )
    
    test_source = AtlasSource(
        type="area",
        value="WW",
        requested=5
    )
    
    test_request = AtlasCreateRequest(
        key=PRIMARY_KEY,
        measurements=[test_ping],
        sources=[test_source],
        is_oneoff=False,
        interval=300,
        stop_time=int(time.time()) + 3600  # 1 hour
    )
    
    try:
        is_success, response = test_request.create()
        
        if is_success:
            test_id = response['measurements'][0]
            print(f"‚úì Test measurement created!")
            print(f"  Measurement ID: {test_id}")
            print(f"  URL: https://atlas.ripe.net/measurements/{test_id}/")
            print(f"‚è±Ô∏è  Wait 5-10 minutes, then run the results collection cell below")
            
            # Save test ID
            with open('../data/raw/test_measurement_id.txt', 'w') as f:
                f.write(str(test_id))
        else:
            print(f"‚úó Failed: {response}")
    
    except Exception as e:
        print(f"‚úó Error: {e}")
else:
    print("‚ö†Ô∏è  Test measurement NOT launched")
    print("Set TEST_MEASUREMENT = True to launch a small test first")

Launching TEST measurement...
Target: Google DNS 8.8.8.8
Probes: 5 worldwide
Duration: 1 hour
Interval: 5 minutes (12 measurements)
‚úó Failed: {'error': {'detail': 'There was a problem with your request', 'status': 400, 'title': 'Bad Request', 'code': 102, 'errors': [{'source': {'pointer': ''}, 'detail': 'We do not allow more than 25 concurrent measurements to the same target: 8.8.8.8.'}]}}


## Retrieve Measurement Results

After measurements have been running for a while, collect the results.

In [23]:
# Function to retrieve results
def get_measurement_results(measurement_id, api_key=None):
    """
    Retrieve results for a specific measurement.
    """
    kwargs = {
        "msm_id": measurement_id
    }
    
    if api_key:
        kwargs["key"] = api_key
    
    is_success, results = AtlasResultsRequest(**kwargs).create()
    
    if is_success:
        return results
    else:
        print(f"Error retrieving results: {results}")
        return None

print("‚úì Results retrieval function ready")

‚úì Results retrieval function ready


In [24]:
# Retrieve test measurement results (if test was run)
test_id_file = '../data/raw/test_measurement_id.txt'

if os.path.exists(test_id_file):
    with open(test_id_file, 'r') as f:
        test_measurement_id = int(f.read().strip())
    
    print(f"Retrieving results for test measurement {test_measurement_id}...")
    
    results = get_measurement_results(test_measurement_id)
    
    if results:
        print(f"‚úì Retrieved {len(results)} result sets")
        print(f"Sample result:")
        print(json.dumps(results[0], indent=2))
        
        # Parse RTT values
        rtt_values = []
        for result in results:
            if 'result' in result:
                for ping in result['result']:
                    if 'rtt' in ping:
                        rtt_values.append(ping['rtt'])
        
        if rtt_values:
            print(f"RTT Statistics:")
            print(f"  Samples: {len(rtt_values)}")
            print(f"  Min RTT: {min(rtt_values):.2f} ms")
            print(f"  Max RTT: {max(rtt_values):.2f} ms")
            print(f"  Avg RTT: {np.mean(rtt_values):.2f} ms")
            print(f"  Median RTT: {np.median(rtt_values):.2f} ms")
else:
    print("No test measurement found")
    print("Run a test measurement first to see how results work")

No test measurement found
Run a test measurement first to see how results work


## Load and Parse Full Results

After 24 hours, collect all measurement results.

In [25]:
# Load measurement IDs
measurement_file = '../data/raw/ripe_measurement_ids.csv'

if os.path.exists(measurement_file):
    measurements_df = pd.read_csv(measurement_file)
    print(f"Found {len(measurements_df)} measurements to collect")
    
    all_results = []
    
    for idx, row in measurements_df.iterrows():
        measurement_id = row['id']
        cdn = row['cdn']
        target = row['target']
        
        print(f"Collecting results for {cdn} - {target} (ID: {measurement_id})...")
        
        results = get_measurement_results(measurement_id)
        
        if results:
            # Parse results
            for result in results:
                probe_id = result.get('prb_id')
                timestamp = result.get('timestamp')
                
                if 'result' in result:
                    rtts = [ping.get('rtt') for ping in result['result'] if 'rtt' in ping]
                    
                    if rtts:
                        all_results.append({
                            'measurement_id': measurement_id,
                            'cdn': cdn,
                            'target': target,
                            'probe_id': probe_id,
                            'timestamp': timestamp,
                            'min_rtt': min(rtts),
                            'max_rtt': max(rtts),
                            'avg_rtt': np.mean(rtts),
                            'rtt_samples': len(rtts)
                        })
            
            print(f"  ‚úì Collected {len(results)} result sets")
        else:
            print(f"  ‚úó No results available yet")
        
        time.sleep(1)  # Rate limiting
    
    # Save results
    if all_results:
        results_df = pd.DataFrame(all_results)
        results_df.to_csv('../data/raw/ripe_atlas_results.csv', index=False)
        print(f"‚úì Saved {len(results_df)} measurements to ../data/raw/ripe_atlas_results.csv")
    else:
        print("‚ö†Ô∏è  No results available yet. Wait longer and try again.")
else:
    print("No measurements found")
    print("Launch measurements first!")

No measurements found
Launch measurements first!


## Quick Analysis of RIPE Results

In [26]:
# Analyze results if available
results_file = '../data/raw/ripe_atlas_results.csv'

if os.path.exists(results_file):
    df_ripe = pd.read_csv(results_file)
    
    print(f"RIPE Atlas Results Summary:")
    print("=" * 50)
    print(f"Total measurements: {len(df_ripe):,}")
    print(f"Unique probes: {df_ripe['probe_id'].nunique()}")
    print(f"CDNs measured: {df_ripe['cdn'].nunique()}")
    print(f"Time range: {pd.to_datetime(df_ripe['timestamp'], unit='s').min()} to {pd.to_datetime(df_ripe['timestamp'], unit='s').max()}")
    
    print(f"RTT by CDN:")
    print(df_ripe.groupby('cdn')['avg_rtt'].agg(['count', 'mean', 'median', 'std']))
    
    # Visualization
    fig, axes = plt.subplots(1, 2, figsize=(15, 5))
    
    # Box plot by CDN
    df_ripe.boxplot(column='avg_rtt', by='cdn', ax=axes[0])
    axes[0].set_title('RTT Distribution by CDN')
    axes[0].set_xlabel('CDN Provider')
    axes[0].set_ylabel('Average RTT (ms)')
    
    # Time series
    df_ripe['datetime'] = pd.to_datetime(df_ripe['timestamp'], unit='s')
    for cdn in df_ripe['cdn'].unique():
        cdn_data = df_ripe[df_ripe['cdn'] == cdn]
        cdn_hourly = cdn_data.groupby(cdn_data['datetime'].dt.hour)['avg_rtt'].mean()
        axes[1].plot(cdn_hourly.index, cdn_hourly.values, marker='o', label=cdn)
    
    axes[1].set_title('Average RTT by Hour of Day')
    axes[1].set_xlabel('Hour')
    axes[1].set_ylabel('Average RTT (ms)')
    axes[1].legend()
    axes[1].grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.savefig('../results/figures/ripe_atlas_cdn_comparison.png', dpi=300, bbox_inches='tight')
    print("‚úì Visualization saved")
    plt.show()
else:
    print("No RIPE results available yet")

No RIPE results available yet


## Next Steps

### After Measurements Complete:
1. ‚úÖ Collect all measurement results
2. Merge with M-Lab data
3. Merge with Lumos5G data
4. Feature engineering notebook
5. ML model development

### Measurement Timeline:
- **Now**: Launch measurements
- **After 1 hour**: Check test measurement
- **After 6 hours**: Collect partial results
- **After 24 hours**: Collect complete dataset
- **Then**: Move to Phase 5 (Feature Engineering)