124 changes: 124 additions & 0 deletions tasks/measurements/alexa/alexa.py
@@ -0,0 +1,124 @@
import csv
import json
import pprint
import subprocess
from typing import Dict, List, Optional, Union

from netunicorn.base import Failure, Task
from ping3 import ping

class AlexaWebsitesTask(Task):
    """Measure network metrics (traceroute, ping, DNS resolution, HTTP timing) for Alexa top websites."""
requirements = [
"sudo apt-get install -y curl dnsutils traceroute",
"pip install ping3"
]

    def __init__(self, domain: Optional[str] = None, filepath: str = "alexa_websites.csv", output_path: Optional[str] = None, top_k: int = 100, *args, **kwargs):
super().__init__(*args, **kwargs)
self.domain = domain
self.filepath = filepath
self.output_path = output_path
self.top_k = top_k

    def get_traceroute(self) -> Union[str, Failure]:
        # Trace the route to the domain, capped at 10 hops to bound runtime.
        try:
            result = subprocess.run(["traceroute", "-m", "10", self.domain], capture_output=True, text=True, check=True)
return result.stdout
except Exception as e:
return Failure(f"Traceroute failed: {e}")

    def measure_ping(self) -> Union[Dict[str, float], Failure]:
        # ping3.ping returns the RTT in seconds, or None on timeout; convert to ms.
        try:
            ping_value = ping(self.domain)
            if ping_value is None:
                return Failure("Ping returned None.")
            return {"value": ping_value * 1000, "unit": "ms"}
except Exception as e:
return Failure(f"Ping failed: {e}")

    def measure_dns_time(self) -> Union[Dict[str, float], Failure]:
        # Parse the "Query time: N msec" line from dig's output.
        try:
            result = subprocess.run(["dig", self.domain], capture_output=True, text=True, check=True)
            for line in result.stdout.splitlines():
                if "Query time" in line:
                    return {"value": float(line.split(":")[1].strip().split(" ")[0]), "unit": "ms"}
return Failure("Query time not found in DNS response.")
except Exception as e:
return Failure(f"DNS resolution failed: {e}")

    def measure_timing(self) -> Union[Dict[str, Dict[str, float]], Failure]:
        # Use curl's --write-out timing variables to capture each phase of the HTTPS request.
        try:
result = subprocess.run([
"curl",
"-o", "/dev/null",
"-s",
"-w",
(
"time_appconnect: %{time_appconnect}\n"
"time_connect: %{time_connect}\n"
"time_namelookup: %{time_namelookup}\n"
"time_pretransfer: %{time_pretransfer}\n"
"time_redirect: %{time_redirect}\n"
"time_starttransfer: %{time_starttransfer}\n"
"time_total: %{time_total}\n"
),
"-H", "Cache-Control: no-cache",
f"https://{self.domain}",
], capture_output=True, text=True, check=True)
            # curl reports times in seconds; convert each metric to milliseconds.
            metrics = {
                key.strip(): {"value": float(value.strip()) * 1000, "unit": "ms"}
                for line in result.stdout.splitlines()
                for key, value in [line.split(": ", 1)]
            }
return metrics
except Exception as e:
return Failure(f"Network Timing measurement failed: {e}")

    @staticmethod
    def load_websites(filepath: str, top_k: int) -> List[str]:
        # Load the top k websites from a CSV file with rows of the form "rank,domain".
        websites = []
        with open(filepath, "r") as file:
            reader = csv.reader(file)
            for row in reader:
                if len(websites) < top_k:
                    websites.append(row[1])
                else:
                    break
        return websites

def run(self) -> Union[Dict[str, Dict], Failure]:
if self.domain:
# Run for a single domain
return {
"traceroute": self.get_traceroute(),
"ping_time": self.measure_ping(),
"dns_time": self.measure_dns_time(),
"measure_timing": self.measure_timing(),
}
else:
# Run for all websites in a file
websites = self.load_websites(self.filepath, self.top_k)
print(f"Loaded {len(websites)} websites.")

results = {}
            for website in websites:
                print(f"Processing: {website}")
                try:
                    self.domain = website
                    results[website] = self.run()
                except Exception as e:
                    results[website] = Failure(f"Failed to process {website}: {e}")
            # Reset so subsequent calls run in batch mode again.
            self.domain = None

        # Save results to a JSON file if output_path is provided
        if self.output_path:
            print(f"Saving results to {self.output_path}")
            try:
                with open(self.output_path, "w") as f:
                    # Failure objects are not JSON-serializable, so fall back to their string form.
                    json.dump(results, f, indent=4, default=str)
            except Exception as e:
                return Failure(f"Failed to write results to file: {e}")
else:
pprint.pp(results)

return results
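For reviewers, a minimal local smoke test might look like the sketch below. It assumes the system tools from `requirements` (curl, dnsutils, traceroute) plus `ping3` are already installed, and that the task can be instantiated directly outside a netunicorn pipeline; `example.com` and `results.json` are illustrative placeholders.

```python
# Local smoke test for AlexaWebsitesTask (hypothetical usage, not part of this PR).
from alexa import AlexaWebsitesTask

# Single-domain mode: measure one site and inspect the raw metrics.
task = AlexaWebsitesTask(domain="example.com")
print(task.run())

# Batch mode: measure the top 10 sites from the bundled CSV and write JSON output.
batch = AlexaWebsitesTask(filepath="alexa_websites.csv", top_k=10, output_path="results.json")
batch.run()
```

Note that ICMP ping typically requires raw-socket privileges, so the ping measurement may return a Failure when run as an unprivileged user.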
100 changes: 100 additions & 0 deletions tasks/measurements/alexa/alexa_websites.csv
@@ -0,0 +1,100 @@
1,google.com
2,facebook.com
3,youtube.com
4,yahoo.com
5,baidu.com
6,wikipedia.org
7,qq.com
8,taobao.com
9,twitter.com
10,amazon.com
11,linkedin.com
12,live.com
13,google.co.in
14,sina.com.cn
15,hao123.com
16,blogspot.com
17,weibo.com
18,tmall.com
19,vk.com
20,wordpress.com
21,yahoo.co.jp
22,sohu.com
23,yandex.ru
24,ebay.com
25,google.de
26,bing.com
27,pinterest.com
28,google.co.uk
29,163.com
30,360.cn
31,google.fr
32,ask.com
33,instagram.com
34,google.co.jp
35,tumblr.com
36,msn.com
37,google.com.br
38,mail.ru
39,microsoft.com
40,xvideos.com
41,paypal.com
42,google.ru
43,soso.com
44,adcash.com
45,google.es
46,google.it
47,imdb.com
48,apple.com
49,imgur.com
50,neobux.com
51,craigslist.org
52,amazon.co.jp
53,t.co
54,xhamster.com
55,stackoverflow.com
56,reddit.com
57,google.com.mx
58,google.com.hk
59,cnn.com
60,google.ca
61,fc2.com
62,go.com
63,ifeng.com
64,bbc.co.uk
65,vube.com
66,people.com.cn
67,blogger.com
68,aliexpress.com
69,odnoklassniki.ru
70,wordpress.org
71,alibaba.com
72,gmw.cn
73,adobe.com
74,huffingtonpost.com
75,google.com.tr
76,xinhuanet.com
77,googleusercontent.com
78,youku.com
79,godaddy.com
80,pornhub.com
81,akamaihd.net
82,thepiratebay.se
83,kickass.to
84,google.com.au
85,amazon.de
86,clkmon.com
87,ebay.de
88,alipay.com
89,google.pl
90,espn.go.com
91,dailymotion.com
92,about.com
93,bp.blogspot.com
94,blogspot.in
95,netflix.com
96,vimeo.com
97,dailymail.co.uk
98,redtube.com
99,rakuten.co.jp
100,conduit.com