124 changes: 124 additions & 0 deletions tasks/measurements/alexa/alexa.py
@@ -0,0 +1,124 @@
import csv
import json
import pprint
import subprocess
from typing import Dict, List, Optional, Union

from netunicorn.base import Failure, Task
from ping3 import ping

class AlexaWebsitesTask(Task):
    """Measure network metrics (traceroute, ping, DNS resolution, HTTP timing) for Alexa top websites."""
requirements = [
"sudo apt-get install -y curl dnsutils traceroute",
"pip install ping3"
]

    def __init__(self, domain: Optional[str] = None, filepath: str = "alexa_websites.csv", output_path: Optional[str] = None, top_k: int = 100, *args, **kwargs):
super().__init__(*args, **kwargs)
self.domain = domain
self.filepath = filepath
self.output_path = output_path
self.top_k = top_k

    def get_traceroute(self) -> Union[str, Failure]:
        # Trace the route to the domain, capped at 10 hops to bound runtime.
        try:
            result = subprocess.run(["traceroute", "-m", "10", self.domain], capture_output=True, text=True, check=True)
return result.stdout
except Exception as e:
return Failure(f"Traceroute failed: {e}")

    def measure_ping(self) -> Union[Dict[str, float], Failure]:
        # ping3.ping returns the RTT in seconds, or None on timeout; convert to ms.
        try:
            ping_value = ping(self.domain)
            if ping_value is None:
                return Failure("Ping returned None.")
            return {"value": ping_value * 1000, "unit": "ms"}
except Exception as e:
return Failure(f"Ping failed: {e}")

    def measure_dns_time(self) -> Union[Dict[str, float], Failure]:
        # Parse the "Query time: N msec" line from dig's output.
        try:
            result = subprocess.run(["dig", self.domain], capture_output=True, text=True, check=True)
            for line in result.stdout.splitlines():
                if "Query time" in line:
                    return {"value": float(line.split(":")[1].strip().split(" ")[0]), "unit": "ms"}
return Failure("Query time not found in DNS response.")
except Exception as e:
return Failure(f"DNS resolution failed: {e}")

    def measure_timing(self) -> Union[Dict[str, Dict[str, float]], Failure]:
        # Use curl's --write-out timing variables to capture each phase of the HTTPS request.
        try:
result = subprocess.run([
"curl",
"-o", "/dev/null",
"-s",
"-w",
(
"time_appconnect: %{time_appconnect}\n"
"time_connect: %{time_connect}\n"
"time_namelookup: %{time_namelookup}\n"
"time_pretransfer: %{time_pretransfer}\n"
"time_redirect: %{time_redirect}\n"
"time_starttransfer: %{time_starttransfer}\n"
"time_total: %{time_total}\n"
),
"-H", "Cache-Control: no-cache",
f"https://{self.domain}",
], capture_output=True, text=True, check=True)
            # curl reports times in seconds; convert each metric to milliseconds.
            metrics = {
                key.strip(): {"value": float(value.strip()) * 1000, "unit": "ms"}
                for line in result.stdout.splitlines()
                for key, value in [line.split(": ", 1)]
            }
return metrics
except Exception as e:
return Failure(f"Network Timing measurement failed: {e}")

    @staticmethod
    def load_websites(filepath: str, top_k: int) -> List[str]:
        # Load the top k websites from a CSV file with rows of the form "rank,domain".
        websites = []
        with open(filepath, "r") as file:
            reader = csv.reader(file)
            for row in reader:
                if len(websites) < top_k:
                    websites.append(row[1])
                else:
                    break
        return websites

def run(self) -> Union[Dict[str, Dict], Failure]:
if self.domain:
# Run for a single domain
return {
"traceroute": self.get_traceroute(),
"ping_time": self.measure_ping(),
"dns_time": self.measure_dns_time(),
"measure_timing": self.measure_timing(),
}
else:
# Run for all websites in a file
websites = self.load_websites(self.filepath, self.top_k)
print(f"Loaded {len(websites)} websites.")

results = {}
            for website in websites:
                print(f"Processing: {website}")
                try:
                    self.domain = website
                    results[website] = self.run()
                except Exception as e:
                    results[website] = Failure(f"Failed to process {website}: {e}")
            # Reset so subsequent calls run in batch mode again.
            self.domain = None

        # Save results to a JSON file if output_path is provided
        if self.output_path:
            print(f"Saving results to {self.output_path}")
            try:
                with open(self.output_path, "w") as f:
                    # Failure objects are not JSON-serializable, so fall back to their string form.
                    json.dump(results, f, indent=4, default=str)
            except Exception as e:
                return Failure(f"Failed to write results to file: {e}")
else:
pprint.pp(results)

return results
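For reviewers, a minimal local smoke test might look like the sketch below. It assumes the system tools from `requirements` (curl, dnsutils, traceroute) plus `ping3` are already installed, and that the task can be instantiated directly outside a netunicorn pipeline; `example.com` and `results.json` are illustrative placeholders.

```python
# Local smoke test for AlexaWebsitesTask (hypothetical usage, not part of this PR).
from alexa import AlexaWebsitesTask

# Single-domain mode: measure one site and inspect the raw metrics.
task = AlexaWebsitesTask(domain="example.com")
print(task.run())

# Batch mode: measure the top 10 sites from the bundled CSV and write JSON output.
batch = AlexaWebsitesTask(filepath="alexa_websites.csv", top_k=10, output_path="results.json")
batch.run()
```

Note that ICMP ping typically requires raw-socket privileges, so the ping measurement may return a Failure when run as an unprivileged user.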
100 changes: 100 additions & 0 deletions tasks/measurements/alexa/alexa_websites.csv
@@ -0,0 +1,100 @@
1,google.com
2,facebook.com
3,youtube.com
4,yahoo.com
5,baidu.com
6,wikipedia.org
7,qq.com
8,taobao.com
9,twitter.com
10,amazon.com
11,linkedin.com
12,live.com
13,google.co.in
14,sina.com.cn
15,hao123.com
16,blogspot.com
17,weibo.com
18,tmall.com
19,vk.com
20,wordpress.com
21,yahoo.co.jp
22,sohu.com
23,yandex.ru
24,ebay.com
25,google.de
26,bing.com
27,pinterest.com
28,google.co.uk
29,163.com
30,360.cn
31,google.fr
32,ask.com
33,instagram.com
34,google.co.jp
35,tumblr.com
36,msn.com
37,google.com.br
38,mail.ru
39,microsoft.com
40,xvideos.com
41,paypal.com
42,google.ru
43,soso.com
44,adcash.com
45,google.es
46,google.it
47,imdb.com
48,apple.com
49,imgur.com
50,neobux.com
51,craigslist.org
52,amazon.co.jp
53,t.co
54,xhamster.com
55,stackoverflow.com
56,reddit.com
57,google.com.mx
58,google.com.hk
59,cnn.com
60,google.ca
61,fc2.com
62,go.com
63,ifeng.com
64,bbc.co.uk
65,vube.com
66,people.com.cn
67,blogger.com
68,aliexpress.com
69,odnoklassniki.ru
70,wordpress.org
71,alibaba.com
72,gmw.cn
73,adobe.com
74,huffingtonpost.com
75,google.com.tr
76,xinhuanet.com
77,googleusercontent.com
78,youku.com
79,godaddy.com
80,pornhub.com
81,akamaihd.net
82,thepiratebay.se
83,kickass.to
84,google.com.au
85,amazon.de
86,clkmon.com
87,ebay.de
88,alipay.com
89,google.pl
90,espn.go.com
91,dailymotion.com
92,about.com
93,bp.blogspot.com
94,blogspot.in
95,netflix.com
96,vimeo.com
97,dailymail.co.uk
98,redtube.com
99,rakuten.co.jp
100,conduit.com