In [23]:
! pip install aiohttp

Collecting aiohttp
  Using cached aiohttp-3.9.5-cp311-cp311-macosx_11_0_arm64.whl.metadata (7.5 kB)
Collecting aiosignal>=1.1.2 (from aiohttp)
  Using cached aiosignal-1.3.1-py3-none-any.whl.metadata (4.0 kB)
Collecting attrs>=17.3.0 (from aiohttp)
  Using cached attrs-23.2.0-py3-none-any.whl.metadata (9.5 kB)
Collecting frozenlist>=1.1.1 (from aiohttp)
  Using cached frozenlist-1.4.1-cp311-cp311-macosx_11_0_arm64.whl.metadata (12 kB)
Collecting multidict<7.0,>=4.5 (from aiohttp)
  Using cached multidict-6.0.5-cp311-cp311-macosx_11_0_arm64.whl.metadata (4.2 kB)
Collecting yarl<2.0,>=1.0 (from aiohttp)
  Using cached yarl-1.9.4-cp311-cp311-macosx_11_0_arm64.whl.metadata (31 kB)
Using cached aiohttp-3.9.5-cp311-cp311-macosx_11_0_arm64.whl (390 kB)
Using cached aiosignal-1.3.1-py3-none-any.whl (7.6 kB)
Using cached attrs-23.2.0-py3-none-any.whl (60 kB)
Using cached frozenlist-1.4.1-cp311-cp311-macosx_11_0_arm64.whl (53 kB)
Using cached multidict-6.0.5-cp311-cp311-macosx_11_0_arm64.whl (30

In [19]:
import argparse
import requests
import time
from threading import Thread

class HTTPBenchmark:
    def __init__(self, target_url:str, qps: int=1) -> None:
        """
        Initialize the HTTPBenchmark object with the target URL and QPS
        Inputs:
        - target_url: The URL to test
        - qps: The number of requests per second to send
        """
        self.target_url = target_url
        self.qps = qps 
        self.results = {
            'total_requests': 0,
            'successful_requests': 0,
            'failed_requests': 0,
            'latencies': []
        }

    def __str__(self) -> str:
        """
        Return a string representation of the test
        """
        return f"HTTP Benchmark: {self.target_url} @ {self.qps} qps"
    
    def __repr__(self) -> str:
        """
        Return a string representation of the test
        """
        return self.__str__()

    def pretty_print_results(self) -> None:
        """
        Pretty print the results of the test
        Print in tabular format:
        -------
        | URL: {self.target_url} |
        -------
        | Num Requests: {self.results['total_requests']} |
        -------
        | Num Successful Requests: {self.results['successful_requests']} |
        -------
        | Num Failed Requests: {self.results['failed_requests']} |
        -------
        | Average Latency (ms): {self.results['latencies']} |
        -------
        """
        
        ret_parts = [
            f"URL: {self.target_url}",
            f"Num Requests: {self.results['total_requests']}",
            f"Num Successful Requests: {self.results['successful_requests']}",
            f"Num Failed Requests: {self.results['failed_requests']}",
            f"Average Latency (ms): {round(sum(self.results['latencies'])/len(self.results['latencies'])*1000,2)}"
        ]

        # Use max_len of parts to format the output table's length
        max_len = max(len(part) for part in ret_parts)

        # Create table
        line_sep = '\n' + '-' * (max_len+4) + '\n'
        table_lines = [ '| ' + part + ' '*(max_len-len(part)+1) + '|' for part in ret_parts]

        table = line_sep + line_sep.join(table_lines) + line_sep

        print(table)


    def send_request(self) -> None:
        """
        Helper function to send a request to the target URL
        Process the response and updates the results
        """
        start_time = time.time()
        try:
            response = requests.get(self.target_url)
            elapsed_time = time.time() - start_time
            self.results['latencies'].append(elapsed_time)
            self.results['total_requests'] += 1
            if response.status_code == 200:
                self.results['successful_requests'] += 1
            else:
                self.results['failed_requests'] += 1
        except requests.RequestException:
            self.results['failed_requests'] += 1

    def start_test(self, duration: int=10) -> None:
        """
        Start the test for the specified duration
        Inputs:
        - duration: The duration of the test in seconds
        """
        start_time = time.time()
        while time.time() - start_time <= duration:
            thread = Thread(target=self.send_request)
            thread.start()
            time.sleep(1 / self.qps)
        thread.join()



In [20]:
benchmark = HTTPBenchmark('https://fireworks.ai/', 10)
benchmark.start_test(duration=2)
benchmark.pretty_print_results()


---------------------------------
| URL: https://fireworks.ai/    |
---------------------------------
| Num Requests: 17              |
---------------------------------
| Num Successful Requests: 17   |
---------------------------------
| Num Failed Requests: 0        |
---------------------------------
| Average Latency (ms): 4767.64 |
---------------------------------



In [21]:
benchmark

HTTP Benchmark: https://fireworks.ai/ @ 10 qps

In [27]:
import argparse
import time
import asyncio
import aiohttp
from collections import Counter

class HTTPBenchmark2:
    def __init__(self, target_url:str, qps: int=1) -> None:
        """
        Initialize the HTTPBenchmark object with the target URL and QPS
        Inputs:
        - target_url: The URL to test
        - qps: The number of requests per second to send
        """
        self.target_url = target_url
        self.qps = qps 
        self.reset_results()

    def reset_results(self) -> None:
        """
        Reset the results of the test
        """
        self.results =  {
                'total_requests': 0,
                'successful_requests': 0,
                'failed_requests': 0,
                'latencies': [],
                'error_code_count': {}, # Error code and their counts, if any
            }

    def __str__(self) -> str:
        """
        Return a string representation of the test
        """
        return f"HTTP Benchmark: {self.target_url} @ {self.qps} qps"
    
    def __repr__(self) -> str:
        """
        Return a string representation of the test
        """
        return self.__str__()

    def pretty_print_results(self) -> None:
        """
        Pretty print the results of the test
        Print in tabular format:
        -------
        | URL: {self.target_url} |
        -------
        | Num Requests: {self.results['total_requests']} |
        -------
        | Num Successful Requests: {self.results['successful_requests']} |
        -------
        | Num Failed Requests: {self.results['failed_requests']} |
        -------
        | Average Latency (ms): {self.results['latencies']} |
        -------
        """
        
        ret_parts = [
            f"URL: {self.target_url}",
            f"Num Requests: {self.results['total_requests']}",
            f"Num Successful Requests: {self.results['successful_requests']}",
            f"Num Failed Requests: {self.results['failed_requests']}",
            f"Average Latency (ms): {round(sum(self.results['latencies'])/len(self.results['latencies'])*1000,2)}",
            f"Top {min(3, len(self.results['error_code_count']))} Error Codes: {{code: count for code, count in sorted(self.results['error_code_count'].items(), key=lambda x: x[1], reverse=True)[:3]}}"
        ]

        # Use max_len of parts to format the output table's length
        max_len = max(len(part) for part in ret_parts)

        # Create table
        line_sep = '\n' + '-' * (max_len+4) + '\n'
        table_lines = [ '| ' + part + ' '*(max_len-len(part)+1) + '|' for part in ret_parts]

        table = line_sep + line_sep.join(table_lines) + line_sep

        print(table)

    async def send_request(self, session, url):
        start_time = time.time()
        try:
            async with session.get(url, timeout=10) as response:
                await response.read()  # Ensure the whole response is fetched
                return time.time() - start_time, response.status
        except Exception as e:
            return time.time() - start_time, str(e)  # Capture and return exception message

    async def load_test(self, duration):
        tasks = []
        total_requests = self.qps * duration
        interval = 1 / self.qps  # Interval to maintain requests per second

        async with aiohttp.ClientSession() as session:
            start_time = time.time()
            for i in range(total_requests):
                if time.time() - start_time < duration:
                    task = asyncio.create_task(self.send_request(session, self.target_url))
                    tasks.append(task)
                    await asyncio.sleep(interval)  # Wait for the next request slot
                else:
                    break  # Stop if the duration is exceeded

            results = await asyncio.gather(*tasks)  # Wait for all tasks to complete

        self.results['total_requests'] = len(results)
        self.results['successful_requests'] = sum(1 for _, status in results if status == 200)
        self.results['failed_requests'] = len(results) - self.results['successful_requests']
        self.results['latencies'] = [latency for latency, status in results if status == 200]
        self.results['error_code_count'] = Counter([status for _, status in results if status != 200])

        success_count = sum(1 for _, status in results if isinstance(status, int) and status == 200)
        error_count = len(results) - success_count
        latencies = [latency for latency, status in results if status == 200]
        errors = [error for _, error in results if isinstance(error, str)]

        print(f"Total Requests: {len(results)}")
        print(f"Successful Requests: {success_count}")
        print(f"Failed Requests: {error_count}")
        print(f"Average Latency: {sum(latencies) / len(latencies) if latencies else None:.3f} seconds")
        print(f"Sample Errors: {errors[:10]}")  # Print first 10 error messages
    

    async def start_test(self, duration: int=10) -> None:
        """
        Start the test for the specified duration
        Inputs:
        - duration: The duration of the test in seconds
        """
        self.reset_results()
        asyncio.run(self.load_test(duration))



In [29]:
benchmark = HTTPBenchmark2('https://fireworks.ai/', 10)
benchmark.start_test(duration=2)
benchmark.pretty_print_results()

RuntimeError: asyncio.run() cannot be called from a running event loop

In [4]:
parser = argparse.ArgumentParser(description="HTTP Benchmark Tool")
parser.add_argument("--url", required=True, help="URL to benchmark")
parser.add_argument("--qps", type=int, required=True, help="Queries per second")
args = parser.parse_args()

benchmark = HTTPBenchmark(args.url, args.qps)
benchmark.start_test(duration=60)  # Run for 60 seconds

usage: ipykernel_launcher.py [-h] --url URL --qps QPS
ipykernel_launcher.py: error: the following arguments are required: --url, --qps


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
