Copyright (c) Microsoft Corporation. All rights reserved.

Licensed under the MIT License.

# Load Test deployed web application

In this notebook, we test the latency of the deployed web application by sending a number of duplicate questions as asynchronous requests.

In [None]:
import asyncio
import json
import urllib.request
from timeit import default_timer

import aiohttp
import nest_asyncio
import pandas as pd
from azureml.core.webservice import AksWebservice
from azureml.core.workspace import Workspace
from dotenv import get_key, find_dotenv
from tqdm import tqdm
from utilities import text_to_json, get_auth


In [None]:
# Show which aiohttp release the load test below is running against.
print(aiohttp.__version__) 

In [None]:
# Patch asyncio through nest_asyncio before the load-test cell calls
# loop.run_until_complete(). Presumably this is required because the notebook
# kernel already has an event loop running — TODO confirm for your Jupyter version.
nest_asyncio.apply()

In [None]:
# Locate the .env file that later cells read settings from; with
# raise_error_if_not_found=True a missing file fails here instead of further down.
env_path = find_dotenv(raise_error_if_not_found=True)

In [None]:
# Load the Azure ML workspace from its config, authenticating with the
# project helper get_auth (imported from utilities; its behavior is not visible here).
ws = Workspace.from_config(auth=get_auth(env_path))
# Print the workspace name, resource group and location, one per line, as a sanity check.
print(ws.name, ws.resource_group, ws.location, sep="\n")

Let's retrieve the web service.

In [None]:
# The service name is stored in the .env file under the key 'aks_service_name'.
aks_service_name = get_key(env_path, 'aks_service_name')
# Look up the web service with that name in the workspace loaded above.
aks_service = AksWebservice(ws, name=aks_service_name)
# Last expression of the cell: display the service name.
aks_service.name

We will test our deployed service with 100 calls, with at most 4 requests in flight at any time. Feel free to try different values and see how the service responds.

In [None]:
# Tunable load-test parameters. After changing them, re-run the cells below:
# NUMBER_OF_REQUESTS is applied when the test data is sliced, and
# CONCURRENT_REQUESTS sizes the semaphore that throttles in-flight requests.
NUMBER_OF_REQUESTS = 100  # Total number of requests
CONCURRENT_REQUESTS = 4   # Number of requests at a time

Get the scoring URL and API key of the service.

In [None]:
# Endpoint every POST request of the load test is sent to.
scoring_url = aks_service.scoring_uri
# Use the first of the service's keys; run() sends it as the "Bearer" token.
api_key = aks_service.get_keys()[0]

In [None]:
# Tab-separated test set, read with latin1 encoding.
dupes_test_path = './data_folder/dupes_test.tsv'
dupes_test = pd.read_csv(dupes_test_path, sep='\t', encoding='latin1')
# Keep the first NUMBER_OF_REQUESTS rows of the column at position 4
# (presumably the question text — TODO confirm against the file's header).
# If the file has fewer rows, fewer requests are sent.
dupes_to_score = dupes_test.iloc[:NUMBER_OF_REQUESTS,4]

In [None]:
# One [url, payload] pair per selected row. Every pair targets the same scoring
# URL; the payload is whatever text_to_json (imported from utilities) returns.
url_list = [[scoring_url, jsontext] for jsontext in dupes_to_score.apply(text_to_json)]

In [None]:
def decode(result):
    """Parse a raw UTF-8 encoded response body as JSON.

    Args:
        result: Object with a ``decode`` method (e.g. ``bytes``) holding
            UTF-8 encoded JSON text.

    Returns:
        The Python object produced by ``json.loads`` for that text.
    """
    text = result.decode("utf-8")
    parsed = json.loads(text)
    return parsed

In [None]:
async def fetch(url, session, data, headers):
    """POST ``data`` to ``url`` and time the round trip.

    Args:
        url: Target passed to ``session.request``.
        session: Object whose ``request(method, url, data=..., headers=...)``
            returns an async context manager yielding a response with an
            awaitable ``read()`` (an ``aiohttp.ClientSession`` in this notebook).
        data: Request payload, forwarded unchanged.
        headers: Request headers, forwarded unchanged.

    Returns:
        A ``(body, seconds)`` tuple: the value returned by ``response.read()``
        and the time measured from just before the request was issued until
        the body had been read.

    The response status is not inspected; whatever body comes back is returned.
    """
    began = default_timer()
    pending_request = session.request("post", url, data=data, headers=headers)
    async with pending_request as response:
        body = await response.read()
        # Stop the clock while the response is still open, right after the read.
        seconds = default_timer() - began
    return body, seconds

In [None]:
async def bound_fetch(sem, url, session, data, headers):
    """Run ``fetch`` while holding ``sem``.

    Args:
        sem: Async context manager used as the concurrency gate (an
            ``asyncio.Semaphore`` when called from ``run``).
        url, session, data, headers: Forwarded unchanged to ``fetch``.

    Returns:
        Whatever ``fetch`` returns for this request.
    """
    # The request is only issued once the gate has been entered.
    async with sem:
        outcome = await fetch(url, session, data, headers)
    return outcome


In [None]:
async def await_with_progress(coros):
    """Await every awaitable in ``coros`` while showing a tqdm progress bar.

    Args:
        coros: Sized collection of awaitables, each resolving to a sequence
            whose first item is a raw response body and whose second item is
            the elapsed time for that request.

    Returns:
        A list of ``(decoded_body, elapsed)`` tuples. Items are appended in the
        order yielded by ``asyncio.as_completed``, i.e. completion order rather
        than the order of ``coros``.
    """
    collected = []
    progress = tqdm(asyncio.as_completed(coros), total=len(coros))
    for next_done in progress:
        outcome = await next_done
        decoded_body = decode(outcome[0])
        collected.append((decoded_body, outcome[1]))
    return collected


In [None]:
async def run(url_list, num_concurrent=CONCURRENT_REQUESTS):
    """Send one POST per entry of ``url_list`` and gather the results.

    Args:
        url_list: Iterable of ``(url, data)`` pairs, one per request.
        num_concurrent: Initial value of the semaphore that gates the requests.

    Returns:
        The list produced by ``await_with_progress`` for the scheduled tasks.

    The notebook-level ``api_key`` is sent as a Bearer token with every request.
    """
    request_headers = {
        "content-type": "application/json",
        "Authorization": "Bearer " + api_key,
    }
    # A single semaphore is shared by all tasks.
    limiter = asyncio.Semaphore(num_concurrent)

    # All requests go through the same client session.
    async with aiohttp.ClientSession() as session:
        scheduled = [
            asyncio.ensure_future(
                bound_fetch(limiter, target, session, payload, request_headers)
            )
            for target, payload in url_list
        ]
        return await await_with_progress(scheduled)


Below we run the 100 requests against our deployed service.

In [None]:
# Drive the run() coroutine to completion on the current event loop and time
# the whole batch with a wall clock.
loop = asyncio.get_event_loop()
start_time = default_timer()
complete_responses = loop.run_until_complete(
    asyncio.ensure_future(run(url_list, num_concurrent=CONCURRENT_REQUESTS))
)
elapsed = default_timer() - start_time
print("Total Elapsed {}".format(elapsed))
# NOTE: this is the total wall-clock time divided by the number of requests.
# Because requests overlap, it is not the mean latency of a single call; the
# per-request timings are the second element of each complete_responses entry.
print("Avg time taken {0:4.2f} ms".format(1000 * elapsed / len(url_list)))

In [None]:
# Example response: a (decoded_body, elapsed) pair. Entries are stored in
# completion order, so index 0 is the request that finished first.
complete_responses[0]

Let's use the number of original questions returned in the first response as the baseline for counting the successful responses.

In [None]:
# Baseline for the success count: the number of items in the first completed
# response, which is therefore assumed to be a successful one.
# NOTE(review): eval() executes whatever text the service returned, so only run
# this against a service you trust. If the payload is a plain Python/JSON
# literal, ast.literal_eval or json.loads would be a safer parser — confirm the
# response format before switching.
no_questions = len(eval(complete_responses[0][0]))

In [None]:
def _answer_count(payload):
    """Return the number of items in one decoded response, or None if unusable.

    Args:
        payload: Decoded body of a single response (first element of an entry
            of ``complete_responses``).

    Returns:
        ``len(eval(payload))`` when the payload can be evaluated and has a
        length; ``None`` otherwise, so that an error response is counted as a
        failure instead of aborting the whole tally.
    """
    try:
        # NOTE(review): eval() executes whatever text the service returned, so
        # only use this against a trusted service. If the payload is a plain
        # literal, ast.literal_eval would be a safer parser — confirm the format.
        return len(eval(payload))
    except Exception:
        # eval() of arbitrary text can raise any exception type; every such
        # case simply means "this response is not a valid answer list".
        return None


# A response is successful when it holds as many items as the baseline response.
num_succesful = [_answer_count(i[0]) for i in complete_responses].count(no_questions)
print("Succesful {} out of {}".format(num_succesful, len(url_list)))

Next, we will explore the real-time scoring in an [iPyWidget app](07_RealTimeScoring.ipynb).