# Stress Test

The idea of this code is to see how the production Endpoint will behave when a **bunch** of requests arrive it.
Let's simulate several users doing predictions at the same time

In [1]:
import threading
import boto3
import json
import numpy as np
import time
import math

from multiprocessing.pool import ThreadPool
from sklearn import datasets

In [2]:
sm = boto3.client("sagemaker-runtime")
codepipeline = boto3.client('codepipeline')

pipeline_name = 'iris-train-pipeline'
endpoint_name_mask='mlops-iris-model-%s-%s'

iris = datasets.load_iris()
dataset = np.insert(iris.data, 0, iris.target,axis=1)

In [3]:
def get_env_jobid(env):
    response = codepipeline.get_pipeline_state( name=pipeline_name )
    for stage in response['stageStates']:
        if stage['stageName'] == 'Deploy%s' % env.capitalize():
            for action in stage['actionStates']:
                if action['actionName'] == 'DeployModel%s' % env.capitalize():
                    return stage['latestExecution']['pipelineExecutionId']

In [4]:
def predict(payload):
    payload = payload
    X = [ payload[1:] ]
    y = payload[0]
    response = []
    elapsed_time = time.time()
    resp = sm.invoke_endpoint(
        EndpointName=endpoint_name_mask % (env, job_id),
        CustomAttributes='logistic',
        Body=json.dumps(X)
    )
    elapsed_time = time.time() - elapsed_time
    resp = json.loads(resp['Body'].read())
    response.append((resp['iris_id'][0] == y, elapsed_time))
    
    elapsed_time = time.time()
    resp = sm.invoke_endpoint(
        EndpointName=endpoint_name_mask % (env, job_id),
        CustomAttributes='random_forest',
        Body=json.dumps(X)
    )
    elapsed_time = time.time() - elapsed_time
    resp = json.loads(resp['Body'].read())
    response.append((resp['iris_id'][0] == y, elapsed_time))
    
    return response

In [5]:
def run_test(max_threads, max_requests):
    num_batches = math.ceil(max_requests / len(dataset))
    requests = []
    for i in range(num_batches):
        batch = dataset.copy()
        np.random.shuffle(batch)
        requests += batch.tolist()
    len(requests)

    pool = ThreadPool(max_threads)
    result = pool.map(predict, requests)
    pool.close()
    pool.join()
    
    correct_logistic=0
    correct_random_forest=0
    elapsedtime_logistic=0
    elapsedtime_random_forest=0
    for i in result:
        correct_logistic += i[0][0]
        correct_random_forest += i[1][0]

        elapsedtime_logistic += i[0][1]
        elapsedtime_random_forest += i[1][1]
    print("Score logistic: {}".format(correct_logistic/len(result)))
    print("Score random forest: {}".format(correct_random_forest/len(result)))

    print("Elapsed time logistic: {}s".format(elapsedtime_logistic))
    print("Elapsed time random forest: {}s".format(elapsedtime_random_forest))

In [6]:
env='prd'
job_id=get_env_jobid(env)

In [7]:
%%time
print("Starting test 1")
run_test(10, 1000)

Starting test 1
Score logistic: 0.9266666666666666
Score random forest: 0.9733333333333334
Elapsed time logistic: 208.70884108543396s
Elapsed time random forest: 422.71905875205994s
CPU times: user 5.18 s, sys: 408 ms, total: 5.59 s
Wall time: 1min 6s


In [None]:
%%time
print("Starting test 2")
run_test(100, 10000)

In [None]:
%%time
print("Starting test 3")
run_test(150, 100000)

> While this test is running, go to the **AWS Console** -> **Sagemaker**, then click on the **Endpoint** and then click on the **CloudWatch** monitoring logs to see the Endpoint Behavior

## In CloudWatch, mark the following three checkboxes
![CloudWatchA](../../imgs/CloudWatchA.png)

## Then, change the following config, marked in RED

![CloudWatchB](../../imgs/CloudWatchB.png)

## Now, while your stress test is still running, you will see the Auto Scaling Alarm like this, after 3 datapoints above 750 Invocations Per Instance

![CloudWatchC](../../imgs/CloudWatchC.png)

When this happens, the Endpoint Autoscaling will start adding more instances to your cluster. You can observe in the Graph from the previous image that, after new instances are added to the cluster, the **Invocations** metrics grows.

## Well done!