In [None]:
from flask import Flask, Response, jsonify, request

# Flask application object; the @app.route decorators below register their
# handlers on it, and the __main__ guard at the end of this cell runs it.
app = Flask(__name__)

@app.route('/ping', methods=['GET'])
def ping():
    """Health-check endpoint.

    Returns:
        A response with a single-newline body and mimetype
        ``application/json``. The status is 200 while ``health`` is true and
        404 otherwise.
    """
    # Placeholder health signal: nothing is actually loaded or checked yet, so
    # the service always reports healthy.
    # NOTE(review): wire this to a real readiness check once a model is loaded.
    health = True
    status = 200 if health else 404
    # Use the imported Response class directly; the module name `flask` is
    # never bound in this notebook, so `flask.Response` raised NameError.
    return Response(response='\n', status=status, mimetype='application/json')


@app.route('/invocations', methods=['POST'])
def transformation():
    """Echo the ``input`` field of a JSON request body back as ``output``.

    Returns:
        ``{'output': <value of 'input'>}`` as a JSON response on success.
        A ``({'error': <message>}, 400)`` pair when the request is not JSON,
        the body cannot be parsed, or the body is not an object containing
        an ``'input'`` key.
    """
    # Reject requests that do not declare a JSON payload.
    if not request.is_json:
        return jsonify({'error': 'Request must contain JSON data'}), 400

    # A malformed body makes get_json() raise; report it as a client error
    # instead of letting it surface as a server error.
    try:
        input_json = request.get_json()
    except Exception:
        return jsonify({'error': 'Failed to parse JSON data'}), 400

    # Valid JSON is not necessarily an object: null, numbers, strings and
    # lists would make the membership test or the subscript below raise (or
    # match by accident), so require a dict before looking up the key.
    if not isinstance(input_json, dict) or 'input' not in input_json:
        return jsonify({'error': 'Input data is missing'}), 400

    # Transformation step: currently a plain echo of the input.
    output_data = input_json['input']

    return jsonify({'output': output_data})


if __name__ == '__main__':
    # The server listens on every interface (0.0.0.0), so debug mode must stay
    # off: Flask's debug mode enables the interactive debugger, which lets
    # anyone who can reach the port execute arbitrary code.
    # The port is passed as an int rather than a numeric string.
    app.run(debug=False, host='0.0.0.0', port=8080)


In [19]:
import boto3
from sagemaker import get_execution_role

# SageMaker clients: `sm_client` manages models, endpoint configs and
# endpoints; `runtime_sm_client` sends inference requests to an endpoint.
sm_client = boto3.client('sagemaker')
runtime_sm_client = boto3.client('sagemaker-runtime')

# Region of the default boto3 session and the caller's AWS account id.
region = boto3.Session().region_name
account_id = boto3.client('sts').get_caller_identity()['Account']

# Execution role, passed as ExecutionRoleArn when the model is created.
role = get_execution_role()

In [20]:
from time import gmtime, strftime

model_name = 'vllm'
instance_type = 'ml.t2.xlarge'

# Build the ECR image URI from the account and region resolved earlier in the
# notebook instead of hard-coding them, so the cell follows whichever
# account/region the notebook is running in.
# NOTE(review): assumes the `vllm:latest` image lives in the caller's own ECR
# registry — confirm if the image is shared from another account.
image_uri = '{}.dkr.ecr.{}.amazonaws.com/vllm:latest'.format(account_id, region)

print('Model name: ' + model_name)
print('Container image: ' + image_uri)

# Container definition handed to create_model. The URI string has its own
# name (`image_uri`) so `container` is only ever a dict.
container = {
    'Image': image_uri
}

create_model_response = sm_client.create_model(
    ModelName=model_name,
    ExecutionRoleArn=role,
    Containers=[container])

print("Model Arn: " + create_model_response['ModelArn'])

Model name: vllm
Container image: 101840641443.dkr.ecr.us-east-1.amazonaws.com/vllm:latest
Model Arn: arn:aws:sagemaker:us-east-1:101840641443:model/vllm


In [21]:
# Config name: fixed prefix plus a UTC timestamp at second resolution.
endpoint_config_name = 'vllm-config{}'.format(strftime("%Y-%m-%d-%H-%M-%S", gmtime()))
print('Endpoint config name: {}'.format(endpoint_config_name))

# One production variant that carries all the traffic for `model_name`.
production_variant = {
    'VariantName': 'AllTraffic',
    'ModelName': model_name,
    'InstanceType': instance_type,
    'InitialInstanceCount': 1,
    'InitialVariantWeight': 1,
}

create_endpoint_config_response = sm_client.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=[production_variant],
)

print("Endpoint config Arn: {}".format(create_endpoint_config_response['EndpointConfigArn']))

Endpoint config name: vllm-config2024-04-02-17-05-38
Endpoint config Arn: arn:aws:sagemaker:us-east-1:101840641443:endpoint-config/vllm-config2024-04-02-17-05-38


In [None]:
%%time

import time

# Endpoint name: fixed prefix plus a UTC timestamp at second resolution.
endpoint_name = 'vllm-endpoint-org{}'.format(strftime("%Y-%m-%d-%H-%M-%S", gmtime()))
print('Endpoint name: {}'.format(endpoint_name))

# Start creating the endpoint from the endpoint config built earlier.
create_endpoint_response = sm_client.create_endpoint(
    EndpointConfigName=endpoint_config_name,
    EndpointName=endpoint_name,
)
print('Endpoint Arn: {}'.format(create_endpoint_response['EndpointArn']))

# Report the status once, right after the create call returns.
resp = sm_client.describe_endpoint(EndpointName=endpoint_name)
status = resp['EndpointStatus']
print("Endpoint Status: {}".format(status))

# Block this cell on the 'endpoint_in_service' waiter.
print('Waiting for {} endpoint to be in service...'.format(endpoint_name))
waiter = sm_client.get_waiter('endpoint_in_service')
waiter.wait(EndpointName=endpoint_name)


Endpoint name: vllm-endpoint-org2024-04-02-17-05-56
Endpoint Arn: arn:aws:sagemaker:us-east-1:101840641443:endpoint/vllm-endpoint-org2024-04-02-17-05-56
Endpoint Status: Creating
Waiting for vllm-endpoint-org2024-04-02-17-05-56 endpoint to be in service...


In [12]:
import json
content_type = "application/json"

# Adjacent string literals are concatenated by Python, which keeps the source
# line short without a backslash continuation. The continuation form pulled
# the next line's leading indentation into the payload text.
request_body = {
    "input": (
        "This is a test with NER in America with "
        "Amazon and Microsoft in Seattle, writing random stuff."
    )
}

# Serialize the request body for the endpoint.
payload = json.dumps(request_body)

# Invoke the endpoint created earlier in the notebook (`endpoint_name`).
response = runtime_sm_client.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType=content_type,
    Body=payload)

# The response body is a stream: read it, decode it, and pull out the
# 'output' field that the /invocations handler returns.
result = json.loads(response['Body'].read().decode())['output']
result

ValidationError: An error occurred (ValidationError) when calling the InvokeEndpoint operation: Endpoint vllm-endpoint-org2024-04-02-16-56-37 of account 101840641443 not found.

In [None]:
import requests

# Base URL of the locally running Flask app. The app defined in this notebook
# is started on port 8080, so the smoke test has to target that port.
base_url = 'http://localhost:8080/'

def test_get_request():
    """Send a GET request to the app's ``/ping`` route and print the outcome.

    The app registers only ``/ping`` and ``/invocations``, so ``/ping`` is the
    one route that answers a GET. The status code is printed along with the
    body because ``/ping`` replies with just a newline.
    """
    # A timeout keeps the cell from hanging forever if the server is not up.
    response = requests.get(base_url + 'ping', timeout=5)

    # Print the response status and content
    print("GET Request Response:")
    print(response.status_code)
    print(response.text)

if __name__ == "__main__":
    # Test GET request
    test_get_request()
    



In [None]:
import json
content_type = "application/json"

# Adjacent string literals are concatenated by Python, which keeps the source
# line short without a backslash continuation. The continuation form pulled
# the next line's leading indentation into the payload text.
request_body = {
    "input": (
        "This is a test with NER in America with "
        "Amazon and Microsoft in Seattle, writing random stuff."
    )
}

# Serialize the request body for the endpoint.
payload = json.dumps(request_body)

# Invoke the endpoint. `Body` is a required argument of invoke_endpoint;
# without it the call is rejected and the serialized payload is never sent.
response = runtime_sm_client.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType=content_type,
    Body=payload)

# The response body is a stream: read it, decode it, and pull out the
# 'output' field that the /invocations handler returns.
result = json.loads(response['Body'].read().decode())['output']
result