# MLOps - NLP Lab with Amazon SageMaker

**Step 3** - *Deploy your model on a SageMaker endpoint and query it for predictions*

## Initialization
---
### Setup environment

In [None]:
import os
import sagemaker
from sagemaker import get_execution_role
from sagemaker.utils import name_from_base
from sagemaker.pytorch import PyTorchModel
from sagemaker.predictor import RealTimePredictor, json_serializer, json_deserializer

# Execution role, later handed to the model definition through `role=`:
role = get_execution_role()

### Extract the location of the model artifact

In [None]:
# Text file expected to hold the location of the trained model artifact on
# its first line (the message below points at the previous notebook as the
# producer of this file).
model_artifact_fname = '../2_train_model/model_artifact_location.txt'
if os.path.exists(model_artifact_fname):
    with open(model_artifact_fname, 'r') as f:
        # readline() keeps the trailing newline; strip it (and any stray
        # surrounding whitespace) so the value can be used as-is as the
        # `model_data` location further down.
        model_artefact = f.readline().strip()

    print(model_artefact)

else:
    print(f'Model artifact location file not found ({model_artifact_fname}): check that the previous notebook was fully executed.')

## Create Amazon SageMaker endpoint
---

In [None]:
# Real-time predictor used to talk to the endpoint serving our model:
class SentimentAnalysis(RealTimePredictor):
    """Predictor configured to send and receive JSON.

    Args:
        endpoint_name: Name of the endpoint, forwarded positionally to the
            base predictor.
        sagemaker_session: Session object forwarded to the base predictor.
    """

    def __init__(self, endpoint_name, sagemaker_session):
        # Requests are serialized with json_serializer, responses decoded
        # with json_deserializer, and the content type is declared as JSON.
        json_io = dict(
            serializer=json_serializer,
            deserializer=json_deserializer,
            content_type='application/json',
        )
        super().__init__(
            endpoint_name,
            sagemaker_session=sagemaker_session,
            **json_io,
        )

# Describe the model to host: the trained artifact, the inference script
# (predict_endpoint.py inside source_dir) and the SentimentAnalysis class
# passed as `predictor_cls`.
model = PyTorchModel(
    name=name_from_base('bert-model'),
    model_data=model_artefact,
    role=role,
    framework_version='1.5.0',
    source_dir='source_dir',
    entry_point='predict_endpoint.py',
    predictor_cls=SentimentAnalysis,
)

In [None]:
# Deploy the model with an initial count of one ml.m5.xlarge instance and
# keep the returned predictor for the queries below:
predictor = model.deploy(
    instance_type='ml.m5.xlarge',
    initial_instance_count=1,
)

## Predict
---

Now we can request a prediction from our model, which is deployed behind the endpoint created above:

In [None]:
# Build the request payload: a single review under the "text" key.
test_data = dict(text="I love completing my todos! Best app ever!!!")
print(test_data)

In [None]:
# Send the payload to the deployed endpoint through the predictor and keep
# the response it returns:
prediction = predictor.predict(test_data)

In [None]:
# Show the payload that was sent next to the prediction that came back:
print(
    f'Review text: {test_data}',
    f'Sentiment  : {prediction}',
    sep='\n',
)

In [None]:
# Clean up: delete the endpoint once you are done, otherwise it keeps
# incurring cost for as long as it is live.
predictor.delete_endpoint()