# TF-IDF Predictor with Scikit-learn: Proof of Concept

Since we want to make inferences from outside of the SageMaker interface, we need to train this model and deploy it to an endpoint. To do so, we use SageMaker's SKLearn Estimator to train a model on a SageMaker notebook instance.

In [None]:
# Update and install packages
!pip install -Uqr requirements.txt
!/bin/bash ./local_mode_setup.sh

In [55]:
# Basic packages
import json
import pandas as pd
pd.set_option("display.max_colwidth", None)

# AWS packages
import sagemaker as sm
from sagemaker.sklearn import SKLearn
from sagemaker.predictor import Predictor
import boto3

In [78]:
# Train SM estimator

# Input location for the 'train' channel and the instance type requested for the job
train_data = 's3://amplifyobserverinsights-aoinsightslandingbucket29-5vcr471d4nm5/data/issues/'
instance_type = 'ml.m5.4xlarge'

# Hyperparameters passed to the estimator (entry point: train_tfidf.py)
hyperparams = dict(n_best=10, lemmatize='custom')

# Keyword arguments for the SKLearn estimator; 'dependencies' lists the
# current directory alongside the entry point script
estimator_params = dict(
    entry_point='train_tfidf.py',
    role=sm.get_execution_role(),
    instance_type=instance_type,
    framework_version='0.23-1',
    hyperparameters=hyperparams,
    dependencies=['.'],
)

# Build the estimator, then start training on the 'train' channel
sklearn_estimator = SKLearn(**estimator_params)
train_channels = {'train': train_data}
sklearn_estimator.fit(train_channels)

2021-08-03 23:30:35 Starting - Starting the training job...
2021-08-03 23:31:00 Starting - Launching requested ML instancesProfilerReport-1628033432: InProgress
...
2021-08-03 23:31:32 Starting - Preparing the instances for training.........
2021-08-03 23:33:00 Downloading - Downloading input data...
2021-08-03 23:33:28 Training - Training image download completed. Training in progress.[34m2021-08-03 23:33:28,864 sagemaker-containers INFO     Imported framework sagemaker_sklearn_container.training[0m
[34m2021-08-03 23:33:28,867 sagemaker-training-toolkit INFO     No GPUs detected (normal if no gpus installed)[0m
[34m2021-08-03 23:33:28,875 sagemaker_sklearn_container.training INFO     Invoking user training script.[0m
[34m2021-08-03 23:33:29,604 sagemaker-training-toolkit INFO     Installing dependencies from requirements.txt:[0m
[34m/miniconda3/bin/python -m pip install -r requirements.txt[0m
[34mCollecting progress
  Downloading progress-1.6.tar.gz (7.8 kB)[0m
[34mCollec

In [None]:
def update_existing_endpoint(estimator, endpoint_name,
                             instance_type='ml.m5.4xlarge',
                             initial_instance_count=1):
    """Point an existing SageMaker endpoint at a model built from `estimator`.

    Creates a SageMaker model from the estimator, creates an endpoint config
    that references that model, and then updates `endpoint_name` to use the
    new endpoint config.

    Args:
        estimator: Fitted estimator exposing `create_model()` (e.g. the
            SKLearn estimator trained in this notebook).
        endpoint_name: Name of the already-deployed endpoint to update.
        instance_type: Instance type used for the container definition and
            for the new endpoint config.
        initial_instance_count: Number of instances in the new endpoint config.

    Returns:
        The response returned by the boto3 `update_endpoint` call.
    """
    model = estimator.create_model()
    session = model.sagemaker_session
    # The SageMaker SDK is imported as `sm` in this notebook.
    role = sm.get_execution_role()
    model_name = model.name
    container_def = model.prepare_container_def(instance_type=instance_type)
    session.create_model(model_name, role, container_def)

    # The endpoint config reuses the model's name.
    endpoint_config_name = session.create_endpoint_config(
        name=model_name,
        model_name=model_name,
        initial_instance_count=initial_instance_count,
        instance_type=instance_type,
    )

    # Update desired endpoint with new Endpoint Config
    client = boto3.client('sagemaker')
    return client.update_endpoint(EndpointName=endpoint_name,
                                  EndpointConfigName=endpoint_config_name)

In [90]:
# Deploy SM predictor by pointing the existing endpoint at the newly trained model.
# For a first-time deployment (no endpoint yet), use instead:
#   sklearn_estimator.deploy(instance_type='ml.m5.4xlarge',
#                            initial_instance_count=1,
#                            endpoint_name='issue-similarity-endpoint')
endpoint_name = 'issue-similarity-endpoint'
predictor = Predictor(endpoint_name)

# Create a SageMaker model from the trained estimator
model = sklearn_estimator.create_model()
session = model.sagemaker_session
role = sm.get_execution_role()  # the SDK is imported as `sm`
model_name = model.name
container_def = model.prepare_container_def(instance_type=instance_type)
session.create_model(model_name, role, container_def)

# Create the endpoint config that the update below refers to; without this
# step `endpoint_config_name` is undefined on a fresh kernel
endpoint_config_name = session.create_endpoint_config(name=model_name,
                                                      model_name=model_name,
                                                      initial_instance_count=1,
                                                      instance_type=instance_type)

# TODO: check whether predictor.update_endpoint(instance_type=..., model_name=...,
# initial_instance_count=...) can replace the manual steps in this cell.

# Update desired endpoint with new Endpoint Config
client = boto3.client('sagemaker')
client.update_endpoint(EndpointName=endpoint_name,
                       EndpointConfigName=endpoint_config_name)

{'EndpointArn': 'arn:aws:sagemaker:us-west-2:092109498566:endpoint/issue-similarity-endpoint',
 'ResponseMetadata': {'RequestId': 'dddc5e36-6c18-46ad-886d-45ac2516e94c',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'dddc5e36-6c18-46ad-886d-45ac2516e94c',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '93',
   'date': 'Thu, 05 Aug 2021 19:23:12 GMT'},
  'RetryAttempts': 0}}

In [84]:
# Wrap the query text in a JSON body under the 'data' key
query_text = 'password manager autofill'
data = json.dumps({'data': query_text})

# Send the request to the endpoint and decode the returned bytes as UTF-8
raw_response = predictor.predict(data)
response = raw_response.decode('utf-8')
print(response)

{"Text": "password manager autofill", "Similar": [{"Url": "https://github.com/aws-amplify/amplify-js/issues/8472", "Title": "AmplifySignIn component does not work with password managers or native browser autofill", "Score": 0.681799102059258}, {"Url": "https://github.com/aws-amplify/amplify-js/issues/3799", "Title": "Password reset issue - chrome autofill", "Score": 0.3848452953980297}, {"Url": "https://github.com/aws-amplify/amplify-adminui/issues/233", "Title": "Password managers, remember password, and suggest password not working in login form", "Score": 0.3810235829475551}, {"Url": "https://github.com/aws-amplify/aws-sdk-ios/issues/3076", "Title": "Add Support to Swift Package Manager", "Score": 0.37277832850287257}, {"Url": "https://github.com/aws-amplify/aws-sdk-ios/issues/313", "Title": "Support Swift Package Manager", "Score": 0.3507859987807144}, {"Url": "https://github.com/aws-amplify/amplify-js/issues/4748", "Title": "[VueJS] Firefox autofill don't work", "Score": 0.3402311