In [1]:
import sagemaker, time
from sagemaker import get_execution_role
import boto3, json

In [2]:
# --- Session-wide configuration used by every later cell ---

# SageMaker session wrapper and the IAM role this notebook runs under.
sess = sagemaker.Session()
role = get_execution_role()
print('Role: ' + role)

# Default bucket of the session; the packaged model is uploaded here later.
bucket = sess.default_bucket()
print(bucket)

# Region of the current boto3 session (used to look up the container image).
region_name = boto3.Session().region_name

# Key prefix under which the model archive is stored in `bucket`.
prefix = 'fasttext/pretrained'

Role: arn:aws:iam::417193079866:role/service-role/AmazonSageMaker-ExecutionRole-20200917T123532
sagemaker-us-east-2-417193079866


In [3]:
# Look up the BlazingText container image for the current region
# ('latest' is passed as the third positional argument, as before).
container = sagemaker.amazon.amazon_estimator.get_image_uri(
    region_name,
    'blazingtext',
    'latest',
)
print('testing blazingtext')

'get_image_uri' method will be deprecated in favor of 'ImageURIProvider' class in SageMaker Python SDK v2.


testing blazingtext


In [4]:
## Download the pretrained fastText supervised model lid.176.bin
## (not a dataset) and save it in the working directory as model.bin.
!wget -O model.bin https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin

--2020-11-10 05:26:21--  https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin
Resolving dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)... 172.67.9.4, 104.22.74.142, 104.22.75.142, ...
Connecting to dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)|172.67.9.4|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 131266198 (125M) [application/octet-stream]
Saving to: ‘model.bin’


2020-11-10 05:26:29 (15.7 MB/s) - ‘model.bin’ saved [131266198/131266198]



In [5]:
# Package model.bin into a gzipped tarball.
!tar -czvf langid.tar.gz model.bin
# Upload the archive to `bucket` under `prefix`; the returned value is passed
# as `model_data` to sagemaker.Model in the next cell.
model_location = sess.upload_data('langid.tar.gz', bucket, key_prefix = prefix)
# Remove the local archive and model file now that the upload call has returned.
!rm langid.tar.gz model.bin

model.bin


In [6]:
# Wrap the uploaded archive and the BlazingText image in a SageMaker Model.
lang_id = sagemaker.Model(
    model_data=model_location,
    image=container,
    role=role,
    sagemaker_session=sess,
)

# Host the model on a single ml.m4.xlarge instance.
lang_id.deploy(
    initial_instance_count=1,
    instance_type='ml.m4.xlarge',
)

# Client for the new endpoint: requests are serialized with json.dumps and
# responses are decoded with the SDK's JSON deserializer.
predictor = sagemaker.RealTimePredictor(
    endpoint=lang_id.endpoint_name,
    sagemaker_session=sess,
    serializer=json.dumps,
    deserializer=sagemaker.predictor.json_deserializer,
)

Parameter image will be renamed to image_uri in SageMaker Python SDK v2.


---------------!

In [7]:
# Three sample sentences to send to the endpoint.
sentences = [
    'Wherefore art thou Romeo?',
    "Ceci n'est pas une pipe.",
    'Ich bin ein Berliner.',
]

# Request body: the sentences are sent under the 'instances' key.
payload = dict(instances=sentences)

In [8]:
# Send the whole payload in a single request; the decoded response has one
# entry per input sentence, each carrying 'label' and 'prob' lists.
predictions = predictor.predict(payload)
print(predictions)

[{'label': ['__label__en'], 'prob': [0.6306219100952148]}, {'label': ['__label__fr'], 'prob': [0.9999626874923706]}, {'label': ['__label__de'], 'prob': [0.9984368681907654]}]


In [9]:
import copy

# Labels come back in the form '__label__<code>'. Name the prefix instead of
# slicing by a hard-coded length (the original used the magic number 9).
LABEL_PREFIX = '__label__'

# Work on a deep copy so the raw `predictions` response stays untouched and
# this cell gives the same result every time it is re-run.
predictions_copy = copy.deepcopy(predictions)

# Replace each entry's label list with its first label, stripped of the
# prefix and upper-cased (e.g. ['__label__en'] -> 'EN').
for output in predictions_copy:
    top_label = output['label'][0]
    # Strip the prefix only when it is actually present, so a label in an
    # unexpected format is not silently truncated.
    if top_label.startswith(LABEL_PREFIX):
        top_label = top_label[len(LABEL_PREFIX):]
    output['label'] = top_label.upper()
print(predictions_copy)

[{'label': 'EN', 'prob': [0.6306219100952148]}, {'label': 'FR', 'prob': [0.9999626874923706]}, {'label': 'DE', 'prob': [0.9984368681907654]}]


In [10]:
# Clean up: delete the endpoint that was created for this demo.
endpoint_name = predictor.endpoint
sess.delete_endpoint(endpoint_name)