# AWS SageMaker and nomic-embed-text-v1.5
This notebook demos how to embed your documents using nomic-embed-text-v1.5 running on AWS SageMaker.

In [1]:
import boto3
import sagemaker

from nomic.aws.sagemaker import batch_sagemaker_requests, \
                                create_sagemaker_request_for_batch, \
                                parse_sagemaker_response

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


## SageMaker configuration

In [2]:
# Must specify region. It is defined once here so that every client below
# is guaranteed to target the same region.
AWS_REGION = "us-east-2"

# A single boto3 session is the one source of region/credential settings.
sess = boto3.Session(region_name=AWS_REGION)
sm = sess.client("sagemaker")
sagemaker_session = sagemaker.Session(boto_session=sess)
# Runtime client used for invoke_endpoint calls; created from `sess` (rather
# than the module-level boto3.client) so it cannot drift from the session's
# region or credentials.
client = sess.client("sagemaker-runtime")

In [3]:
# Name of the SageMaker endpoint that the embedding requests are sent to;
# replace it with the name of your own endpoint.
endpoint_name = "triton-nomic-embed-text-v1-5-test-2024-03-18-18-50-54"

## Embed documents

In [4]:
# Six sample inputs: three distinct short sentences, then the same sentence
# repeated three times.
texts = [
    "This is a test",
    "This is another test",
    "This is a third test",
] + ["The quick brown fox jumps over the lazy dog"] * 3

In [5]:
# Send the texts to the endpoint batch by batch (two texts per request) and
# keep each request's parsed result as its own entry in `embeddings`.
embeddings = []
for request_body, json_header_size in batch_sagemaker_requests(texts, batch_size=2):
    # The content type carries the header size yielded alongside the request body.
    content_type = f"application/vnd.sagemaker-triton.binary+json;json-header-size={json_header_size}"
    raw_response = client.invoke_endpoint(
        EndpointName=endpoint_name,
        Body=request_body,
        ContentType=content_type,
    )
    embeddings.append(parse_sagemaker_response(raw_response))

In [6]:
# Display the collected results: one list entry per request sent in the loop above.
embeddings

[array([[ 0.0335  ,  0.015335, -0.01962 , ...,  0.003212, -0.000763,
         -0.01043 ],
        [ 0.03824 ,  0.02356 , -0.013275, ..., -0.00659 , -0.03375 ,
         -0.01296 ]], dtype=float16),
 array([[ 0.0382   ,  0.0273   , -0.00674  , ..., -0.03873  ,  0.0001578,
         -0.006977 ],
        [-0.02098  ,  0.00974  ,  0.00788  , ...,  0.002537 , -0.05994  ,
          0.01656  ]], dtype=float16),
 array([[-0.02098 ,  0.00974 ,  0.00788 , ...,  0.002537, -0.05994 ,
          0.01656 ],
        [-0.02098 ,  0.00974 ,  0.00788 , ...,  0.002537, -0.05994 ,
          0.01656 ]], dtype=float16)]