In [3]:
# SPDX-License-Identifier: Apache-2.0
# The OpenSearch Contributors require contributions made to
# this file be licensed under the Apache-2.0 license or a
# compatible open source license.
# Any modifications Copyright OpenSearch Contributors. See
# GitHub history for details.

import argparse
import os
import sys
import warnings

import opensearchpy
from opensearchpy import OpenSearch

sys.path.append(os.path.abspath(os.path.join("../..")))

import opensearch_py_ml
from opensearch_py_ml.ml_commons import MLCommonClient
from opensearch_py_ml.ml_commons.model_uploader import ModelUploader
from opensearch_py_ml.ml_models.sentencetransformermodel import SentenceTransformerModel
from tests import OPENSEARCH_TEST_CLIENT

# Model format identifiers. TORCH_SCRIPT_FORMAT selects the save_as_pt branch in
# trace_sentence_transformer_model; any other value takes the ONNX branch.
TORCH_SCRIPT_FORMAT = "TORCH_SCRIPT"
ONNX_FORMAT = "ONNX"
# BOTH_FORMAT is not read by any helper visible in this notebook.
BOTH_FORMAT = "BOTH"
# Output folders for the traced artifacts.
# ORIGINAL_FOLDER_PATH is not referenced by any visible cell.
ORIGINAL_FOLDER_PATH = "sentence-transformers-original/"
TORCHSCRIPT_FOLDER_PATH = "sentence-transformers-torchscript/"
# NOTE(review): "ONXX"/"onxx" looks like a misspelling of "ONNX"; the name and the
# folder string are both used at runtime, so rename them together if at all.
ONXX_FOLDER_PATH = "sentence-transformers-onxx/"
# File name appended to the output folder to build the model config path.
MODEL_CONFIG_FILE_NAME = "ml-commons_model_config.json"
# Sentences used for tracing, for the reference embedding and for the deployed-model embedding.
TEST_SENTENCES = ["First test sentence", "Second test sentence"]
# Relative / absolute tolerances passed to np.testing.assert_allclose.
RTOL_TEST = 1e-03
ATOL_TEST = 1e-05
# Prefix used to build the model "_deploy" request URL.
ML_BASE_URI = "/_plugins/_ml"

  from .autonotebook import tqdm as notebook_tqdm
  OS_VERSION = os_version(OPENSEARCH_TEST_CLIENT)


In [4]:
# Debug aid: show the module search path, including the entry appended in the setup cell.
print(sys.path)

['/local/home/latchari/opensearch-pr/opensearch-py-ml/utils/model_uploader', '/home/linuxbrew/.linuxbrew/opt/python@3.8/lib/python38.zip', '/home/linuxbrew/.linuxbrew/opt/python@3.8/lib/python3.8', '/home/linuxbrew/.linuxbrew/opt/python@3.8/lib/python3.8/lib-dynload', '', '/home/linuxbrew/.linuxbrew/opt/python@3.8/lib/python3.8/site-packages', '/local/home/latchari/opensearch-pr/opensearch-py-ml/utils/opensearch-py-ml', '/local/home/latchari/opensearch-pr/opensearch-py-ml']


In [5]:
def trace_sentence_transformer_model(model_id, model_version, embedding_dimension, pooling_mode, model_format):
    """
    Trace a sentence transformer model into the requested format and write
    its ml-commons model config file.

    :param model_id: model identifier handed to SentenceTransformerModel and to the save call
    :param model_version: version number written into the model config file
    :param embedding_dimension: forwarded unchanged to make_model_config_json
    :param pooling_mode: forwarded unchanged to make_model_config_json
    :param model_format: TORCH_SCRIPT_FORMAT saves with save_as_pt into
        TORCHSCRIPT_FOLDER_PATH; any other value saves with save_as_onnx into
        ONXX_FOLDER_PATH
    :return: tuple ``(model_path, model_config_path)`` where model_path is the
        value returned by the save call and model_config_path is the output
        folder joined with MODEL_CONFIG_FILE_NAME
    :raises AssertionError: if any step fails; the underlying exception is
        chained as ``__cause__`` so the root cause stays visible
    """
    is_torch_script = model_format == TORCH_SCRIPT_FORMAT
    folder_path = TORCHSCRIPT_FOLDER_PATH if is_torch_script else ONXX_FOLDER_PATH

    # `except Exception` (not a bare `except:`) lets KeyboardInterrupt/SystemExit
    # propagate instead of being reported as a tracing failure.
    try:
        pre_trained_model = SentenceTransformerModel(
            model_id=model_id, folder_path=folder_path, overwrite=True
        )
    except Exception as exc:
        # Adjacent literals instead of a backslash continuation, which used to
        # embed the source indentation inside the message.
        raise AssertionError(
            f"Raised Exception in tracing {model_format} model "
            "during initiating a sentence transformer model class object"
        ) from exc

    # TODO: Check if model exists in database

    try:
        if is_torch_script:
            model_path = pre_trained_model.save_as_pt(model_id=model_id, sentences=TEST_SENTENCES)
        else:
            model_path = pre_trained_model.save_as_onnx(model_id=model_id)
    except Exception as exc:
        raise AssertionError(f"Raised Exception during saving model as {model_format}") from exc

    try:
        pre_trained_model.make_model_config_json(
            version_number=model_version,
            model_format=model_format,
            embedding_dimension=embedding_dimension,
            pooling_mode=pooling_mode,
        )
    except Exception as exc:
        raise AssertionError(
            f"Raised Exception during making model config file for {model_format} model"
        ) from exc
    model_config_path = folder_path + MODEL_CONFIG_FILE_NAME

    return model_path, model_config_path


def upload_sentence_transformer_model(ml_client, model_path, model_config_path, model_format):
    """
    Register, deploy and exercise a traced model on the cluster, then remove it.

    Steps, in order: register the model, deploy it, fetch the model info, wait
    for the deploy task, generate embeddings for TEST_SENTENCES, delete the
    task, undeploy the model and delete the model.

    :param ml_client: client providing register_model, deploy_model,
        get_model_info, get_task_info, generate_embedding, delete_task,
        undeploy_model and delete_model (an MLCommonClient in this notebook)
    :param model_path: path of the model file handed to register_model
    :param model_config_path: path of the model config file handed to register_model
    :param model_format: expected "model_format" of the registered model; also
        used in the progress and error messages
    :return: the "data" field of the first output of the first inference result
    :raises AssertionError: if any step raises or fails its check; the
        underlying exception is chained as ``__cause__``
    """
    embedding_data = None

    # --- register -----------------------------------------------------------
    try:
        model_id = ml_client.register_model(
            model_path=model_path,
            model_config_path=model_config_path,
            deploy_model=False,
            isVerbose=True,
        )
        print()
        print(f"{model_format}_model_id:", model_id)
        # Was `!= "" or is not None`, which is true for every value.
        assert model_id is not None and model_id != ""
    except Exception as exc:
        raise AssertionError(f"Raised Exception in {model_format} model registration") from exc

    # --- deploy -------------------------------------------------------------
    try:
        ml_client.deploy_model(model_id)
        # NOTE(review): deploy_model() above and this POST both appear to request
        # a deployment; the POST is kept because it is the only source of task_id
        # here. Confirm whether deploy_model can return the task id instead.
        api_url = f"{ML_BASE_URI}/models/{model_id}/_deploy"
        task_id = ml_client._client.transport.perform_request(method="POST", url=api_url)["task_id"]
        assert task_id is not None and task_id != ""
        ml_model_status = ml_client.get_model_info(model_id)
        assert ml_model_status.get("model_state") != "DEPLOY_FAILED"
        print(f"{model_format}_task_id:", task_id)
    except Exception as exc:
        raise AssertionError(f"Raised Exception in {model_format} model deployment") from exc

    # --- model info ---------------------------------------------------------
    try:
        ml_model_status = ml_client.get_model_info(model_id)
        print()
        print("Model Status:")
        print(ml_model_status)
        assert ml_model_status.get("model_format") == model_format
        assert ml_model_status.get("algorithm") == "TEXT_EMBEDDING"
    except Exception as exc:
        raise AssertionError(f"Raised Exception in getting {model_format} model info") from exc

    # --- deploy task --------------------------------------------------------
    ml_task_status = None
    try:
        ml_task_status = ml_client.get_task_info(task_id, wait_until_task_done=True)
        print()
        print("Task Status:")
        print(ml_task_status)
        assert ml_task_status.get("task_type") == "DEPLOY_MODEL"
        assert ml_task_status.get("state") != "FAILED"
    except Exception as exc:
        print("Model Task Status:", ml_task_status)
        raise AssertionError(
            f"Raised Exception in pulling task info for {model_format} model"
        ) from exc

    # --- embedding ----------------------------------------------------------
    # This test is flaky: it sometimes passes and sometimes fails with a 500
    # error due to the memory circuit breaker.
    # TODO: We need to revisit this test.
    try:
        embedding_output = ml_client.generate_embedding(model_id, TEST_SENTENCES)
        assert len(embedding_output.get("inference_results")) == 2
        embedding_data = embedding_output["inference_results"][0]["output"][0]["data"]
    except Exception as exc:
        raise AssertionError(
            f"Raised Exception in generating sentence embedding with {model_format} model"
        ) from exc

    # --- cleanup ------------------------------------------------------------
    try:
        delete_task_obj = ml_client.delete_task(task_id)
        assert delete_task_obj.get("result") == "deleted"
    except Exception as exc:
        raise AssertionError(f"Raised Exception in deleting task for {model_format} model") from exc

    try:
        ml_client.undeploy_model(model_id)
        ml_model_status = ml_client.get_model_info(model_id)
        assert ml_model_status.get("model_state") != "UNDEPLOY_FAILED"
    except Exception as exc:
        raise AssertionError(f"Raised Exception in {model_format} model undeployment") from exc

    try:
        delete_model_obj = ml_client.delete_model(model_id)
        assert delete_model_obj.get("result") == "deleted"
    except Exception as exc:
        raise AssertionError(f"Raised Exception in deleting {model_format} model") from exc

    return embedding_data


def verify_embedding_data(original_embedding_data, tracing_embedding_data, tracing_format, rtol=None, atol=None):
    """
    Check that two embedding vectors agree within tolerance.

    :param original_embedding_data: reference embedding values
    :param tracing_embedding_data: embedding values produced by the traced model
    :param tracing_format: format name used only in the success message
    :param rtol: relative tolerance; defaults to RTOL_TEST when None
    :param atol: absolute tolerance; defaults to ATOL_TEST when None
    :return: None; prints a confirmation line on success
    :raises AssertionError: if the comparison fails or cannot be performed; the
        numpy mismatch report (or other root cause) is chained as ``__cause__``
    """
    # Imported locally so the function does not depend on a later notebook cell
    # having run `import numpy as np` first; previously a missing `np` was
    # swallowed and reported as an embedding mismatch.
    import numpy as np

    rtol = RTOL_TEST if rtol is None else rtol
    atol = ATOL_TEST if atol is None else atol

    try:
        np.testing.assert_allclose(
            original_embedding_data,
            tracing_embedding_data,
            rtol=rtol,
            atol=atol,
        )
    except Exception as exc:
        raise AssertionError("Raised Exception in embedding verification") from exc
    print(f"Original embeddings matches {tracing_format} embeddings")

In [6]:
# Wrap the shared test client in the ML Commons client passed to the upload helper.
ml_client = MLCommonClient(OPENSEARCH_TEST_CLIENT)

In [7]:
# Parameters of the model that the following cells trace, upload and verify.
model_id = "sentence-transformers/msmarco-distilbert-base-tas-b"
model_version = "1.0.1"
# Passed through trace_sentence_transformer_model to make_model_config_json as-is.
embedding_dimension = None
pooling_mode = None
# Use the shared constant rather than repeating the literal 'BOTH'.
# Not read by any visible cell: both formats are traced explicitly below.
tracing_format = BOTH_FORMAT

In [33]:
# Reference embedding: encode TEST_SENTENCES with the untraced model and keep
# the vector of the first result for comparison with the deployed models.
from sentence_transformers import SentenceTransformer # NOTE(review): import sits mid-notebook; consider moving it to the import cell
pre_trained_model = SentenceTransformer(model_id)
original_embedding_data = list(pre_trained_model.encode(TEST_SENTENCES, convert_to_numpy=True)[0])

In [35]:
# Number of values in the reference embedding vector.
len(original_embedding_data)

768

In [15]:
# Trace the model as TorchScript and write its config file.
torchscript_model_path, torchscript_model_config_path = trace_sentence_transformer_model(
            model_id, model_version, embedding_dimension, pooling_mode, TORCH_SCRIPT_FORMAT
)

  mask, torch.tensor(torch.finfo(scores.dtype).min)


model file is saved to  sentence-transformers-torchscript/msmarco-distilbert-base-tas-b.pt
zip file is saved to  sentence-transformers-torchscript/msmarco-distilbert-base-tas-b.zip 

ml-commons_model_config.json file is saved at :  sentence-transformers-torchscript/ml-commons_model_config.json


In [16]:
# Show the two paths returned by the TorchScript tracing step.
print(torchscript_model_path)
print(torchscript_model_config_path)

sentence-transformers-torchscript/msmarco-distilbert-base-tas-b.zip
sentence-transformers-torchscript/ml-commons_model_config.json


In [17]:
# Register, deploy and query the TorchScript model; keeps the embedding data of
# the first inference result and removes the model afterwards.
torch_embedding_data = upload_sentence_transformer_model(
            ml_client, torchscript_model_path, torchscript_model_config_path, TORCH_SCRIPT_FORMAT
)

Total number of chunks 27
Sha1 value of the model file:  4bad8ee3fa8eb81a665396ebe922d90be0abd47e32eec299d85963e0e08a0442
Model meta data was created successfully. Model Id:  rnt1OIkB022__5wNmq4p
uploading chunk 1 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 2 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 3 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 4 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 5 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 6 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 7 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 8 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 9 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 10 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 11 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 12 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 13 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 14 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 15 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 16 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 17 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 18 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 19 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 20 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 21 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 22 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 23 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 24 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 25 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 26 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 27 of 27




Model id: {'status': 'Uploaded'}
Model registered successfully

TORCH_SCRIPT_model_id: rnt1OIkB022__5wNmq4p




Model deployed successfully
TORCH_SCRIPT_task_id: sHt1OIkB022__5wN3K4G

Model Status:
{'name': 'sentence-transformers/msmarco-distilbert-base-tas-b', 'algorithm': 'TEXT_EMBEDDING', 'model_version': '1.0.1', 'model_format': 'TORCH_SCRIPT', 'model_state': 'DEPLOYING', 'model_content_size_in_bytes': 266384067, 'model_content_hash_value': '4bad8ee3fa8eb81a665396ebe922d90be0abd47e32eec299d85963e0e08a0442', 'model_config': {'model_type': 'distilbert', 'embedding_dimension': 768, 'framework_type': 'SENTENCE_TRANSFORMERS', 'all_config': '{"_name_or_path": "/home/latchari/.cache/torch/sentence_transformers/sentence-transformers_msmarco-distilbert-base-tas-b/", "activation": "gelu", "architectures": ["DistilBertModel"], "attention_dropout": 0.1, "dim": 768, "dropout": 0.1, "hidden_dim": 3072, "initializer_range": 0.02, "max_position_embeddings": 512, "model_type": "distilbert", "n_heads": 12, "n_layers": 6, "pad_token_id": 0, "qa_dropout": 0.1, "seq_classif_dropout": 0.2, "sinusoidal_pos_embds":












Task Status:
{'model_id': 'rnt1OIkB022__5wNmq4p', 'task_type': 'DEPLOY_MODEL', 'function_name': 'TEXT_EMBEDDING', 'state': 'COMPLETED', 'worker_node': ['3w2aOHUBRmmwmVfoGgW8Jw'], 'create_time': 1688869395462, 'last_update_time': 1688869395497, 'is_async': True}


In [21]:
# Number of values in the embedding returned for the TorchScript model.
len(torch_embedding_data)

768

In [38]:
import numpy as np

# Direct tolerance check of the reference embedding against the TorchScript one;
# raises AssertionError with numpy's mismatch report if they differ.
np.testing.assert_allclose(
    actual=original_embedding_data,
    desired=torch_embedding_data,
    rtol=RTOL_TEST,
    atol=ATOL_TEST,
)

In [39]:
# Compare the reference embedding with the one produced by the deployed TorchScript model.
verify_embedding_data(original_embedding_data, torch_embedding_data, TORCH_SCRIPT_FORMAT)

Original embeddings matches TORCH_SCRIPT embeddings


In [40]:
# Trace the model as ONNX and write its config file, then show the two returned paths.
onnx_model_path, onnx_model_config_path = trace_sentence_transformer_model(
            model_id, model_version, embedding_dimension, pooling_mode, ONNX_FORMAT
        )

print(onnx_model_path, onnx_model_config_path)



ONNX opset version set to: 15
Loading pipeline (model: sentence-transformers/msmarco-distilbert-base-tas-b, tokenizer: sentence-transformers/msmarco-distilbert-base-tas-b)
Creating folder sentence-transformers-onxx/onnx
Using framework PyTorch: 1.13.1+cu117
Found input input_ids with shape: {0: 'batch', 1: 'sequence'}
Found input attention_mask with shape: {0: 'batch', 1: 'sequence'}
Found output output_0 with shape: {0: 'batch', 1: 'sequence'}
Ensuring inputs are in correct order
head_mask is not present in the generated input list.
Generated inputs order: ['input_ids', 'attention_mask']
zip file is saved to  sentence-transformers-onxx/msmarco-distilbert-base-tas-b.zip 

ml-commons_model_config.json file is saved at :  sentence-transformers-onxx/ml-commons_model_config.json
sentence-transformers-onxx/msmarco-distilbert-base-tas-b.zip sentence-transformers-onxx/ml-commons_model_config.json


In [41]:
# Register, deploy and query the ONNX model; keeps the embedding data of the
# first inference result and removes the model afterwards.
onnx_embedding_data = upload_sentence_transformer_model(
            ml_client, onnx_model_path, onnx_model_config_path, ONNX_FORMAT
        )

Total number of chunks 27
Sha1 value of the model file:  2f74213940bed6b7eefca0bc0577b711d4145bbd5bf3a633c9640497df0b8dbe
Model meta data was created successfully. Model Id:  sXt-OIkB022__5wN2657
uploading chunk 1 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 2 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 3 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 4 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 5 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 6 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 7 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 8 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 9 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 10 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 11 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 12 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 13 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 14 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 15 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 16 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 17 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 18 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 19 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 20 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 21 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 22 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 23 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 24 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 25 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 26 of 27




Model id: {'status': 'Uploaded'}
uploading chunk 27 of 27




Model id: {'status': 'Uploaded'}
Model registered successfully

ONNX_model_id: sXt-OIkB022__5wN2657




Model deployed successfully
ONNX_task_id: s3t_OIkB022__5wNGq72

Model Status:
{'name': 'sentence-transformers/msmarco-distilbert-base-tas-b', 'algorithm': 'TEXT_EMBEDDING', 'model_version': '1.0.1', 'model_format': 'ONNX', 'model_state': 'DEPLOYING', 'model_content_size_in_bytes': 266279916, 'model_content_hash_value': '2f74213940bed6b7eefca0bc0577b711d4145bbd5bf3a633c9640497df0b8dbe', 'model_config': {'model_type': 'distilbert', 'embedding_dimension': 768, 'framework_type': 'SENTENCE_TRANSFORMERS', 'all_config': '{"_name_or_path": "/home/latchari/.cache/torch/sentence_transformers/sentence-transformers_msmarco-distilbert-base-tas-b/", "activation": "gelu", "architectures": ["DistilBertModel"], "attention_dropout": 0.1, "dim": 768, "dropout": 0.1, "hidden_dim": 3072, "initializer_range": 0.02, "max_position_embeddings": 512, "model_type": "distilbert", "n_heads": 12, "n_layers": 6, "pad_token_id": 0, "qa_dropout": 0.1, "seq_classif_dropout": 0.2, "sinusoidal_pos_embds": false, "tie_wei












Task Status:
{'model_id': 'sXt-OIkB022__5wN2657', 'task_type': 'DEPLOY_MODEL', 'function_name': 'TEXT_EMBEDDING', 'state': 'COMPLETED', 'worker_node': ['3w2aOHUBRmmwmVfoGgW8Jw'], 'create_time': 1688870001398, 'last_update_time': 1688870001424, 'is_async': True}


In [42]:
# Compare the reference embedding with the one produced by the deployed ONNX model.
verify_embedding_data(original_embedding_data, onnx_embedding_data, ONNX_FORMAT)

Original embeddings matches ONNX embeddings
