# Experiment Notebook
Load .onnx and Verify Embedding without ML-Commons API to see if the problem is with ML-Commons API or the .onnx file itself

Reference: https://github.com/SidJain1412/sentence-transformers/blob/master/examples/onnx/onnx_example.ipynb

In [1]:
import os
import sys
sys.path.append(os.path.abspath(os.path.join('../../..')))

In [2]:
import warnings
warnings.filterwarnings('ignore', category=DeprecationWarning)
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings("ignore", message="Unverified HTTPS request")
warnings.filterwarnings("ignore", message="TracerWarning: torch.tensor")
warnings.filterwarnings("ignore", message="using SSL with verify_certs=False is insecure.")

import opensearch_py_ml as oml
from opensearchpy import OpenSearch
from opensearch_py_ml.ml_models import SentenceTransformerModel
# import mlcommon to later register the model to OpenSearch Cluster
from opensearch_py_ml.ml_commons import MLCommonClient

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
CLUSTER_URL = 'https://localhost:9200'

In [4]:
def get_os_client(cluster_url = CLUSTER_URL,
                  username='admin',
                  password='admin'):
    '''
    Get OpenSearch client
    :param cluster_url: cluster URL like https://ml-te-netwo-1s12ba42br23v-ff1736fa7db98ff2.elb.us-west-2.amazonaws.com:443
    :return: OpenSearch client
    '''
    client = OpenSearch(
        hosts=[cluster_url],
        http_auth=(username, password),
        verify_certs=False
    )
    return client 

In [5]:
client = get_os_client()

# Connect to ml_common client with OpenSearch client
ml_client = MLCommonClient(client)



## Trace the Model in Onnx Using save_as_onnx
See `opensearch_py_ml/ml_models/sentencetransformermodel.py`

In [6]:
model_id = "sentence-transformers/distiluse-base-multilingual-cased-v2"
folder_path='sentence-transformers-onxx/distiluse-base-multilingual-cased-v2'
model_name = str(model_id.split("/")[-1] + ".onnx")
model_path = os.path.join(folder_path, "onnx", model_name)

In [7]:
## Case I: Initiate SentenceTransformerModel and Call save_as_onnx

pre_trained_model = SentenceTransformerModel(model_id=model_id, folder_path=folder_path, overwrite=True)
model_path_onnx = pre_trained_model.save_as_onnx(model_id=model_id)

ONNX opset version set to: 15
Loading pipeline (model: sentence-transformers/distiluse-base-multilingual-cased-v2, tokenizer: sentence-transformers/distiluse-base-multilingual-cased-v2)
Creating folder sentence-transformers-onxx/distiluse-base-multilingual-cased-v2/onnx
Using framework PyTorch: 1.13.1+cu117
Found input input_ids with shape: {0: 'batch', 1: 'sequence'}
Found input attention_mask with shape: {0: 'batch', 1: 'sequence'}
Found output output_0 with shape: {0: 'batch', 1: 'sequence'}
Found output output_1 with shape: {0: 'batch', 1: 'sequence'}
Found output output_2 with shape: {0: 'batch', 1: 'sequence'}
Found output output_3 with shape: {0: 'batch', 1: 'sequence'}
Found output output_4 with shape: {0: 'batch', 1: 'sequence'}
Found output output_5 with shape: {0: 'batch', 1: 'sequence'}
Found output output_6 with shape: {0: 'batch', 1: 'sequence'}
Found output output_7 with shape: {0: 'batch', 1: 'sequence'}
Ensuring inputs are in correct order
head_mask is not present in t

  mask, torch.tensor(torch.finfo(scores.dtype).min)


model file is saved to  sentence-transformers-onxx/distiluse-base-multilingual-cased-v2/onnx/distiluse-base-multilingual-cased-v2.onnx
zip file is saved to  sentence-transformers-onxx/distiluse-base-multilingual-cased-v2/distiluse-base-multilingual-cased-v2.zip 



In [8]:
## Case II: Repeat what save_as_onnx function does

# from transformers.convert_graph_to_onnx import convert
# from pathlib import Path

# model = SentenceTransformer(model_id)
# folder_path='sentence-transformers-onxx/distiluse-base-multilingual-cased-v1'

# model_name = str(model_id.split("/")[-1] + ".onnx")

# model_path = os.path.join(folder_path, "onnx", model_name)
        
# convert(
#     framework="pt",
#     model=model_id,
#     output=Path(model_path),
#     opset=15,
# )

In [9]:
## Case III: Already run demo_tracing_model_torch_script_onnx_dense notebook 

# Skip to next step since we already have .onnx at model_path

In [10]:
pre_trained_model.make_model_config_json(model_format="ONNX")

ml-commons_model_config.json file is saved at :  sentence-transformers-onxx/distiluse-base-multilingual-cased-v2/ml-commons_model_config.json


'sentence-transformers-onxx/distiluse-base-multilingual-cased-v2/ml-commons_model_config.json'

In [11]:
model_config_path_onnx = 'sentence-transformers-onxx/distiluse-base-multilingual-cased-v2/ml-commons_model_config.json'
ml_client.register_model(model_path_onnx, model_config_path_onnx, isVerbose=True)

Total number of chunks 55
Sha1 value of the model file:  084a3f4ee530228e220461bcfa67e51bd47671f1eaa8ec9163f87d4e3fb03684
Model meta data was created successfully. Model Id:  hv0c7IkBVsgBeq9g7M_J
uploading chunk 1 of 55
Model id: {'status': 'Uploaded'}
uploading chunk 2 of 55
Model id: {'status': 'Uploaded'}
uploading chunk 3 of 55
Model id: {'status': 'Uploaded'}
uploading chunk 4 of 55
Model id: {'status': 'Uploaded'}
uploading chunk 5 of 55
Model id: {'status': 'Uploaded'}
uploading chunk 6 of 55
Model id: {'status': 'Uploaded'}
uploading chunk 7 of 55
Model id: {'status': 'Uploaded'}
uploading chunk 8 of 55
Model id: {'status': 'Uploaded'}
uploading chunk 9 of 55
Model id: {'status': 'Uploaded'}
uploading chunk 10 of 55
Model id: {'status': 'Uploaded'}
uploading chunk 11 of 55
Model id: {'status': 'Uploaded'}
uploading chunk 12 of 55
Model id: {'status': 'Uploaded'}
uploading chunk 13 of 55
Model id: {'status': 'Uploaded'}
uploading chunk 14 of 55
Model id: {'status': 'Uploaded'}
u

'hv0c7IkBVsgBeq9g7M_J'

In [21]:
import numpy as np

input_sentences = ["first sentence", "second sentence"]
intermediate_result = ml_client.generate_embedding("hv0c7IkBVsgBeq9g7M_J", input_sentences)

In [34]:
len(intermediate_result['inference_results'][0]['output'][0]['data'])

768

In [23]:
intermediate_embeddings = intermediate_result['inference_results'][0]['output'][0]['data']

In [53]:
intermediate_embeddings

[-0.13202962,
 0.01418858,
 0.14332087,
 0.021491881,
 -0.05981973,
 -0.086128816,
 -0.18076946,
 -0.020853557,
 -0.008571586,
 0.093089566,
 -0.112192534,
 -0.10462415,
 0.10459483,
 0.10192171,
 -0.07270102,
 0.038279083,
 0.08734473,
 0.1020491,
 -0.021157805,
 0.03701996,
 -0.078975216,
 0.057268586,
 -0.09310073,
 0.07782073,
 0.12747307,
 -0.06221346,
 -0.019866977,
 0.011088589,
 0.046685137,
 -0.092736915,
 0.056246933,
 -0.08186202,
 0.047888342,
 0.0057800524,
 0.021204691,
 -0.0067854356,
 -0.029559096,
 0.07725987,
 0.015944052,
 0.119596526,
 -0.005790744,
 -0.03275883,
 0.066716835,
 0.097194396,
 -0.03354133,
 0.0944589,
 -0.016427733,
 0.042111862,
 -0.027776385,
 -0.11537272,
 0.015753813,
 0.0039558792,
 -0.047661364,
 0.07546026,
 -0.08508188,
 -0.033376228,
 -0.0036116792,
 0.10536797,
 -0.07668075,
 0.03603979,
 -0.054378185,
 -0.00793159,
 -0.081345834,
 0.0059337914,
 0.073633164,
 0.047097564,
 0.05857691,
 0.011896286,
 0.021280527,
 -0.0019628052,
 -0.05558976

In [54]:
import torch
from sentence_transformers.models import Dense
dense_layer = Dense(768, 512, bias=True, activation_function=torch.nn.modules.activation.Tanh())
feature_out = {'sentence_embedding': torch.FloatTensor(intermediate_embeddings)}
dense_layer.forward(feature_out)

{'sentence_embedding': tensor([ 8.1934e-02,  1.4211e-02, -1.1370e-02,  3.6984e-02,  4.2161e-02,
          1.5512e-02, -2.2711e-02, -3.1645e-02,  7.0637e-03, -7.4637e-02,
         -4.3127e-02, -3.1221e-02, -3.4831e-02, -4.4128e-02, -4.7553e-02,
          1.3644e-01,  1.8960e-02,  2.4545e-02, -6.1537e-02, -2.7046e-03,
         -1.8756e-02, -3.3346e-02, -2.1584e-02,  1.0324e-01, -6.2985e-02,
         -7.4708e-03,  6.3186e-02,  3.9015e-02,  1.0546e-02,  6.6613e-02,
          2.3360e-02, -7.0247e-03,  1.9972e-02, -4.7376e-02,  2.6925e-02,
         -3.9706e-02, -1.0326e-01,  3.5522e-02, -8.3631e-02, -4.9460e-02,
          1.0584e-01,  1.2453e-01, -1.9170e-02,  3.0961e-02,  4.3749e-02,
         -1.9934e-03, -7.9404e-03, -7.1748e-02,  2.5771e-02,  4.3843e-02,
         -2.3665e-02,  5.9418e-02,  2.5614e-02,  3.2672e-02,  6.2248e-02,
          2.5844e-05,  2.0746e-02, -2.5283e-02,  6.0733e-02,  9.8539e-03,
         -6.1422e-02,  4.1308e-02,  1.1930e-02,  7.1971e-04, -3.2370e-02,
          1.4993

In [55]:
feature_out['sentence_embedding'].cpu().detach().numpy().shape

(512,)

In [56]:
import numpy as np

from sentence_transformers import SentenceTransformer

original_pre_trained_model = SentenceTransformer(model_id) # From Huggingface
original_embedding_data = list(
    original_pre_trained_model.encode(input_sentences, convert_to_numpy=True)
)

In [57]:
np.testing.assert_allclose(original_embedding_data[0], feature_out['sentence_embedding'].cpu().detach().numpy(), rtol=1e-03, atol=1e-05)

AssertionError: 
Not equal to tolerance rtol=0.001, atol=1e-05

Mismatched elements: 512 / 512 (100%)
Max absolute difference: 0.21871796
Max relative difference: 268.95602
 x: array([ 2.924567e-02,  6.141232e-02, -4.720752e-02,  7.542607e-02,
       -1.127941e-02, -2.926223e-02, -9.203106e-04,  7.731914e-03,
        9.389931e-03, -5.170758e-02,  1.561492e-02, -1.805862e-02,...
 y: array([ 8.193411e-02,  1.421066e-02, -1.136951e-02,  3.698449e-02,
        4.216102e-02,  1.551235e-02, -2.271084e-02, -3.164507e-02,
        7.063746e-03, -7.463747e-02, -4.312677e-02, -3.122143e-02,...