# Trace Sentence Transfomer

In [1]:
import torch
from sentence_transformers import SentenceTransformer
model_id = "sentence-transformers/msmarco-distilbert-base-tas-b"
model = SentenceTransformer(model_id)
device = torch.device("cpu")
cpu_model = model.to(device)
sentences = ['Sentence 1','Sentence 2']
features = cpu_model.tokenizer(
            sentences, return_tensors="pt", padding=True, truncation=True
        ).to(device)
ex_input = {
                    "input_ids": features["input_ids"],
                    "attention_mask": features["attention_mask"],
                }
traced_model = torch.jit.trace(model, ex_input, strict=False)
torch.jit.save(traced_model, "traced_sentence_transformer.pt")

  from .autonotebook import tqdm as notebook_tqdm
  mask, torch.tensor(torch.finfo(scores.dtype).min)


In [2]:
loaded_model = torch.jit.load("traced_sentence_transformer.pt")
loaded_model.eval()

RecursiveScriptModule(
  original_name=SentenceTransformer
  (0): RecursiveScriptModule(
    original_name=Transformer
    (auto_model): RecursiveScriptModule(
      original_name=DistilBertModel
      (embeddings): RecursiveScriptModule(
        original_name=Embeddings
        (word_embeddings): RecursiveScriptModule(original_name=Embedding)
        (position_embeddings): RecursiveScriptModule(original_name=Embedding)
        (LayerNorm): RecursiveScriptModule(original_name=LayerNorm)
        (dropout): RecursiveScriptModule(original_name=Dropout)
      )
      (transformer): RecursiveScriptModule(
        original_name=Transformer
        (layer): RecursiveScriptModule(
          original_name=ModuleList
          (0): RecursiveScriptModule(
            original_name=TransformerBlock
            (attention): RecursiveScriptModule(
              original_name=MultiHeadSelfAttention
              (dropout): RecursiveScriptModule(original_name=Dropout)
              (q_lin): RecursiveS

In [3]:
test_sentences = ["first sentence", "second sentence"]
features = cpu_model.tokenizer(
            test_sentences, return_tensors="pt", padding=True, truncation=True
        ).to(device)
test_input = {
                    "input_ids": features["input_ids"],
                    "attention_mask": features["attention_mask"],
                }

pt_embedding = loaded_model(test_input)
pt_embedding

{'input_ids': tensor([[ 101, 2034, 6251,  102],
         [ 101, 2117, 6251,  102]]),
 'attention_mask': tensor([[1, 1, 1, 1],
         [1, 1, 1, 1]]),
 'token_embeddings': tensor([[[ 0.2192, -0.2669, -0.2511,  ...,  0.0324, -0.3444, -0.0308],
          [-0.1659, -0.2614, -0.5352,  ...,  0.2587, -0.1672, -0.2693],
          [ 0.3557, -0.1637, -0.1883,  ...,  0.0900, -0.5482, -0.4730],
          [ 0.7434, -0.2904,  0.0787,  ...,  0.5496, -0.8846, -0.1438]],
 
         [[-0.0738, -0.4391, -0.0976,  ...,  0.0442, -0.1356, -0.3168],
          [ 0.1501, -0.7653, -0.1249,  ...,  0.0637,  0.1336, -0.4225],
          [ 0.1538, -0.2468,  0.0779,  ...,  0.1835, -0.3914, -0.5732],
          [ 0.6191, -0.3375,  0.3204,  ...,  0.4383, -0.5994, -0.5623]]],
        grad_fn=<NativeLayerNormBackward0>),
 'sentence_embedding': tensor([[ 0.2192, -0.2669, -0.2511,  ...,  0.0324, -0.3444, -0.0308],
         [-0.0738, -0.4391, -0.0976,  ...,  0.0442, -0.1356, -0.3168]],
        grad_fn=<CatBackward0>)}

In [4]:
pt_embedding['sentence_embedding'].detach().cpu().numpy().shape

(2, 768)

# Compare Embedding

In [5]:
model = SentenceTransformer(model_id)

In [6]:
original_embedding = model.encode(test_sentences, convert_to_numpy=True)
original_embedding

array([[ 0.21917519, -0.26689667, -0.25107574, ...,  0.03239337,
        -0.34440702, -0.03081493],
       [-0.07382002, -0.43907577, -0.09760092, ...,  0.04419765,
        -0.1355662 , -0.31682503]], dtype=float32)

In [7]:
import numpy as np
print(np.testing.assert_allclose(original_embedding, pt_embedding['sentence_embedding'].detach().cpu().numpy(), rtol=1e-03, atol=1e-05))

None
