In [19]:
import os

import numpy as np
import onnx
import onnxruntime as ort
import torch
from transformers import AutoModelForMaskedLM, AutoTokenizer

## Running the model with Transformers and Torch

In [17]:
# Two sample inputs: one long passage and one short sentence, so the batch needs padding.
sentences = [
    (
        "The Manhattan Project and its atomic bomb helped bring an end to World War II. "
        "Its legacy of peaceful uses of atomic energy continues to have an impact on history and science."
    ),
    "My Name is Nirant",
]

## PyTorch Code from the [SPLADERunner](https://github.com/PrithivirajDamodaran/SPLADERunner) library

In [5]:
# Read the Hugging Face access token from the environment rather than
# embedding it in the notebook: a token saved in a shared notebook is leaked
# and has to be revoked. Evaluates to None when HF_TOKEN is not set.
hf_token = os.environ.get("HF_TOKEN")

In [18]:
# Download the model and tokenizer
device = "cuda:0" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained("prithivida/Splade_PP_en_v1", token=hf_token)
# Inverse vocabulary (id -> token string), used to print readable terms below.
reverse_voc = {v: k for k, v in tokenizer.vocab.items()}
model = AutoModelForMaskedLM.from_pretrained("prithivida/Splade_PP_en_v1", token=hf_token)
model.to(device)

# Tokenize the input
inputs = tokenizer(sentences, return_tensors="pt", padding=True, truncation=True, max_length=512)
inputs = {key: val.to(device) for key, val in inputs.items()}
input_ids = inputs["input_ids"]
attention_mask = inputs["attention_mask"]
token_type_ids = inputs["token_type_ids"]

# Run model and prepare sparse vector.
# Inference only, so skip autograd bookkeeping for the forward pass.
with torch.no_grad():
    outputs = model(**inputs)
logits = outputs.logits
print("Output Logits shape: ", logits.shape)
print("Output Attention mask shape: ", attention_mask.shape)
relu_log = torch.log(1 + torch.relu(logits))
# Zero out the positions that the attention mask marks as padding.
weighted_log = relu_log * attention_mask.unsqueeze(-1)
# Max over the sequence axis (dim=1) leaves one vocabulary-sized row per sentence.
max_val, _ = torch.max(weighted_log, dim=1)
# No squeeze here: a blanket squeeze() would drop the batch axis when a single
# sentence is encoded, and the per-sentence loops below would then iterate
# over scalars instead of rows.
vector = max_val
print("Sparse Vector shape: ", vector.shape)
# squeeze(-1) removes only the trailing index axis of nonzero(), so a row with
# exactly one non-zero entry still yields a one-element list (not a bare int).
cols = [vec.nonzero().squeeze(-1).cpu().tolist() for vec in vector]
weights = [vec[col].cpu().tolist() for vec, col in zip(vector, cols)]

idx = 1
cols, weights = cols[idx], weights[idx]
# Print the BOW representation, highest weight first
d = dict(zip(cols, weights))
sorted_d = dict(sorted(d.items(), key=lambda item: item[1], reverse=True))
bow_rep = [(reverse_voc[k], round(v, 2)) for k, v in sorted_d.items()]
print(f"SPLADE BOW rep for sentence:\t{sentences[idx]}\n{bow_rep}")

Output Logits shape:  torch.Size([2, 36, 30522])
Output Attention mask shape:  torch.Size([2, 36])
Sparse Vector shape:  torch.Size([2, 30522])
SPLADE BOW rep for sentence:	My Name is Nirant
[('##rant', 3.13), ('ni', 2.96), ('name', 2.57), ('my', 1.39), ('thomas', 0.91), ('who', 0.8), ('austin', 0.61), (',', 0.46), ('me', 0.43), ('surname', 0.34), ('whom', 0.3), ('his', 0.22), ('tribe', 0.11), ('i', 0.02)]


## Export with output_attentions and logits

In [21]:
from optimum.exporters.onnx import main_export
from transformers import AutoTokenizer

# Hub id of the model, and the local folder derived from it ("/" becomes "_").
model_id = "nirantk/SPLADE_PP_en_v1"
output_dir = "models/" + model_id.replace("/", "_")
# Keyword arguments intended for main_export (see the disabled call at the end of this cell).
model_kwargs = dict(output_attentions=True, return_dict=True)

print(f"Exporting model to {output_dir}")
# NOTE(review): the main_export call below is commented out, so as written this
# cell only writes the tokenizer files into output_dir.
tokenizer.save_pretrained(output_dir)
# main_export(
#     model_id,
#     output=output_dir,
#     no_post_process=True,
#     model_kwargs=model_kwargs,
#     token=hf_token,
# )

Exporting model to models/nirantk_SPLADE_PP_en_v1


('models/nirantk_SPLADE_PP_en_v1/tokenizer_config.json',
 'models/nirantk_SPLADE_PP_en_v1/special_tokens_map.json',
 'models/nirantk_SPLADE_PP_en_v1/vocab.txt',
 'models/nirantk_SPLADE_PP_en_v1/added_tokens.json',
 'models/nirantk_SPLADE_PP_en_v1/tokenizer.json')

## Running the model with ONNX

In [24]:
from optimum.onnxruntime import ORTModelForMaskedLM

# The ONNX model and its tokenizer are loaded from the same Hub repository.
# Note that this rebinds `model` and `tokenizer`, replacing the PyTorch objects
# created earlier in the notebook.
onnx_repo_id = "nirantk/SPLADE_PP_en_v1"
model = ORTModelForMaskedLM.from_pretrained(onnx_repo_id)
tokenizer = AutoTokenizer.from_pretrained(onnx_repo_id)

In [29]:
# Tokenize straight to NumPy arrays. The ONNX model is called with NumPy inputs,
# so building torch tensors, moving them to `device` and copying them back to
# the CPU is a wasted round trip. It also left `attention_mask` as a (possibly
# CUDA) torch tensor, which NumPy code cannot consume.
encoded = tokenizer(sentences, return_tensors="np", padding=True, truncation=True, max_length=512)
inputs = {key: val for key, val in encoded.items()}
input_ids = inputs["input_ids"]
attention_mask = inputs["attention_mask"]
token_type_ids = inputs["token_type_ids"]

# Inputs passed to the ONNX model, keyed by input name.
onnx_input = {
    "input_ids": input_ids,
    "attention_mask": attention_mask,
    "token_type_ids": token_type_ids,
}

logits = model(**onnx_input).logits

In [30]:
# Display the shape of the logits returned by the ONNX model call above.
logits.shape

(2, 36, 30522)

In [41]:
print("Output Logits shape: ", logits.shape)

# NumPy counterpart of torch.log(1 + torch.relu(logits))
relu_log = np.log(1 + np.maximum(logits, 0))

# Equivalent to relu_log * attention_mask.unsqueeze(-1).
# Take the mask from `onnx_input`, which holds the NumPy array that was fed to
# the ONNX model; the separate `attention_mask` variable may be a torch tensor
# on a GPU, which NumPy cannot convert.
attention_mask_np = np.asarray(onnx_input["attention_mask"])
weighted_log = relu_log * np.expand_dims(attention_mask_np, axis=-1)

# Equivalent to torch.max(weighted_log, dim=1); np.max returns only the values.
max_val = np.max(weighted_log, axis=1)

# No squeeze here: squeezing would drop the batch axis when a single sentence
# is encoded, and the per-sentence loops below would then iterate over scalars.
vector = max_val
print("Sparse Vector shape: ", vector.shape)

# Each `vec` is one row of `vector`, so nonzero()[0] is its index array.
# Squeezing that array would turn a single index into a bare int and break
# the zip() below.
cols = [vec.nonzero()[0].tolist() for vec in vector]
weights = [vec[col].tolist() for vec, col in zip(vector, cols)]

idx = 1
cols, weights = cols[idx], weights[idx]
# Print the BOW representation, highest weight first.
# NOTE(review): `reverse_voc` was built from the tokenizer loaded for the
# PyTorch run; this assumes the ONNX repo's tokenizer shares that vocabulary.
d = dict(zip(cols, weights))
sorted_d = dict(sorted(d.items(), key=lambda item: item[1], reverse=True))
bow_rep = [(reverse_voc[k], round(v, 2)) for k, v in sorted_d.items()]
print(f"SPLADE BOW rep for sentence:\t{sentences[idx]}\n{bow_rep}")

Output Logits shape:  (2, 36, 30522)
Sparse Vector shape:  (2, 30522)
SPLADE BOW rep for sentence:	My Name is Nirant
[('##rant', 3.13), ('ni', 2.96), ('name', 2.57), ('my', 1.39), ('thomas', 0.91), ('who', 0.8), ('austin', 0.61), (',', 0.46), ('me', 0.43), ('surname', 0.34), ('whom', 0.3), ('his', 0.22), ('tribe', 0.11), ('i', 0.02)]
