In [None]:
import pathlib

import faiss
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModel
from sentence_transformers import SentenceTransformer, util


In [None]:
# Model cache directory: a "models" folder next to the current working directory.
MODEL_DIR = pathlib.Path.cwd().parent / "models"

In [None]:
# Device used for the AutoModel forward passes.
# CPU is the deliberate default; set USE_GPU = True to run on a CUDA GPU
# when one is available (falls back to CPU otherwise).
USE_GPU = False
device = torch.device("cuda" if USE_GPU and torch.cuda.is_available() else "cpu")

In [None]:
# Model optimized for semantic search.
# See https://www.sbert.net/docs/pretrained_models.html
# Reference implementation: the checkpoint loaded through sentence-transformers.
# Pin it to `device` so both code paths being compared run on the same hardware
# (by default SentenceTransformer picks a device on its own).
model_name = 'multi-qa-mpnet-base-dot-v1'
model = SentenceTransformer(model_name, device=str(device))

# The same checkpoint loaded through plain `transformers`, cached under MODEL_DIR.
model_name_am = 'sentence-transformers/multi-qa-mpnet-base-dot-v1'
tokenizer_am = AutoTokenizer.from_pretrained(model_name_am, cache_dir=MODEL_DIR)
model_am = AutoModel.from_pretrained(model_name_am, cache_dir=MODEL_DIR)
# encode() moves its inputs to `device`, so the weights must live there too;
# otherwise a non-CPU device fails with a device-mismatch error.
# eval() makes inference mode (no dropout) explicit.
model_am = model_am.to(device).eval()


In [None]:
def encode(strs):
    """Embed text with the raw ``AutoModel`` by taking the first-token hidden state.

    The input is tokenized with padding and truncation, moved to ``device``,
    and passed through ``model_am`` without gradient tracking.

    Args:
        strs: A string or a list of strings to embed.

    Returns:
        A NumPy array (on CPU) holding, for every input sequence, the last
        hidden state at sequence position 0. Assumes ``last_hidden_state`` is
        laid out as (batch, sequence, hidden), so the result is 2-D.
    """
    batch = tokenizer_am(strs, padding=True, truncation=True, return_tensors="pt")
    inputs = {name: tensor.to(device) for name, tensor in batch.items()}

    with torch.no_grad():
        outputs = model_am(**inputs)

    # Position 0 of each sequence is used as the sentence embedding.
    first_token_states = outputs.last_hidden_state[:, 0, :]
    return first_token_states.detach().cpu().numpy()

# Embedding dimensionality, probed by encoding a throwaway string
# (the result is 2-D, so the last axis is the embedding axis).
d = encode(["foo"]).shape[-1]
d

In [None]:
# Embed one query and two candidate passages through the raw AutoModel path.
query_embedding = encode('How big is London')
passage_embedding = encode(
    [
        'London has 9,787,426 inhabitants at the 2011 census',
        'London is known for its financial district',
    ]
)

# Shape of the passage batch returned by encode().
passage_embedding.shape

In [None]:
# Same exercise through the SentenceTransformer wrapper, scored by dot product.
query_embedding = model.encode('what is the population')
passage_embedding = model.encode(
    [
        'London has 9,787,426 inhabitants at the 2011 census',
        'London is known for its financial district',
    ]
)

similarity_scores = util.dot_score(query_embedding, passage_embedding)
print("Similarity:", similarity_scores)

In [None]:
# Side-by-side look at the first three components from each code path.
(
    encode(['foo'])[0, :3],   # AutoModel path: row 0 of the batch
    model.encode('foo')[:3],  # SentenceTransformer path: single vector
)

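OK. The two models give the same embedding values; the only difference is shape
(`encode` returns a 2-D batch array, while `model.encode` on a single string returns a 1-D vector).
So we can use `AutoModel` for everything.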