In [8]:
from transformers import AutoModel, AutoTokenizer
import numpy as np

# Step 2: Load the model
model_name = "sentence-transformers/all-mpnet-base-v2"
model = AutoModel.from_pretrained(model_name)

# Step 3: Load the pre-trained tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)


def _sentence_embeddings(encoding):
    """Return one L2-normalised sentence embedding per row of *encoding*.

    The model is run on the tokenizer output and the per-token embeddings are
    averaged, weighted by the attention mask so that padding tokens do not
    contribute. ``pooler_output`` is deliberately NOT used: for
    sentence-transformers checkpoints the sentence vector is the masked mean
    of the token embeddings, not the pooler head.

    Args:
        encoding: Tokenizer output containing at least ``input_ids`` and
            ``attention_mask`` tensors (``return_tensors="pt"``).

    Returns:
        A tensor with one unit-length row per input sentence, so that the
        inner product of two rows is their cosine similarity.
    """
    # Token-level embeddings; expected layout is (batch, tokens, hidden).
    token_embeddings = model(**encoding)["last_hidden_state"]

    # Broadcast the mask over the hidden dimension: 1 = real token, 0 = padding.
    mask = encoding["attention_mask"].unsqueeze(-1).to(token_embeddings.dtype)

    # Masked mean; the clamp guards against division by zero for an
    # all-padding row.
    pooled = (token_embeddings * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)

    # Unit-normalise so that a plain dot product equals cosine similarity.
    return pooled / pooled.norm(dim=1, keepdim=True).clamp(min=1e-12)


# Step 4: Define the sentences to be matched against
# NOTE(review): the last description is Spanish while the checkpoint appears
# to be trained mainly on English text -- confirm whether a multilingual model
# is needed for this comparison to be meaningful.
descriptions = [
    "I like to eat many kinds of food. My favorite cuisines are Indian, Turkish and Vietnamese.",
    "This sentence is in English but not related to the input. Let's test the model by writing a sentence in Spanish that is related, but in Spanish to see how the model reacts.",
    "Buscamos una profesional especializado en tecnología con experencia en gestión de proyectos"
]

# Step 5: Preprocess the input and descriptions
# NOTE(review): `input` shadows the Python builtin of the same name. The name
# is kept here because other cells may read it; consider renaming it (e.g. to
# `input_text`) everywhere at once.
input = "I am a software engineer with experience in Python and Java."
description_encodings = tokenizer(descriptions, padding=True, truncation=True, return_tensors="pt")
input_encoding = tokenizer(input, padding=True, truncation=True, return_tensors="pt")

# Step 6: Generate sentence embeddings for each sentence in the pre-defined descriptions
description_embeddings = _sentence_embeddings(description_encodings)

# Step 7: Generate a sentence embedding for the input
input_embedding = _sentence_embeddings(input_encoding)

# Step 8: Compute the cosine similarity between the sentence embedding of the input and each
# sentence embedding of the pre-defined descriptions. Both sides are unit-length, so the inner
# product is the cosine similarity; [0] selects the scores of the single input sentence.
similarity_scores = np.inner(input_embedding.detach().numpy(), description_embeddings.detach().numpy())[0]

# Step 9: Select the description that has the highest cosine similarity with the input as the best match
best_match_index = int(np.argmax(similarity_scores))
best_match = descriptions[best_match_index]

print("The best match for the input is:", best_match)


The best match for the input is: Buscamos una profesional especializado en tecnología con experencia en gestión de proyectos
