In [3]:
from sentence_transformers import SentenceTransformer

# Load the pretrained model that turns sentences into embeddings.
model = SentenceTransformer('all-MiniLM-L6-v2')

# The sentences to encode.
sentences = [
    'This framework generates embeddings for each input sentence',
    'Sentences are passed as a list of string.',
    'The quick brown fox jumps over the lazy dog.',
]

# Encode the whole list with a single model.encode() call.
sentence_embeddings = model.encode(sentences)

# Print every sentence followed by the embedding at the same position,
# with an empty line between entries.
for position, text in enumerate(sentences):
    print("Sentence:", text)
    print("Embedding:", sentence_embeddings[position])
    print("")

Sentence: This framework generates embeddings for each input sentence
Embedding: [-1.37173673e-02 -4.28515188e-02 -1.56286117e-02  1.40537480e-02
  3.95537615e-02  1.21796295e-01  2.94334143e-02 -3.17524299e-02
  3.54959853e-02 -7.93140084e-02  1.75878387e-02 -4.04369943e-02
  4.97259647e-02  2.54912283e-02 -7.18700960e-02  8.14968869e-02
  1.47069315e-03  4.79627140e-02 -4.50336449e-02 -9.92174894e-02
 -2.81769652e-02  6.45046234e-02  4.44670245e-02 -4.76216972e-02
 -3.52952629e-02  4.38671745e-02 -5.28566092e-02  4.33048379e-04
  1.01921506e-01  1.64072346e-02  3.26996334e-02 -3.45986784e-02
  1.21339411e-02  7.94871300e-02  4.58344258e-03  1.57778524e-02
 -9.68206115e-03  2.87625846e-02 -5.05806133e-02 -1.55793587e-02
 -2.87906844e-02 -9.62280761e-03  3.15556936e-02  2.27349009e-02
  8.71449262e-02 -3.85027155e-02 -8.84718299e-02 -8.75497703e-03
 -2.12343354e-02  2.08923239e-02 -9.02078077e-02 -5.25732152e-02
 -1.05638849e-02  2.88310628e-02 -1.61455087e-02  6.17836369e-03
 -1.23234

In [4]:
from sentence_transformers import SentenceTransformer, util

# Load the pretrained model used to embed both sentences.
model = SentenceTransformer('all-MiniLM-L6-v2')

# The two sentences to compare.
sentence_pair = (
    "This is a red cat with a hat.",
    "Have you seen my red cat?",
)

# Each sentence is embedded by its own model.encode() call, in order.
emb1, emb2 = (model.encode(text) for text in sentence_pair)

# Cosine similarity between the two embeddings.
cos_sim = util.cos_sim(emb1, emb2)
print("Cosine-Similarity:", cos_sim)

Cosine-Similarity: tensor([[0.6153]])


In [5]:
from itertools import combinations

from sentence_transformers import SentenceTransformer, util
model = SentenceTransformer('all-MiniLM-L6-v2')

# How many of the highest-scoring pairs to report.
TOP_K = 5

# NOTE(review): the saved output of this cell lists
# 'A man is eating a piece of bread.', which is not in this list, so the
# recorded output appears to predate an edit -- re-run the cell to refresh it.
sentences = ['A man is eating food.',
          'A man is eating an apple.',
          'The girl is carrying a baby.',
          'A man is riding a horse.',
          'A woman is playing violin.',
          'Two men pushed carts through the woods.',
          'A man is riding a white horse on an enclosed ground.',
          'A monkey is playing drums.',
          'Someone in a gorilla costume is playing a set of drums.'
          ]

# Encode all sentences
embeddings = model.encode(sentences)

# Compute cosine similarity between all pairs
cos_sim = util.cos_sim(embeddings, embeddings)

# Collect every unordered pair (i < j) exactly once as [score, i, j];
# self-pairs (i == j) are skipped. combinations() yields the pairs in the
# same order as the equivalent nested index loops.
all_sentence_combinations = [
    [cos_sim[i][j], i, j]
    for i, j in combinations(range(len(cos_sim)), 2)
]

# Sort list by the highest cosine similarity score
all_sentence_combinations = sorted(all_sentence_combinations, key=lambda x: x[0], reverse=True)

print("Top-{} most similar pairs:".format(TOP_K))
for score, i, j in all_sentence_combinations[:TOP_K]:
    # Use the score already stored with the pair instead of re-indexing cos_sim.
    print("{} \t {} \t {:.4f}".format(sentences[i], sentences[j], score))

Top-5 most similar pairs:
A man is eating food. 	 A man is eating a piece of bread. 	 0.7553
A man is riding a horse. 	 A man is riding a white horse on an enclosed ground. 	 0.7369
A monkey is playing drums. 	 Someone in a gorilla costume is playing a set of drums. 	 0.6433
A woman is playing violin. 	 Someone in a gorilla costume is playing a set of drums. 	 0.2564
A man is eating food. 	 A man is riding a horse. 	 0.2474
