## Quick start

### Usage

In [1]:
from sentence_transformers import SentenceTransformer, util

In [3]:
# Instantiate the SentenceTransformer model named 'all-MiniLM-L6-v2'
model = SentenceTransformer('all-MiniLM-L6-v2')

# Sentences we would like to encode
sentences = [
    'Thanks for the internet so i can write this line',
    'Internet help me to access almost every knowledges in this world, include this tutorial',
    # "I'm really greatful for knowing this tutorial!",  # optional extra sentence, currently disabled
    'I can write this line because of internet',
]

# Sentences are encoded by calling model.encode(); one embedding is produced per sentence
embeddings = model.encode(sentences)

# Show every sentence together with the shape of its embedding
for idx in range(len(sentences)):
    print("Sentence:", sentences[idx])
    # print("Embedding:", embeddings[idx])  # uncomment to inspect the raw vector
    print(embeddings[idx].shape)  # (384,) - fixed-size vector
    print()

Sentence: Thanks for the internet so i can write this line
(384,)

Sentence: Internet help me to access almost every knowledges in this world, include this tutorial
(384,)

Sentence: I can write this line because of internet
(384,)



### Comparing Sentence Similarities

In [4]:
# Cosine similarity of every unordered pair (id1 < id2) among the sentences above.
# The loop bound comes from len(sentences) instead of a hard-coded 3, so the cell
# stays correct when sentences are added to or removed from the list.
num_sentences = len(sentences)
for id1 in range(num_sentences):
    for id2 in range(id1 + 1, num_sentences):
        # util.cos_sim is called per pair so the printed value keeps its tensor([[...]]) form
        similarity = util.cos_sim(embeddings[id1], embeddings[id2])
        print("Cosine similarity of [" + sentences[id1] + "] and [" + sentences[id2] + "]:", similarity)

Cosine similarity of [Thanks for the internet so i can write this line] and [Internet help me to access almost every knowledges in this world, include this tutorial]: tensor([[0.2462]])
Cosine similarity of [Thanks for the internet so i can write this line] and [I can write this line because of internet]: tensor([[0.7547]])
Cosine similarity of [Internet help me to access almost every knowledges in this world, include this tutorial] and [I can write this line because of internet]: tensor([[0.3230]])


In [7]:
# Another way to compute cosine similarity: build the full pairwise matrix in one call
new_sentences = [
    'A man is eating food.',
    'A man is eating a piece of bread.',
    'The girl is carrying a baby.',
    'A man is riding a horse.',
    'A woman is playing violin.',
    'Two men pushed carts through the woods.',
    'A man is riding a white horse on an enclosed ground.',
    'A monkey is playing drums.',
    'Someone in a gorilla costume is playing a set of drums.',
]

# Encode all sentences
new_embeddings = model.encode(new_sentences)

# Compute cosine similarity between all pairs; cos_sim[i][j] compares sentence i with sentence j
cos_sim = util.cos_sim(new_embeddings, new_embeddings)

# Collect each unordered pair (i < j) once as [score, i, j].
# .item() turns the single-element tensor into a plain Python float, so the list
# is sorted on ordinary numbers rather than on tensor objects.
all_sentence_combinations = [
    [cos_sim[i][j].item(), i, j]
    for i in range(len(cos_sim) - 1)
    for j in range(i + 1, len(cos_sim))
]

# Sort list by the highest cosine similarity score
all_sentence_combinations = sorted(all_sentence_combinations, key=lambda pair: pair[0], reverse=True)

print("Top-5 most similar pairs:")
for score, i, j in all_sentence_combinations[:5]:
    # Use the score already stored with the pair instead of re-indexing the matrix
    print("{} \t {} \t {:.4f}".format(new_sentences[i], new_sentences[j], score))


Top-5 most similar pairs:
A man is eating food. 	 A man is eating a piece of bread. 	 0.7553
A man is riding a horse. 	 A man is riding a white horse on an enclosed ground. 	 0.7369
A monkey is playing drums. 	 Someone in a gorilla costume is playing a set of drums. 	 0.6433
A woman is playing violin. 	 Someone in a gorilla costume is playing a set of drums. 	 0.2564
A man is eating food. 	 A man is riding a horse. 	 0.2474


In [7]:
# Dimensions of the pairwise similarity matrix (rows x columns)
print(cos_sim.size())

torch.Size([9, 9])
