### Sentence Transformers

In [1]:
from sentence_transformers import SentenceTransformer

In [2]:
# First encoder under test: load the pretrained checkpoint identified by name.
model = SentenceTransformer("bert-base-nli-mean-tokens")

In [3]:
# Bare expression: the cell output is the model's repr, which lists the
# pipeline modules it is composed of and their settings.
model

SentenceTransformer(
  (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
)

In [4]:
# Small test corpus. The first three sentences are about unrelated topics; the
# last two describe the same dental advice in different words, so a good
# encoder should score that pair as the most similar.
sentences = [
    # unrelated sentences
    "the fifty mannequin heads floating in the pool kind of freaked them out",
    "she swore she just saw her sushi move",
    "he embraced his new life as an eggplant",
] + [
    # paraphrase pair
    "my dentist tells me that chewing bricks is very bad for your teeth",
    "the dental specialist recommended an immediate stop to flossing with construction materials",
]

In [5]:
# Encode every sentence with the first model; the result is bound to
# `embeddings` and reused by the similarity cells below.
embeddings = model.encode(sentences)
# Trailing expression: show the array shape (one row per input sentence).
embeddings.shape

(5, 768)

In [6]:
import numpy as np
from sentence_transformers.util import cos_sim

In [8]:
# Square score matrix with one row and one column per sentence, zero-filled so
# that any cell not written later stays at 0.
sim = np.zeros((len(sentences),) * 2, dtype=np.float64)

In [9]:
# cos_sim accepts whole matrices and returns every pairwise score in a single
# call, so it does not need to be invoked once per sentence. np.tril zeroes the
# entries above the diagonal, which keeps the lower-triangular layout the
# per-column loop produced. Assigning through sim[:] fills the preallocated
# array in place, so `sim` keeps its shape and dtype.
sim[:] = np.tril(cos_sim(embeddings, embeddings).cpu().numpy())

In [10]:
# Bare expression: display the similarity matrix. Only the diagonal and the
# lower triangle are populated; entries above the diagonal remain 0.
sim

array([[0.99999994, 0.        , 0.        , 0.        , 0.        ],
       [0.4091433 , 1.        , 0.        , 0.        , 0.        ],
       [0.10909035, 0.44547969, 1.        , 0.        , 0.        ],
       [0.50074875, 0.30693948, 0.20791668, 1.00000012, 0.        ],
       [0.29936221, 0.38607216, 0.28499275, 0.63849497, 0.99999988]])

In [11]:
# Second encoder under test, loaded by checkpoint name and kept in its own
# variable so the first model remains available.
mpnet = SentenceTransformer("all-mpnet-base-v2")

In [12]:
# Bare expression: the cell output is the model's repr, which lists the
# pipeline modules it is composed of and their settings.
mpnet

SentenceTransformer(
  (0): Transformer({'max_seq_length': 384, 'do_lower_case': False}) with Transformer model: MPNetModel 
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
  (2): Normalize()
)

In [13]:
# NOTE: this cell rebinds `embeddings` and `sim`, so the values computed with
# the first model are no longer reachable once it has run.
sentence_len = len(sentences)
# Re-encode the same sentences with the second model.
embeddings = mpnet.encode(sentences)
# Fresh zero-filled square matrix, one row and one column per sentence.
sim = np.zeros((sentence_len,) * 2)

In [14]:
# cos_sim accepts whole matrices and returns every pairwise score in a single
# call, so it does not need to be invoked once per sentence. np.tril zeroes the
# entries above the diagonal, which keeps the lower-triangular layout the
# per-column loop produced. Assigning through sim[:] fills the preallocated
# array in place, so `sim` keeps its shape and dtype.
sim[:] = np.tril(cos_sim(embeddings, embeddings).cpu().numpy())

In [15]:
# Bare expression: display the similarity matrix. Only the diagonal and the
# lower triangle are populated; entries above the diagonal remain 0.
sim

array([[ 1.00000012,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.26406282,  0.99999988,  0.        ,  0.        ,  0.        ],
       [ 0.16503495,  0.16126682,  1.        ,  0.        ,  0.        ],
       [ 0.04334461,  0.04615873,  0.05670131,  1.00000012,  0.        ],
       [ 0.05398503,  0.06101187, -0.01122262,  0.51847208,  1.00000024]])