In [68]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer

from IPython.display import Markdown, display

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme()

In [69]:
# Load the pretrained "all-MiniLM-L6-v2" Sentence Transformer once. The helper
# functions defined below (`analogy`, `find_best_match`) read this module-level
# `model` to turn text into embedding vectors.
model = SentenceTransformer("all-MiniLM-L6-v2")

In [70]:
def analogy(original: str, to_subract: str, to_add: str):
  """Build the embedding-arithmetic vector ``original - to_subract + to_add``.

  Each argument is encoded with the module-level ``model`` and the three
  resulting vectors are combined, e.g. ``analogy('king', 'man', 'woman')``.

  Args:
    original: Text whose embedding is the starting point.
    to_subract: Text whose embedding is subtracted from the starting point.
    to_add: Text whose embedding is added afterwards.

  Returns:
    The combined embedding, as returned by the arithmetic on ``model.encode``
    outputs.
  """
  base_vec = model.encode(original)
  removed_vec = model.encode(to_subract)
  added_vec = model.encode(to_add)
  shifted = base_vec - removed_vec
  return shifted + added_vec

In [71]:
def find_best_match(embeddings, possible_words):
  """Return the candidate word whose embedding is nearest to ``embeddings``.

  Nearness is the Euclidean (L2) distance between ``embeddings`` and the
  embedding of each candidate, computed with the module-level ``model``.

  Args:
    embeddings: A single query vector (despite the plural name), such as the
      result of ``analogy``. Expected to be 1-D with the same length as the
      model's embeddings.
    possible_words: Sequence of candidate strings. It is not modified.

  Returns:
    The element of ``possible_words`` with the smallest distance. If several
    candidates are exactly tied, the one that appears first wins, so the
    result is deterministic.

  Raises:
    ValueError: If ``possible_words`` is empty.
  """
  candidates = list(possible_words)
  if not candidates:
    raise ValueError("possible_words must contain at least one candidate")

  # One batched encode call instead of one model call per word; the result has
  # one row per candidate.
  candidate_vectors = model.encode(candidates)

  # No shuffling: it made tie-breaking non-reproducible and advanced NumPy's
  # global random state as a hidden side effect.
  distances = np.linalg.norm(candidate_vectors - embeddings, axis=1)
  return candidates[int(np.argmin(distances))]

In [72]:
def similarity(v1, v2):
  """Cosine similarity of two vectors.

  Computes the dot product of ``v1`` and ``v2`` divided by the product of
  their Euclidean norms.

  Args:
    v1: First vector (array-like).
    v2: Second vector (array-like), same length as ``v1``.

  Returns:
    The cosine of the angle between the vectors. There is no guard for
    zero-length vectors: the denominator is 0 in that case.
  """
  dot_product = np.dot(v1, v2)
  norm_product = np.linalg.norm(v1) * np.linalg.norm(v2)
  return dot_product / norm_product

**Candidate Words for the Analogy Solutions**

In [87]:
# Candidate vocabulary searched by the analogy examples. It contains the
# expected answers (e.g. 'queen', 'your', 'paris') alongside other words.
possible_words = [
  'queen', 'duchess', 'princess', 'lady',
  'shark', 'black', 'linen',
  'its', 'your',
  'cat', 'dog',
  'court', 'palace',
  'hers',
  'florence', 'marseille', 'london',
  'lilly', 'gilded',
  'paris', 'berlin', 'canberra', 'new delhi',
]

**`Queen = King - Man + Woman` Example**

In [88]:
# Build the king - man + woman vector; the next cell matches it against the
# candidate words.
embeddings = analogy('king', 'man', 'woman')
# Preview: the vector's shape, then its first five components.
print(embeddings.shape)
embeddings[:5]

(384,)


array([ 0.00448471,  0.01810496, -0.06230411,  0.15338686, -0.1152221 ],
      dtype=float32)

In [89]:
# Closest candidate to the king - man + woman vector; relies on `embeddings`
# assigned in the previous cell.
find_best_match(embeddings, possible_words)

'queen'

**`Your = His - He + You` Example**

In [90]:
# his - he + you. Note: this rebinds `embeddings`, replacing the vector from
# the previous example.
embeddings = analogy('his', 'he', 'you')

In [91]:
# Closest candidate to the his - he + you vector; relies on `embeddings`
# assigned in the previous cell.
find_best_match(embeddings, possible_words)

'your'

**`Paris = Rome - Italy + France` Example**

In [92]:
# Rome - Italy + France. Note: this rebinds `embeddings`, replacing the vector
# from the previous example.
embeddings = analogy('Rome', 'Italy', 'France')

In [93]:
# Closest candidate to the Rome - Italy + France vector; relies on
# `embeddings` assigned in the previous cell.
find_best_match(embeddings, possible_words)

'paris'