# FlagEmbedding LLM-Embedder: English-optimized model (`BAAI/llm-embedder`)

In [3]:
from FlagEmbedding import LLMEmbedder
# Checkpoint identifier handed to LLMEmbedder; kept in one place so it is easy to swap.
MODEL_NAME = 'BAAI/llm-embedder'

# Shared embedder instance used by every cell below.
# NOTE(review): use_fp16=False presumably keeps inference in full precision — confirm
# against the FlagEmbedding documentation.
model = LLMEmbedder(MODEL_NAME, use_fp16=False)

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Import list of texts
import mytexts

# Create a pandas DataFrame from the list of words
import pandas as pd

# Single-column frame holding the English texts imported from `mytexts`.
df = pd.DataFrame(mytexts.texts_en, columns=["text"])


def get_embedding(text):
    """Embed `text` as a key by calling `model.encode_keys` with task "qa".

    Whatever `encode_keys` returns is passed through unchanged.
    """
    return model.encode_keys(text, task="qa")


# One encoder call per row; the result is stored next to its source text.
df['embedding'] = df['text'].apply(get_embedding)

In [5]:
import numpy as np

try:
    from openai.embeddings_utils import cosine_similarity
except ImportError:
    # `openai.embeddings_utils` is not shipped with openai>=1.0 (and older
    # releases need optional extras for it). Fall back to the same formula so
    # this cell does not depend on a pinned openai version.
    def cosine_similarity(a, b):
        """Return the cosine of the angle between vectors `a` and `b`."""
        return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))


def get_query_embedding(text):
    """Embed `text` as a query by calling `model.encode_queries` with task "qa".

    Whatever `encode_queries` returns is passed through unchanged.
    """
    return model.encode_queries(text, task="qa")


def get_similar(source_texts, source_embeddings, target_embedding):
    """Rank texts by cosine similarity of their embeddings to a target vector.

    Parameters
    ----------
    source_texts
        Texts to rank; used to build the ``text`` column of the result.
    source_embeddings
        One embedding per text. It is assigned as a DataFrame column, so a
        pandas Series is aligned on index with ``source_texts``.
    target_embedding
        Vector every source embedding is compared against.

    Returns
    -------
    pandas.DataFrame
        Columns ``text`` and ``similarities``, sorted by ``similarities`` in
        descending order. The original index labels are kept.
    """
    # A distinct local name avoids shadowing the notebook-level `df`.
    ranked = pd.DataFrame(source_texts, columns=['text'])
    ranked['embedding'] = source_embeddings
    ranked['similarities'] = ranked['embedding'].apply(
        lambda emb: cosine_similarity(emb, target_embedding)
    )
    # The embedding column only served the computation; drop it from the display.
    return ranked.sort_values(by='similarities', ascending=False).drop(columns=['embedding'])

# Rank the stored texts against a short English query (a proper name).
get_similar(
    source_texts=df['text'],
    source_embeddings=df['embedding'],
    target_embedding=get_query_embedding('Louis XIV'),
)

Unnamed: 0,text,similarities
7,Castle: A fortified feudal residence that was ...,0.738191
1,Queen: A female monarch who typically inherits...,0.734944
0,King: A male monarch who typically inherits hi...,0.70906
6,Goulash: A dish made from pieces of meat stewe...,0.702807
5,Lion: A member of the cat family and one of th...,0.697316
4,Dog: A domesticated carnivorous mammal that ha...,0.682414
3,Woman: A female human being who is distinguish...,0.681161
2,Man: A male human being who is distinguished b...,0.680867
8,Skittles: A colloquial term for a sport in whi...,0.675007


In [14]:
# Czech query, roughly "a woman sitting on a throne": checks how the ranking
# behaves when the query is not in English.
get_similar(
    source_texts=df['text'],
    source_embeddings=df['embedding'],
    target_embedding=get_query_embedding('žena sedící na trůnu'),
)

Unnamed: 0,text,similarities
6,Goulash: A dish made from pieces of meat stewe...,0.701322
3,Woman: A female human being who is distinguish...,0.661519
8,Skittles: A colloquial term for a sport in whi...,0.660257
2,Man: A male human being who is distinguished b...,0.653746
0,King: A male monarch who typically inherits hi...,0.638671
1,Queen: A female monarch who typically inherits...,0.635569
7,Castle: A fortified feudal residence that was ...,0.634563
5,Lion: A member of the cat family and one of th...,0.623895
4,Dog: A domesticated carnivorous mammal that ha...,0.621748


In [16]:
# Long descriptive English query (a paragraph describing a pin-bowling sport)
# instead of a short keyword.
get_similar(
    source_texts=df['text'],
    source_embeddings=df['embedding'],
    target_embedding=get_query_embedding('sport where a player rolls a ball towards pins or another target. The goal is to knock over the pins on a long playing surface known as a lane. A strike is achieved when all the pins are knocked down on the first roll, and a spare is achieved if all the pins are knocked over on a second roll.'),
)

Unnamed: 0,text,similarities
8,Skittles: A colloquial term for a sport in whi...,0.818117
6,Goulash: A dish made from pieces of meat stewe...,0.71795
0,King: A male monarch who typically inherits hi...,0.717358
1,Queen: A female monarch who typically inherits...,0.70439
3,Woman: A female human being who is distinguish...,0.704244
2,Man: A male human being who is distinguished b...,0.695556
7,Castle: A fortified feudal residence that was ...,0.686836
4,Dog: A domesticated carnivorous mammal that ha...,0.684949
5,Lion: A member of the cat family and one of th...,0.677498


In [15]:
# Chinese query, roughly "a four-legged animal that humans often keep":
# another non-English probe of the ranking.
get_similar(
    source_texts=df['text'],
    source_embeddings=df['embedding'],
    target_embedding=get_query_embedding('人类经常饲养的四足兽'),
)

Unnamed: 0,text,similarities
4,Dog: A domesticated carnivorous mammal that ha...,0.725191
2,Man: A male human being who is distinguished b...,0.719035
3,Woman: A female human being who is distinguish...,0.718909
6,Goulash: A dish made from pieces of meat stewe...,0.71778
8,Skittles: A colloquial term for a sport in whi...,0.716307
5,Lion: A member of the cat family and one of th...,0.713571
0,King: A male monarch who typically inherits hi...,0.684491
7,Castle: A fortified feudal residence that was ...,0.681205
1,Queen: A female monarch who typically inherits...,0.680594


In [13]:
# Vector calculations
# Embedding arithmetic: add the embeddings of the "positive" texts, subtract
# those of the "negative" texts, then rank the stored texts against the result.
positive_text = ["King is a hereditary title of a monarch in a monarchy. In the past, a king was the highest authority in the country and had many privileges.",
                 "Woman is a noun of the female gender that refers to an adult female human. In general language, the word woman is also used as a synonym for lady or madam."]
negative_text = ["Man is a noun of the male gender that refers to an adult male human. In general language, the word man is also used as a synonym for guy or boy."]

# No pre-allocated accumulator is needed: Python's sum() starts from 0 and the
# first addition yields an array of whatever width the model returns, so no
# embedding size has to be hard-coded here.
vector = sum(np.array(get_embedding(text)) for text in positive_text)
vector = vector - sum(np.array(get_embedding(text)) for text in negative_text)

get_similar(df['text'], df['embedding'], vector)

Unnamed: 0,text,similarities
1,Queen: A female monarch who typically inherits...,0.90036
0,King: A male monarch who typically inherits hi...,0.872107
3,Woman: A female human being who is distinguish...,0.796563
7,Castle: A fortified feudal residence that was ...,0.778757
5,Lion: A member of the cat family and one of th...,0.771861
4,Dog: A domesticated carnivorous mammal that ha...,0.739622
2,Man: A male human being who is distinguished b...,0.737517
6,Goulash: A dish made from pieces of meat stewe...,0.717545
8,Skittles: A colloquial term for a sport in whi...,0.707959
