# OpenAI embeddings model which is multi-language
### English test

In [1]:
from dotenv import load_dotenv

# Load environment variables from a .env file so that the settings read later
# via os.getenv (API_KEY, BASE_URL, DEPLOYMENT_NAME) are available.
load_dotenv()

True

In [2]:
import os
import openai

# Set OpenAI API credentials (Azure-hosted endpoint).
def _require_env(name):
    """Return environment variable `name`, failing fast if it is unset or empty.

    os.getenv returns None for a missing variable; without this check a
    misconfigured environment would only be noticed later, when the first
    API request is made.

    Raises:
        RuntimeError: if the variable is missing or empty.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(
            f"Environment variable {name!r} is not set; "
            "define it in the .env file or in the environment."
        )
    return value


openai.api_type = "azure"
openai.api_key = _require_env("API_KEY")
openai.api_base = _require_env("BASE_URL")
openai.api_version = "2023-07-01-preview"
# Deployment used for the embedding requests (passed as deployment_id).
deployment_name = _require_env("DEPLOYMENT_NAME")

In [3]:
# Import list of texts
import mytexts

# Create a pandas DataFrame from the list of words
import pandas as pd

# Put the English texts from mytexts.texts_en into a one-column ("text") DataFrame.
df = pd.DataFrame(mytexts.texts_en, columns=["text"])

def get_embedding(text):
    """Return the embedding of a single text.

    Sends `text` as a one-element input list to the deployment named by the
    notebook-level `deployment_name` and returns the `embedding` field of the
    first entry in the response's `data` list.
    """
    response = openai.Embedding.create(input=[text], deployment_id=deployment_name)
    first_entry = response['data'][0]
    return first_entry['embedding']

# Embed every text; get_embedding is called once per row.
df['embedding'] = df['text'].apply(get_embedding)

In [4]:
from openai.embeddings_utils import cosine_similarity
import numpy as np

def get_similar(source_texts, source_embeddings, target_embedding):
    """Rank texts by cosine similarity of their embeddings to a target embedding.

    Args:
        source_texts: The candidate texts, as a list or a pandas Series
            (the Series may have any name).
        source_embeddings: One embedding per text. A Series is aligned to the
            texts by index label; a plain sequence is matched by position.
        target_embedding: The embedding each source embedding is compared with.

    Returns:
        A new DataFrame with the columns `text` and `similarities`, sorted by
        similarity in descending order.
    """
    # pd.DataFrame(series, columns=['text']) picks the column by the Series'
    # *name*, so a Series not named 'text' would silently produce an all-NaN
    # column. Name the column explicitly instead.
    if isinstance(source_texts, pd.Series):
        ranked = source_texts.to_frame(name='text')
    else:
        ranked = pd.DataFrame(source_texts, columns=['text'])

    # `ranked` is local on purpose: it must not be confused with the
    # notebook-level `df`.
    ranked['embedding'] = source_embeddings
    ranked['similarities'] = ranked['embedding'].apply(
        lambda embedding: cosine_similarity(embedding, target_embedding)
    )
    return ranked.sort_values(by='similarities', ascending=False).drop(columns=['embedding'])

# Query with a proper name: rank the English texts by similarity to "Louis XIV".
get_similar(df['text'], df['embedding'], get_embedding('Louis XIV'))

Unnamed: 0,text,similarities
0,King: A male monarch who typically inherits hi...,0.780172
7,Castle: A fortified feudal residence that was ...,0.769544
1,Queen: A female monarch who typically inherits...,0.755862
5,Lion: A member of the cat family and one of th...,0.720298
2,Man: A male human being who is distinguished b...,0.709873
8,Skittles: A colloquial term for a sport in whi...,0.706822
4,Dog: A domesticated carnivorous mammal that ha...,0.694139
6,Goulash: A dish made from pieces of meat stewe...,0.692579
3,Woman: A female human being who is distinguish...,0.686459


In [9]:
# Czech query ("a woman sitting on a throne") ranked against the English texts.
get_similar(df['text'], df['embedding'], get_embedding('žena sedící na trůnu'))

Unnamed: 0,text,similarities
1,Queen: A female monarch who typically inherits...,0.747302
3,Woman: A female human being who is distinguish...,0.746291
6,Goulash: A dish made from pieces of meat stewe...,0.721599
7,Castle: A fortified feudal residence that was ...,0.712214
2,Man: A male human being who is distinguished b...,0.71201
0,King: A male monarch who typically inherits hi...,0.711632
5,Lion: A member of the cat family and one of th...,0.699213
8,Skittles: A colloquial term for a sport in whi...,0.691587
4,Dog: A domesticated carnivorous mammal that ha...,0.684612


In [13]:
# English query: a long, paraphrased description of a sport played by rolling a ball at pins.
get_similar(df['text'], df['embedding'], get_embedding('sport where a player rolls a ball towards pins or another target. The goal is to knock over the pins on a long playing surface known as a lane. A strike is achieved when all the pins are knocked down on the first roll, and a spare is achieved if all the pins are knocked over on a second roll.'))

Unnamed: 0,text,similarities
8,Skittles: A colloquial term for a sport in whi...,0.837932
0,King: A male monarch who typically inherits hi...,0.752774
7,Castle: A fortified feudal residence that was ...,0.73923
1,Queen: A female monarch who typically inherits...,0.737297
2,Man: A male human being who is distinguished b...,0.722209
3,Woman: A female human being who is distinguish...,0.721992
6,Goulash: A dish made from pieces of meat stewe...,0.716715
5,Lion: A member of the cat family and one of th...,0.713899
4,Dog: A domesticated carnivorous mammal that ha...,0.700414


In [10]:
# Chinese query ("a four-legged animal often kept by humans") ranked against the English texts.
get_similar(df['text'], df['embedding'], get_embedding('人类经常饲养的四足兽'))

Unnamed: 0,text,similarities
4,Dog: A domesticated carnivorous mammal that ha...,0.783464
5,Lion: A member of the cat family and one of th...,0.739099
2,Man: A male human being who is distinguished b...,0.697929
6,Goulash: A dish made from pieces of meat stewe...,0.69742
7,Castle: A fortified feudal residence that was ...,0.694151
0,King: A male monarch who typically inherits hi...,0.692136
8,Skittles: A colloquial term for a sport in whi...,0.691592
3,Woman: A female human being who is distinguish...,0.689538
1,Queen: A female monarch who typically inherits...,0.686652


In [8]:
# Vector calculations: embedding("King") + embedding("Woman") - embedding("Man"),
# then rank the texts against the resulting vector.
positive_text = ["King is a hereditary title of a monarch in a monarchy. In the past, a king was the highest authority in the country and had many privileges.",
                 "Woman is a noun of the female gender that refers to an adult female human. In general language, the word woman is also used as a synonym for lady or madam."]
negative_text = ["Man is a noun of the male gender that refers to an adult male human. In general language, the word man is also used as a synonym for guy or boy."]
# No pre-allocated zero vector is needed: the sums below take their size from
# the embeddings themselves, so nothing here hard-codes the embedding dimension.
vector = sum(np.array(get_embedding(text)) for text in positive_text)
vector = vector + sum(-np.array(get_embedding(text)) for text in negative_text)

get_similar(df['text'], df['embedding'], vector)

Unnamed: 0,text,similarities
1,Queen: A female monarch who typically inherits...,0.865427
0,King: A male monarch who typically inherits hi...,0.821113
3,Woman: A female human being who is distinguish...,0.76455
7,Castle: A fortified feudal residence that was ...,0.735017
5,Lion: A member of the cat family and one of th...,0.717342
8,Skittles: A colloquial term for a sport in whi...,0.704035
4,Dog: A domesticated carnivorous mammal that ha...,0.679559
6,Goulash: A dish made from pieces of meat stewe...,0.658388
2,Man: A male human being who is distinguished b...,0.653546
