In [29]:
from dotenv import load_dotenv
load_dotenv()

True

In [30]:
from openai import OpenAI
openai_client = OpenAI()

In [31]:
import numpy as np

def cosine_similarity(vec1, vec2):
    """
    Calculate the cosine similarity betwwen two vectors.
    
    Parameters:
         vec1 (numpy array): First vector
         vec2 (numpy array): Second vector
         
         Return:
             float: Cosine similarity between vec1 and vec2
    """
    dot_product = np.dot(vec1, vec2)
    norm_vec1 = np.linalg.norm(vec1)
    norm_vec2 = np.linalg.norm(vec2)
    
    if norm_vec1 == 0 or norm_vec2 == 0:
        return 0.0
    
    return dot_product / (norm_vec1 * norm_vec2)

In [32]:
king_embedding_response = openai_client.embeddings.create(
    input="king",
    model="text-embedding-3-large"
)
king_vector = np.array(king_embedding_response.data[0].embedding)

In [33]:
queen_embedding_response = openai_client.embeddings.create(
    input="queen",
    model="text-embedding-3-large"
)
queen_vector = np.array(queen_embedding_response.data[0].embedding)

In [34]:
king_queen_similarity = cosine_similarity(king_vector, queen_vector)

In [35]:
king_queen_similarity

0.5524973770974239

In [36]:
slave_embedding_response = openai_client.embeddings.create(
    input="slave",
    model="text-embedding-3-large"
)
slave_vector = np.array(slave_embedding_response.data[0].embedding)

In [37]:
king_slave_similarity = cosine_similarity(king_vector, slave_vector)

In [38]:
king_slave_similarity

0.2932395660481018

In [39]:
korean_king_embedding_response = openai_client.embeddings.create(
    input="korean_king",
    model="text-embedding-3-large"
)
korean_king_vector = np.array(korean_king_embedding_response.data[0].embedding)

In [40]:
openai_king_korean_king_similarity = cosine_similarity(king_vector, korean_king_vector)

In [41]:
openai_king_korean_king_similarity

0.4771969991874785

In [42]:
import os

UPSTAGE_API_KEY = os.getenv("UPSTAGE_API_KEY")

upstage_client = OpenAI(
    api_key=UPSTAGE_API_KEY,
    base_url="https://api.upstage.ai/v1/solar"
)

In [43]:
upstage_king_response = upstage_client.embeddings.create(
    input="king",
    model="solar-embedding-1-large-query"
)

upstage_king_vector = np.array(upstage_king_response.data[0].embedding)

In [44]:
upstage_korean_king_response = upstage_client.embeddings.create(
    input="왕",
    model="solar-embedding-1-large-query"
)

upstage_korean_king_vector = np.array(upstage_korean_king_response.data[0].embedding)

In [45]:
upstage_korean_king_vector

array([-0.01207733, -0.0224762 , -0.01322937, ..., -0.00020826,
        0.00362587,  0.01420593])

In [46]:
upstage_king_korean_king_similarity = cosine_similarity(upstage_king_vector, upstage_korean_king_vector)

In [47]:
upstage_king_korean_king_similarity

0.852149171074866

In [48]:
upstage_queen_response = upstage_client.embeddings.create(
    input="queen",
    model="solar-embedding-1-large-query"
)

upstage_queen_vector = np.array(upstage_queen_response.data[0].embedding)

In [49]:
upstage_king_queen_similarity = cosine_similarity(upstage_king_vector, upstage_queen_vector)

In [50]:
upstage_king_queen_similarity

0.6277983746920601

In [51]:
upstage_slave_response = upstage_client.embeddings.create(
    input="slave",
    model="solar-embedding-1-large-query"
)

upstage_slave_vector = np.array(upstage_slave_response.data[0].embedding)
upstage_slave_vector

array([-0.00225449, -0.01824951, -0.00458145, ...,  0.01241302,
       -0.01144409,  0.01820374])

In [52]:
upstage_king_slave_similarity = cosine_similarity(upstage_king_vector, upstage_slave_vector)

In [53]:
upstage_king_slave_similarity

0.4721607744346731

In [54]:
upstage_korean_queen_response = upstage_client.embeddings.create(
    input="여왕",
    model="solar-embedding-1-large-query"
)

upstage_korean_queen_vector = np.array(upstage_korean_queen_response.data[0].embedding)

In [55]:
upstage_korean_king_korean_queen_similarity = cosine_similarity(upstage_korean_king_vector, upstage_korean_queen_vector)

In [56]:
upstage_korean_king_korean_queen_similarity

0.6812030702716614