In [None]:
import os
from dotenv import load_dotenv
import cohere
import numpy as np
import pandas as pd

# Run python-dotenv's loader first (it typically reads a local .env file) so
# the API key can be supplied without hardcoding it in the notebook.
load_dotenv()

# os.environ[...] raises KeyError when COHERE_API_KEY is not set, so a missing
# key fails here rather than on the first API call.
co = cohere.ClientV2(api_key=os.environ["COHERE_API_KEY"])

# How to Use These Embeddings?
Reference (Cohere LLMU): <https://cohere.com/llmu/sentence-word-embeddings>

In [None]:
# Three groups of three near-paraphrases each; phrases within a group should
# score as more similar to each other than to phrases from another group.
dog_phrases = [
    "I like my dog",
    "I love my dog",
    "I adore my dog",
]
greeting_phrases = [
    "Hello, how are you?",
    "Hey, how's it going?",
    "Hi, what's up?",
]
soccer_phrases = [
    "I love watching soccer",
    "I enjoyed watching the world cup",
    "I like watching soccer matches",
]

# Flat list, in the same order as the groups above.
phrases = dog_phrases + greeting_phrases + soccer_phrases

In [None]:
# Embedding request settings, kept in named variables so they can be reused.
model = "embed-v4.0"
input_type = "search_query"

# Embed every phrase in a single request, asking for float vectors with an
# output dimension of 1024.
res = co.embed(
    model=model,
    input_type=input_type,
    texts=phrases,
    embedding_types=["float"],
    output_dimension=1024,
)

# Float embeddings from the response, and how many of them came back.
embeddings = res.embeddings.float
size = len(embeddings)

# Similarity measure used to compare two embedding vectors.
def calculate_similarity(a, b):
    """Return the cosine similarity of vectors ``a`` and ``b``.

    Computed as the dot product of the two vectors divided by the product of
    their norms (``np.linalg.norm``).
    """
    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
    return np.dot(a, b) / norm_product

In [None]:
# Pairwise cosine similarity between every pair of phrase embeddings.
#
# The scores are written into a float array first and wrapped in a DataFrame
# afterwards: a DataFrame created with no data is object-typed, and filling it
# cell by cell leaves the finished table object-typed instead of numeric.
similarity_matrix = np.empty((size, size), dtype=float)

for i in range(size):
    # The measure is symmetric, so compute the upper triangle (including the
    # diagonal) and mirror each score into the lower triangle.
    for j in range(i, size):
        score = calculate_similarity(embeddings[i], embeddings[j])
        similarity_matrix[i, j] = score
        similarity_matrix[j, i] = score

# Rows are labelled with the phrase text, columns with the phrase position.
df = pd.DataFrame(similarity_matrix, index=phrases, columns=pd.RangeIndex(size))
df


# Multilingual Sentence Embedding

In [None]:
# Four sentences, each given in English, Spanish, French and Japanese, to
# compare embeddings of the same sentence across languages.
phrases = [
    # Bear / woods
    "The bear lives in the woods",
    "El oso vive en el bosque",
    "L’ours vit dans la forêt",
    "熊は木立の中で暮らす",
    # World cup / Qatar
    "The world cup is in Qatar",
    "El mundial es en Qatar",
    "La coupe du monde est au Qatar",
    "W杯はカタールで開催される",
    # Apple / fruit
    "An apple is a fruit",
    "Una manzana es una fruta",
    "Une pomme est un fruit",
    "リンゴは果物です",
    # Blue sky (Spanish comes first in this group)
    "El cielo es azul",
    "The sky is blue",
    "Le ciel est bleu",
    "空は青い",
]
# Embed the multilingual phrases with the `model` and `input_type` values
# already defined in the notebook, again requesting float vectors with an
# output dimension of 1024.
res = co.embed(
    model=model,
    input_type=input_type,
    texts=phrases,
    embedding_types=["float"],
    output_dimension=1024,
)

# Float embeddings from the response, and how many of them came back.
embeddings = res.embeddings.float
size = len(embeddings)

# Pairwise cosine similarity between every pair of multilingual embeddings.
#
# The scores are written into a float array first and wrapped in a DataFrame
# afterwards: a DataFrame created with no data is object-typed, and filling it
# cell by cell leaves the finished table object-typed instead of numeric.
similarity_matrix = np.empty((size, size), dtype=float)

for i in range(size):
    # The measure is symmetric, so compute the upper triangle (including the
    # diagonal) and mirror each score into the lower triangle.
    for j in range(i, size):
        score = calculate_similarity(embeddings[i], embeddings[j])
        similarity_matrix[i, j] = score
        similarity_matrix[j, i] = score

# Rows are labelled with the phrase text, columns with the phrase position.
df = pd.DataFrame(similarity_matrix, index=phrases, columns=pd.RangeIndex(size))
df
