# Generating the Embeddings with VoyageAI

## Loading Libraries and Setting Up the VoyageAI Client

In [1]:
# Importing libraries
import json
import pandas as pd
import os
import voyageai

# Setting the working directory
def find_repo_root(repo_name='orare-model'):
    """Return the nearest ancestor of the current working directory named `repo_name`.

    The search starts at the current working directory itself and walks
    upwards one directory at a time. The filesystem root is never considered
    a match.

    Args:
        repo_name: Directory name (basename) that identifies the repository root.

    Returns:
        The path of the first directory, walking upwards, whose basename
        equals `repo_name`.

    Raises:
        FileNotFoundError: If the filesystem root is reached without a match.
    """
    current_dir = os.getcwd()
    while True:
        parent_dir = os.path.dirname(current_dir)
        # At a filesystem root, dirname() returns its argument unchanged
        # ('/' on POSIX, a drive root such as 'C:\\' on Windows). Comparing
        # against the parent instead of the literal '/' makes the walk
        # terminate on every platform.
        if parent_dir == current_dir:
            break
        if os.path.basename(current_dir) == repo_name:
            return current_dir
        current_dir = parent_dir
    raise FileNotFoundError(f"Repository root '{repo_name}' not found.")

# Locate the repository root and make it the working directory, so the
# relative 'bible/data/...' paths used in this notebook resolve from there
repo_root = find_repo_root()
os.chdir(repo_root)

# Build the VoyageAI API client; the key is read from the VOYAGE_API_KEY
# environment variable (None is passed through if the variable is unset)
vo = voyageai.Client(api_key=os.environ.get("VOYAGE_API_KEY"))

## Catholic Bible interpreted: v3 - Claude

In [None]:
# Loading the data. The encoding is given explicitly (instead of relying on
# the platform default) so the file is decoded the same way on every machine,
# matching the UTF-8 encoding used when the results are saved.
with open('bible/data/cat_bible_int_claude.json', encoding='utf-8') as f:
    data = json.load(f)

# Converting json into df
bible_data = pd.DataFrame(data)
bible_data.head()

In [62]:
# Number of texts sent to the embeddings API per request
BATCH_SIZE = 100

embeddings_list = []
# Embed the 'interpretacion' text of every row, BATCH_SIZE rows at a time
for start in range(0, len(bible_data), BATCH_SIZE):
    # Take the batch's texts straight from the column, in row order
    input_list = bible_data['interpretacion'].iloc[start:start + BATCH_SIZE].tolist()
    # Embed the documents
    result = vo.embed(input_list, model="voyage-multilingual-2", input_type="document")
    # Collect the batch's vectors, preserving row order
    embeddings_list.extend(result.embeddings)

# Fail here, next to the API calls, if any row did not get a vector
assert len(embeddings_list) == len(bible_data), "Number of embeddings does not match number of rows."

In [67]:
# Attach the embedding vectors to the dataframe as a new column
bible_data['interpretacion_vector'] = embeddings_list

# Write the table to a pipe-delimited, UTF-8 encoded text file, without the index
bible_data.to_csv(
    path_or_buf='bible/data/cat_bible_int_claude_emb_voy.txt',
    sep='|',
    encoding='utf-8',
    index=False,
)

## Catholic Bible interpreted: v3 - OpenAI

In [2]:
# Loading the data. The encoding is given explicitly (instead of relying on
# the platform default) so the file is decoded the same way on every machine,
# matching the UTF-8 encoding used when the results are saved.
with open('bible/data/cat_bible_int_openai.json', encoding='utf-8') as f:
    data = json.load(f)

# Converting json into df
bible_data = pd.DataFrame(data)

# Creating a 1-based id column from the row index, then putting it first
bible_data['id'] = bible_data.index + 1
bible_data = bible_data[['id','pasaje','texto','interpretacion','area_vida']]
bible_data.head()

Unnamed: 0,id,pasaje,texto,interpretacion,area_vida
0,1,1 Corintios 10:12,"Por eso, el que se cree muy seguro, ¡cuídese d...","Un gerente de una empresa, confiado en su expe...",[orgullo]
1,2,1 Corintios 10:13,"Hasta ahora, ustedes no tuvieron tentaciones q...","Imagina a Laura, una joven profesional que se ...","[crecimiento espiritual, maternidad, vicio]"
2,3,1 Corintios 10:31,"En resumen, sea que ustedes coman, sea que beb...",Anna es una joven profesional que trabaja en u...,[descanso y vacaciones]
3,4,1 Corintios 11:9,"ni fue creado el hombre a causa de la mujer, s...","En una moderna empresa de tecnología, el papel...",[creación/evolución]
4,5,1 Corintios 13:13,"En una palabra, ahora existen tres cosas: la f...","Imagina un grupo de amigos que, a pesar de viv...",[amor]


In [3]:
# Number of texts sent to the embeddings API per request
BATCH_SIZE = 100

embeddings_list = []
# Embed the 'interpretacion' text of every row, BATCH_SIZE rows at a time
for start in range(0, len(bible_data), BATCH_SIZE):
    # Take the batch's texts straight from the column, in row order
    input_list = bible_data['interpretacion'].iloc[start:start + BATCH_SIZE].tolist()
    # Embed the documents
    result = vo.embed(input_list, model="voyage-multilingual-2", input_type="document")
    # Collect the batch's vectors, preserving row order
    embeddings_list.extend(result.embeddings)

# Fail here, next to the API calls, if any row did not get a vector
assert len(embeddings_list) == len(bible_data), "Number of embeddings does not match number of rows."

In [7]:
# Attach the embedding vectors to the dataframe as a new column
bible_data['interpretacion_vector'] = embeddings_list

# Write the table to a pipe-delimited, UTF-8 encoded text file, without the index
bible_data.to_csv(
    path_or_buf='bible/data/cat_bible_int_openai_emb_voy.txt',
    sep='|',
    encoding='utf-8',
    index=False,
)