In [None]:
import os

import chromadb
import chromadb.utils.embedding_functions as embedding_functions
import pandas as pd
from google import genai
from google.genai import types

# Configuration
# Generative model used for the chapter summaries.
model = "gemini-2.5-flash-lite-preview-06-17"
# Read the key from the GOOGLE_API_KEY environment variable so that a real
# credential never has to be written into the notebook; the placeholder is only
# used when the variable is unset or empty.
api_key = os.environ.get("GOOGLE_API_KEY") or "your_api_key_here"

# Embedding function handed to the Chroma collection; it is configured with the
# RETRIEVAL_QUERY task type.
google_ef = embedding_functions.GoogleGenerativeAiEmbeddingFunction(
    api_key=api_key,
    model_name="models/text-embedding-004",
    task_type="RETRIEVAL_QUERY",
)

# Chroma is reached over HTTP on localhost:8000.
client_db = chromadb.HttpClient(host='localhost', port=8000)

# Gemini client used for both text generation and document embeddings.
client_ai = genai.Client(api_key=api_key)


In [None]:
# Embed one sample sentence with the document task type; the response is kept
# in `result` and not displayed.
result = client_ai.models.embed_content(
    contents="What is the meaning of life?",
    model="models/text-embedding-004",
    config=types.EmbedContentConfig(task_type="RETRIEVAL_DOCUMENT"),
)

In [19]:
# Verse table and the book-key table (only the `b` and `n` columns of the
# latter are used).
bible_df = pd.read_csv('data/t_web.csv')
bible_keys = pd.read_csv('data/key_english.csv')

# Attach column `n` from the key table to every verse by joining on `b`.
bible_df = bible_df.merge(bible_keys[['b', 'n']], on='b')

# One row per (n, c) pair: the `t` values of the group joined with single spaces.
book_group = bible_df.pivot_table(
    index=['n', 'c'],
    values='t',
    aggfunc=lambda verses: ' '.join(verses),
).reset_index()

In [20]:
# Text of the first aggregated chapter (row label 0, column "t").
book_group.loc[0, "t"]

"Adam, Seth, Enosh, Kenan, Mahalalel, Jared, Enoch, Methuselah, Lamech, Noah, Shem, Ham, and Japheth. The sons of Japheth: Gomer, and Magog, and Madai, and Javan, and Tubal, and Meshech, and Tiras. The sons of Gomer: Ashkenaz, and Diphath, and Togarmah. The sons of Javan: Elishah, and Tarshish, Kittim, and Rodanim. The sons of Ham: Cush, and Mizraim, Put, and Canaan. The sons of Cush: Seba, and Havilah, and Sabta, and Raama, and Sabteca. The sons of Raamah: Sheba, and Dedan. Cush became the father of Nimrod; he began to be a mighty one in the earth. Mizraim became the father of Ludim, and Anamim, and Lehabim, and Naphtuhim, and Pathrusim, and Casluhim (from whence came the Philistines), and Caphtorim. Canaan became the father of Sidon his firstborn, and Heth, and the Jebusite, and the Amorite, and the Girgashite, and the Hivite, and the Arkite, and the Sinite, and the Arvadite, and the Zemarite, and the Hamathite. The sons of Shem: Elam, and Asshur, and Arpachshad, and Lud, and Aram, a

In [13]:
from tqdm import tqdm
import time

# Seconds to wait after every request to pace calls to the API.
# NOTE(review): the earlier comment here quoted gemini-2.0-flash limits
# (15 requests per minute, 1500 per day); the model actually used is whatever
# `model` is set to, so confirm its quota and adjust this pause if needed.
REQUEST_PAUSE_SECONDS = 0.5

# How many times a chapter is requested before it is reported and skipped.
MAX_SUMMARY_ATTEMPTS = 3

# Collected (book, chapter, chapter text, summary) tuples, one per summarized chapter.
book_group_summary = []

for v in tqdm(book_group.to_numpy()):
    # Row layout: v[0] and v[1] identify the chapter, v[2] is the text sent to the model.
    book, chapter, chapter_text = v[0], v[1], v[2]

    prompt = f"""Summarize below bible chapter. 
    Detailed information like numbers, names must be accurately preserved.
    <CHAPTER>
    {chapter_text}
    </CHAPTER>
    """

    summary = None
    failure = None
    for _ in range(MAX_SUMMARY_ATTEMPTS):
        response = client_ai.models.generate_content(model=model, contents=prompt)
        # Pace every request, including the ones that come back empty.
        time.sleep(REQUEST_PAUSE_SECONDS)

        if not response.candidates:
            failure = "No candidates found for:"
            continue

        rc = response.candidates[0]
        if not rc.content or not rc.content.parts:
            failure = "No content or parts found for:"
            continue

        if not rc.content.parts[0].text:
            # A part without text is not a usable summary; retry rather than
            # storing a placeholder value in the summary column.
            failure = "No text found for:"
            continue

        summary = rc.content.parts[0].text
        break

    if summary is None:
        # Every attempt came back empty: report the chapter and leave it out,
        # so the missing entry is visible instead of being filled with junk.
        print(failure, book, chapter)
        continue

    book_group_summary.append((book, chapter, chapter_text, summary))



 39%|███▉      | 465/1189 [17:01<20:48,  1.72s/it] 

No content or parts found for: Genesis 17


100%|██████████| 1189/1189 [40:54<00:00,  2.06s/it]


In [49]:
# Turn the collected tuples into a frame, write it to CSV without the index,
# and display it as the cell result.
book_summary_df = pd.DataFrame(
    data=book_group_summary,
    columns=['book', 'chapter', 'text', 'summary'],
)
book_summary_df.to_csv('data/bible_summary.csv', index=False)
book_summary_df

Unnamed: 0,book,chapter,text,summary
0,1 Chronicles,1,"Adam, Sheth, Enosh, Kenan, Mahalaleel, Jered, ...","This chapter provides a detailed genealogy, be..."
1,1 Chronicles,2,"These are the sons of Israel; Reuben, Simeon, ...",This chapter provides a detailed genealogy of ...
2,1 Chronicles,3,"Now these were the sons of David, who were bor...",This chapter lists the sons of King David and ...
3,1 Chronicles,4,"The sons of Judah; Pharez, Hezron, and Carmi, ...",This chapter details the genealogies of the tr...
4,1 Chronicles,5,Now the sons of Reuben the first-born of Israe...,This chapter of the Bible begins by establishi...
...,...,...,...,...
1183,Zechariah,13,In that day there shall be a fountain opened t...,This chapter describes a future day of cleansi...
1184,Zechariah,14,"Behold, the day of the LORD cometh, and thy sp...","This chapter describes the Day of the LORD, wh..."
1185,Zephaniah,1,The word of the LORD which came to Zephaniah t...,The first chapter of Zephaniah begins with the...
1186,Zephaniah,2,"Assemble yourselves, yes, assemble, O nation n...","This chapter of the Bible, attributed to Zepha..."


In [50]:
# Start from an empty "bible_wbt" collection.
# Deleting unconditionally fails when the collection does not exist yet (for
# example on a fresh Chroma instance), so only delete when it is present.
# NOTE(review): depending on the chromadb version, list_collections() yields
# Collection objects or plain names; getattr covers both - confirm for the
# installed version.
existing_collections = {getattr(c, "name", c) for c in client_db.list_collections()}
if "bible_wbt" in existing_collections:
    client_db.delete_collection(name="bible_wbt")
collection = client_db.create_collection(name="bible_wbt", embedding_function=google_ef)

In [51]:
batch_size = 100
documents_batch = []
ids_batch = []
metadatas_batch = []

# Ids of verses that were not stored because their batch came back without
# embeddings; kept so they can be inspected or re-run afterwards.
failed_ids = []


def _neighbour_label(book, chapter, number, text):
    """Format a neighbouring verse as "<book> <chapter>:<verse> - <text>".

    Returns the string 'None' when there is no neighbour (missing or empty text).
    """
    if pd.isna(text) or not text:
        return 'None'
    return f"{book} {chapter}:{int(number)} - {text}"


def _add_verse_batch(documents, ids, metadatas):
    """Embed one batch of verse texts and add it to `collection`.

    Args:
        documents: verse texts to embed.
        ids: ids for the verses, aligned with `documents`.
        metadatas: metadata dicts, aligned with `documents`.

    Returns:
        The number of verses actually written to the collection.
    """
    result = client_ai.models.embed_content(
        model="models/text-embedding-004",
        contents=documents,
        config=types.EmbedContentConfig(task_type="RETRIEVAL_DOCUMENT")
    )

    if not result.embeddings:
        print("No embeddings returned for the batch.")
        failed_ids.extend(ids)
        return 0

    valid_embeddings = []
    valid_documents = []
    valid_ids = []
    valid_metadatas = []

    # Keep only the verses whose embedding carries values.
    for doc_id, document, metadata, embedding in zip(ids, documents, metadatas, result.embeddings):
        if embedding.values is not None:
            valid_embeddings.append(embedding.values)
            valid_documents.append(document)
            valid_ids.append(doc_id)
            valid_metadatas.append(metadata)
        else:
            print(f"Warning: Embedding for document {doc_id} is None, skipping")

    if valid_embeddings:  # Only add if we have valid embeddings
        collection.add(
            documents=valid_documents,
            ids=valid_ids,
            metadatas=valid_metadatas,
            embeddings=valid_embeddings
        )

    return len(valid_embeddings)


# Previous / next verse within the same (n, c) chapter, computed once for the
# whole frame instead of re-filtering bible_df for every verse. The shifted
# series keep bible_df's row order, so the lists line up with to_numpy() below.
verses_by_chapter = bible_df.groupby(['n', 'c'], sort=False)
previous_numbers = verses_by_chapter['v'].shift(1).tolist()
previous_texts = verses_by_chapter['t'].shift(1).tolist()
next_numbers = verses_by_chapter['v'].shift(-1).tolist()
next_texts = verses_by_chapter['t'].shift(-1).tolist()

# (book, chapter) -> summary; setdefault keeps the first row when a key repeats.
summary_by_chapter = {}
for summary_book, summary_chapter, summary_text in zip(
    book_summary_df['book'], book_summary_df['chapter'], book_summary_df['summary']
):
    summary_by_chapter.setdefault((summary_book, summary_chapter), summary_text)

last_idx = len(bible_df) - 1
current_index = 0
for idx, v in tqdm(enumerate(bible_df.to_numpy()), total=bible_df.shape[0]):
    # Positional row layout: v[0] is used as the document id, v[2]/v[3]/v[4] are
    # the `c`, `v` and `t` values, and the last column is `n`.
    book = v[-1]
    chapter = v[2]
    verse = v[3]
    text = v[4]

    metadata = {
        'book': book,
        'chapter': chapter,
        'verse': verse,
        'text': text,
        # Labels carry the neighbour's own verse number rather than verse -/+ 1,
        # so they stay correct when verse numbers are not consecutive.
        'previous_verse': _neighbour_label(book, chapter, previous_numbers[idx], previous_texts[idx]),
        'next_verse': _neighbour_label(book, chapter, next_numbers[idx], next_texts[idx]),
        'chapter_summary': summary_by_chapter.get((book, chapter), "No summary available")
    }

    # Add to batch
    documents_batch.append(text)
    ids_batch.append(f"{v[0]}")
    metadatas_batch.append(metadata)

    current_index = idx

    # When batch is full or it's the last item, add to collection
    if len(documents_batch) >= batch_size or idx == last_idx:
        added = _add_verse_batch(documents_batch, ids_batch, metadatas_batch)
        print(f"Added batch of {added} documents")

        # Always start a fresh batch, even when nothing was stored; otherwise a
        # single failed batch would keep growing and block every later flush.
        documents_batch = []
        ids_batch = []
        metadatas_batch = []


  1%|          | 182/31102 [00:02<07:08, 72.22it/s]

Added batch of 100 documents


  1%|          | 276/31102 [00:04<08:19, 61.77it/s]

Added batch of 100 documents


  1%|          | 369/31102 [00:06<07:23, 69.31it/s]

Added batch of 100 documents


  1%|▏         | 447/31102 [00:07<07:31, 67.89it/s]

Added batch of 100 documents


  2%|▏         | 561/31102 [00:09<07:10, 70.90it/s]

Added batch of 100 documents


  2%|▏         | 652/31102 [00:10<06:50, 74.21it/s]

Added batch of 100 documents


  2%|▏         | 750/31102 [00:11<06:20, 79.72it/s]

Added batch of 100 documents


  3%|▎         | 851/31102 [00:13<06:08, 82.07it/s]

Added batch of 100 documents


  3%|▎         | 933/31102 [00:14<06:53, 72.92it/s]

Added batch of 100 documents


  3%|▎         | 1045/31102 [00:16<06:32, 76.57it/s]

Added batch of 100 documents


  4%|▎         | 1152/31102 [00:17<05:54, 84.55it/s]

Added batch of 100 documents


  4%|▍         | 1258/31102 [00:19<05:33, 89.57it/s]

Added batch of 100 documents


  4%|▍         | 1344/31102 [00:20<06:21, 78.01it/s]

Added batch of 100 documents


  5%|▍         | 1448/31102 [00:21<05:55, 83.53it/s]

Added batch of 100 documents


  5%|▌         | 1556/31102 [00:23<05:56, 82.79it/s]

Added batch of 100 documents


  5%|▌         | 1652/31102 [00:25<06:25, 76.40it/s]

Added batch of 100 documents


  6%|▌         | 1768/31102 [00:26<06:08, 79.71it/s]

Added batch of 100 documents


  6%|▌         | 1843/31102 [00:28<06:40, 73.12it/s]

Added batch of 100 documents


  6%|▌         | 1943/31102 [00:29<06:12, 78.25it/s]

Added batch of 100 documents


  7%|▋         | 2050/31102 [00:30<05:52, 82.44it/s]

Added batch of 100 documents


  7%|▋         | 2155/31102 [00:32<05:24, 89.29it/s]

Added batch of 100 documents


  7%|▋         | 2236/31102 [00:33<06:14, 77.07it/s]

Added batch of 100 documents


  8%|▊         | 2354/31102 [00:35<05:32, 86.52it/s]

Added batch of 100 documents


  8%|▊         | 2423/31102 [00:36<06:43, 71.16it/s]

Added batch of 100 documents


  8%|▊         | 2586/31102 [00:37<04:49, 98.57it/s] 

Added batch of 100 documents


  9%|▊         | 2683/31102 [00:39<05:07, 92.45it/s]

Added batch of 100 documents


  9%|▉         | 2784/31102 [00:40<05:14, 89.92it/s]

Added batch of 100 documents


  9%|▉         | 2876/31102 [00:42<05:36, 83.81it/s]

Added batch of 100 documents


 10%|▉         | 2957/31102 [00:43<06:10, 76.01it/s]

Added batch of 100 documents


 10%|▉         | 3054/31102 [00:44<05:51, 79.78it/s]

Added batch of 100 documents


 10%|█         | 3155/31102 [00:46<05:45, 80.95it/s]

Added batch of 100 documents


 10%|█         | 3253/31102 [00:47<05:45, 80.52it/s]

Added batch of 100 documents


 11%|█         | 3359/31102 [00:49<05:13, 88.60it/s]

Added batch of 100 documents


 11%|█         | 3456/31102 [00:50<05:16, 87.32it/s]

Added batch of 100 documents


 11%|█▏        | 3543/31102 [00:51<05:45, 79.73it/s]

Added batch of 100 documents


 12%|█▏        | 3650/31102 [00:53<05:57, 76.70it/s]

Added batch of 100 documents


 12%|█▏        | 3756/31102 [00:54<05:31, 82.46it/s]

Added batch of 100 documents


 12%|█▏        | 3842/31102 [00:56<06:07, 74.11it/s]

Added batch of 100 documents


 13%|█▎        | 3969/31102 [00:57<05:40, 79.76it/s]

Added batch of 100 documents


 13%|█▎        | 4052/31102 [00:59<05:49, 77.29it/s]

Added batch of 100 documents


 13%|█▎        | 4156/31102 [01:00<05:15, 85.51it/s]

Added batch of 100 documents


 14%|█▎        | 4248/31102 [01:02<05:21, 83.44it/s]

Added batch of 100 documents


 14%|█▍        | 4356/31102 [01:03<04:55, 90.51it/s]

Added batch of 100 documents


 14%|█▍        | 4455/31102 [01:04<04:55, 90.25it/s]

Added batch of 100 documents


 15%|█▍        | 4554/31102 [01:06<05:07, 86.22it/s]

Added batch of 100 documents


 15%|█▍        | 4660/31102 [01:07<05:00, 88.09it/s]

Added batch of 100 documents


 15%|█▌        | 4741/31102 [01:08<05:36, 78.23it/s]

Added batch of 100 documents


 16%|█▌        | 4887/31102 [01:10<04:07, 105.74it/s]

Added batch of 100 documents


 16%|█▌        | 4961/31102 [01:11<05:26, 80.05it/s] 

Added batch of 100 documents


 16%|█▋        | 5076/31102 [01:13<05:16, 82.36it/s]

Added batch of 100 documents


 17%|█▋        | 5136/31102 [01:14<06:45, 64.04it/s]

Added batch of 100 documents


 17%|█▋        | 5244/31102 [01:16<06:37, 65.09it/s]

Added batch of 100 documents


 17%|█▋        | 5371/31102 [01:17<05:07, 83.63it/s]

Added batch of 100 documents


 18%|█▊        | 5443/31102 [01:19<05:57, 71.74it/s]

Added batch of 100 documents


 18%|█▊        | 5552/31102 [01:20<05:09, 82.44it/s]

Added batch of 100 documents


 18%|█▊        | 5650/31102 [01:22<05:14, 80.91it/s]

Added batch of 100 documents


 18%|█▊        | 5727/31102 [01:23<05:53, 71.81it/s]

Added batch of 100 documents


 19%|█▉        | 5856/31102 [01:25<05:13, 80.61it/s]

Added batch of 100 documents


 19%|█▉        | 5959/31102 [01:26<05:03, 82.84it/s]

Added batch of 100 documents


 19%|█▉        | 6037/31102 [01:27<05:41, 73.41it/s]

Added batch of 100 documents


 20%|█▉        | 6191/31102 [01:29<04:19, 95.84it/s] 

Added batch of 100 documents


 20%|██        | 6283/31102 [01:30<04:46, 86.69it/s]

Added batch of 100 documents


 21%|██        | 6378/31102 [01:32<04:57, 83.18it/s]

Added batch of 100 documents


 21%|██        | 6468/31102 [01:33<04:58, 82.44it/s]

Added batch of 100 documents


 21%|██        | 6564/31102 [01:35<04:59, 81.84it/s]

Added batch of 100 documents


 21%|██▏       | 6641/31102 [01:36<05:26, 74.96it/s]

Added batch of 100 documents


 22%|██▏       | 6745/31102 [01:37<05:07, 79.16it/s]

Added batch of 100 documents


 22%|██▏       | 6845/31102 [01:39<04:57, 81.58it/s]

Added batch of 100 documents


 22%|██▏       | 6950/31102 [01:40<04:53, 82.38it/s]

Added batch of 100 documents


 23%|██▎       | 7052/31102 [01:42<04:52, 82.22it/s]

Added batch of 100 documents


 23%|██▎       | 7151/31102 [01:43<04:47, 83.39it/s]

Added batch of 100 documents


 23%|██▎       | 7239/31102 [01:45<05:07, 77.67it/s]

Added batch of 100 documents


 24%|██▎       | 7346/31102 [01:46<04:45, 83.24it/s]

Added batch of 100 documents


 24%|██▍       | 7455/31102 [01:47<04:38, 84.79it/s]

Added batch of 100 documents


 24%|██▍       | 7562/31102 [01:49<04:32, 86.34it/s]

Added batch of 100 documents


 25%|██▍       | 7659/31102 [01:50<04:27, 87.75it/s]

Added batch of 100 documents


 25%|██▍       | 7749/31102 [01:52<04:36, 84.36it/s]

Added batch of 100 documents


 25%|██▌       | 7857/31102 [01:53<04:22, 88.48it/s]

Added batch of 100 documents


 25%|██▌       | 7925/31102 [01:54<05:38, 68.55it/s]

Added batch of 100 documents


 26%|██▌       | 8046/31102 [01:56<04:54, 78.32it/s]

Added batch of 100 documents


 26%|██▌       | 8154/31102 [01:57<04:36, 83.02it/s]

Added batch of 100 documents


 26%|██▋       | 8223/31102 [01:59<05:45, 66.24it/s]

Added batch of 100 documents


 27%|██▋       | 8359/31102 [02:00<04:27, 85.13it/s]

Added batch of 100 documents


 27%|██▋       | 8454/31102 [02:02<04:22, 86.12it/s]

Added batch of 100 documents


 28%|██▊       | 8557/31102 [02:03<04:15, 88.22it/s]

Added batch of 100 documents


 28%|██▊       | 8647/31102 [02:04<04:25, 84.59it/s]

Added batch of 100 documents


 28%|██▊       | 8752/31102 [02:06<04:33, 81.85it/s]

Added batch of 100 documents


 28%|██▊       | 8844/31102 [02:08<04:50, 76.63it/s]

Added batch of 100 documents


 29%|██▉       | 8961/31102 [02:09<04:13, 87.27it/s]

Added batch of 100 documents


 29%|██▉       | 9040/31102 [02:10<04:52, 75.45it/s]

Added batch of 100 documents


 29%|██▉       | 9148/31102 [02:12<04:31, 80.79it/s]

Added batch of 100 documents


 30%|██▉       | 9275/31102 [02:14<04:35, 79.26it/s] 

Added batch of 100 documents


 30%|███       | 9362/31102 [02:15<04:44, 76.50it/s]

Added batch of 100 documents


 30%|███       | 9427/31102 [02:17<05:58, 60.40it/s]

Added batch of 100 documents


 31%|███       | 9581/31102 [02:18<03:49, 93.66it/s]

Added batch of 100 documents


 31%|███       | 9649/31102 [02:20<05:07, 69.72it/s]

Added batch of 100 documents


 31%|███▏      | 9791/31102 [02:21<04:05, 86.79it/s]

Added batch of 100 documents


 32%|███▏      | 9885/31102 [02:23<04:08, 85.25it/s]

Added batch of 100 documents


 32%|███▏      | 9981/31102 [02:24<04:05, 85.95it/s]

Added batch of 100 documents


 32%|███▏      | 10066/31102 [02:26<04:39, 75.24it/s]

Added batch of 100 documents


 33%|███▎      | 10182/31102 [02:27<03:46, 92.45it/s]

Added batch of 100 documents


 33%|███▎      | 10277/31102 [02:28<03:49, 90.61it/s]

Added batch of 100 documents


 33%|███▎      | 10377/31102 [02:30<03:49, 90.40it/s]

Added batch of 100 documents


 34%|███▎      | 10446/31102 [02:31<04:40, 73.69it/s]

Added batch of 100 documents


 34%|███▍      | 10556/31102 [02:33<05:00, 68.37it/s]

Added batch of 100 documents


 34%|███▍      | 10646/31102 [02:34<04:39, 73.10it/s]

Added batch of 100 documents


 35%|███▍      | 10749/31102 [02:36<04:18, 78.77it/s]

Added batch of 100 documents


 35%|███▍      | 10851/31102 [02:37<04:02, 83.52it/s]

Added batch of 100 documents


 35%|███▌      | 10923/31102 [02:39<05:13, 64.29it/s]

Added batch of 100 documents


 36%|███▌      | 11050/31102 [02:40<04:17, 77.83it/s]

Added batch of 100 documents


 36%|███▌      | 11130/31102 [02:42<04:39, 71.33it/s]

Added batch of 100 documents


 36%|███▋      | 11283/31102 [02:43<03:29, 94.45it/s] 

Added batch of 100 documents


 37%|███▋      | 11359/31102 [02:44<04:14, 77.63it/s]

Added batch of 100 documents


 37%|███▋      | 11427/31102 [02:46<04:59, 65.61it/s]

Added batch of 100 documents


 37%|███▋      | 11563/31102 [02:48<04:10, 77.86it/s]

Added batch of 100 documents


 37%|███▋      | 11653/31102 [02:49<04:20, 74.77it/s]

Added batch of 100 documents


 38%|███▊      | 11791/31102 [02:51<03:19, 96.76it/s]

Added batch of 100 documents


 38%|███▊      | 11889/31102 [02:52<03:37, 88.17it/s]

Added batch of 100 documents


 38%|███▊      | 11958/31102 [02:54<04:40, 68.18it/s]

Added batch of 100 documents


 39%|███▉      | 12066/31102 [02:55<04:17, 74.03it/s]

Added batch of 100 documents


 39%|███▉      | 12143/31102 [02:57<04:24, 71.65it/s]

Added batch of 100 documents


 39%|███▉      | 12245/31102 [02:58<03:52, 81.25it/s]

Added batch of 100 documents


 40%|███▉      | 12367/31102 [03:00<04:12, 74.12it/s] 

Added batch of 100 documents


 40%|████      | 12442/31102 [03:01<04:23, 70.91it/s]

Added batch of 100 documents


 40%|████      | 12550/31102 [03:02<03:48, 81.17it/s]

Added batch of 100 documents


 41%|████      | 12688/31102 [03:04<03:26, 89.31it/s] 

Added batch of 100 documents


 41%|████      | 12780/31102 [03:05<03:31, 86.60it/s]

Added batch of 100 documents


 41%|████▏     | 12861/31102 [03:07<03:54, 77.68it/s]

Added batch of 100 documents


 42%|████▏     | 12946/31102 [03:08<04:12, 71.94it/s]

Added batch of 100 documents


 42%|████▏     | 13093/31102 [03:10<03:23, 88.61it/s]

Added batch of 100 documents


 42%|████▏     | 13174/31102 [03:11<03:42, 80.50it/s]

Added batch of 100 documents


 43%|████▎     | 13255/31102 [03:12<03:48, 78.21it/s]

Added batch of 100 documents


 43%|████▎     | 13348/31102 [03:14<03:44, 79.20it/s]

Added batch of 100 documents


 43%|████▎     | 13441/31102 [03:15<03:58, 73.93it/s]

Added batch of 100 documents


 44%|████▎     | 13536/31102 [03:17<03:53, 75.11it/s]

Added batch of 100 documents


 44%|████▍     | 13683/31102 [03:19<03:23, 85.68it/s] 

Added batch of 100 documents


 44%|████▍     | 13769/31102 [03:20<03:31, 81.89it/s]

Added batch of 100 documents


 45%|████▍     | 13847/31102 [03:21<03:47, 75.84it/s]

Added batch of 100 documents


 45%|████▍     | 13944/31102 [03:23<03:34, 79.97it/s]

Added batch of 100 documents


 45%|████▌     | 14047/31102 [03:24<03:41, 76.88it/s]

Added batch of 100 documents


 45%|████▌     | 14146/31102 [03:26<03:36, 78.15it/s]

Added batch of 100 documents


 46%|████▌     | 14253/31102 [03:27<03:17, 85.28it/s]

Added batch of 100 documents


 46%|████▌     | 14346/31102 [03:28<03:26, 81.18it/s]

Added batch of 100 documents


 46%|████▋     | 14449/31102 [03:30<03:30, 79.28it/s]

Added batch of 100 documents


 47%|████▋     | 14543/31102 [03:31<03:34, 77.02it/s]

Added batch of 100 documents


 47%|████▋     | 14642/31102 [03:33<03:31, 77.89it/s]

Added batch of 100 documents


 47%|████▋     | 14751/31102 [03:34<03:12, 85.00it/s]

Added batch of 100 documents


 48%|████▊     | 14845/31102 [03:36<03:19, 81.59it/s]

Added batch of 100 documents


 48%|████▊     | 14951/31102 [03:37<03:10, 84.89it/s]

Added batch of 100 documents


 48%|████▊     | 15023/31102 [03:39<03:58, 67.33it/s]

Added batch of 100 documents


 49%|████▉     | 15167/31102 [03:40<03:01, 87.72it/s]

Added batch of 100 documents


 49%|████▉     | 15250/31102 [03:41<03:19, 79.57it/s]

Added batch of 100 documents


 49%|████▉     | 15340/31102 [03:43<03:19, 79.00it/s]

Added batch of 100 documents


 50%|████▉     | 15440/31102 [03:44<03:13, 80.99it/s]

Added batch of 100 documents


 50%|████▉     | 15520/31102 [03:46<03:56, 66.00it/s]

Added batch of 100 documents


 50%|█████     | 15643/31102 [03:47<03:14, 79.55it/s]

Added batch of 100 documents


 51%|█████     | 15783/31102 [03:49<02:45, 92.42it/s] 

Added batch of 100 documents


 51%|█████     | 15870/31102 [03:50<03:01, 83.95it/s]

Added batch of 100 documents


 51%|█████▏    | 15967/31102 [03:52<03:11, 79.00it/s]

Added batch of 100 documents


 52%|█████▏    | 16055/31102 [03:53<03:19, 75.46it/s]

Added batch of 100 documents


 52%|█████▏    | 16154/31102 [03:55<03:03, 81.24it/s]

Added batch of 100 documents


 52%|█████▏    | 16254/31102 [03:56<03:01, 81.78it/s]

Added batch of 100 documents


 53%|█████▎    | 16346/31102 [03:57<03:04, 79.85it/s]

Added batch of 100 documents


 53%|█████▎    | 16450/31102 [03:59<02:59, 81.61it/s]

Added batch of 100 documents


 53%|█████▎    | 16547/31102 [04:00<02:57, 81.83it/s]

Added batch of 100 documents


 54%|█████▎    | 16652/31102 [04:02<02:49, 85.16it/s]

Added batch of 100 documents


 54%|█████▍    | 16743/31102 [04:03<02:56, 81.35it/s]

Added batch of 100 documents


 54%|█████▍    | 16849/31102 [04:04<02:50, 83.81it/s]

Added batch of 100 documents


 54%|█████▍    | 16922/31102 [04:06<03:22, 70.06it/s]

Added batch of 100 documents


 55%|█████▍    | 17044/31102 [04:07<03:00, 77.73it/s]

Added batch of 100 documents


 55%|█████▌    | 17145/31102 [04:09<02:54, 79.82it/s]

Added batch of 100 documents


 55%|█████▌    | 17242/31102 [04:10<02:47, 82.73it/s]

Added batch of 100 documents


 56%|█████▌    | 17347/31102 [04:11<02:40, 85.85it/s]

Added batch of 100 documents


 56%|█████▌    | 17436/31102 [04:13<02:52, 79.23it/s]

Added batch of 100 documents


 56%|█████▋    | 17565/31102 [04:14<02:47, 80.74it/s] 

Added batch of 100 documents


 57%|█████▋    | 17652/31102 [04:16<02:54, 77.03it/s]

Added batch of 100 documents


 57%|█████▋    | 17741/31102 [04:17<02:56, 75.64it/s]

Added batch of 100 documents


 57%|█████▋    | 17846/31102 [04:19<02:44, 80.61it/s]

Added batch of 100 documents


 58%|█████▊    | 17948/31102 [04:20<02:49, 77.64it/s]

Added batch of 100 documents


 58%|█████▊    | 18057/31102 [04:22<02:44, 79.37it/s]

Added batch of 100 documents


 58%|█████▊    | 18145/31102 [04:23<02:51, 75.57it/s]

Added batch of 100 documents


 59%|█████▉    | 18288/31102 [04:25<02:06, 101.19it/s]

Added batch of 100 documents


 59%|█████▉    | 18368/31102 [04:26<02:43, 77.85it/s] 

Added batch of 100 documents


 59%|█████▉    | 18446/31102 [04:28<03:08, 67.26it/s]

Added batch of 100 documents


 60%|█████▉    | 18554/31102 [04:30<02:44, 76.28it/s]

Added batch of 100 documents


 60%|█████▉    | 18648/31102 [04:31<02:39, 77.97it/s]

Added batch of 100 documents


 60%|██████    | 18749/31102 [04:32<02:33, 80.30it/s]

Added batch of 100 documents


 61%|██████    | 18844/31102 [04:34<02:35, 78.70it/s]

Added batch of 100 documents


 61%|██████    | 18951/31102 [04:35<02:25, 83.58it/s]

Added batch of 100 documents


 61%|██████    | 19047/31102 [04:37<02:34, 78.16it/s]

Added batch of 100 documents


 62%|██████▏   | 19143/31102 [04:38<02:31, 78.71it/s]

Added batch of 100 documents


 62%|██████▏   | 19295/31102 [04:40<02:01, 97.12it/s] 

Added batch of 100 documents


 62%|██████▏   | 19394/31102 [04:41<02:07, 92.13it/s]

Added batch of 100 documents


 63%|██████▎   | 19492/31102 [04:42<02:08, 90.31it/s]

Added batch of 100 documents


 63%|██████▎   | 19592/31102 [04:44<02:07, 90.46it/s]

Added batch of 100 documents


 63%|██████▎   | 19678/31102 [04:45<02:20, 81.07it/s]

Added batch of 100 documents


 64%|██████▎   | 19768/31102 [04:46<02:19, 81.19it/s]

Added batch of 100 documents


 64%|██████▍   | 19828/31102 [04:48<03:14, 57.86it/s]

Added batch of 100 documents


 64%|██████▍   | 19948/31102 [04:50<03:30, 53.06it/s]

Added batch of 100 documents


 65%|██████▍   | 20070/31102 [04:52<02:45, 66.70it/s]

Added batch of 100 documents


 65%|██████▍   | 20137/31102 [04:54<03:00, 60.71it/s]

Added batch of 100 documents


 65%|██████▌   | 20266/31102 [04:55<02:21, 76.68it/s]

Added batch of 100 documents


 65%|██████▌   | 20347/31102 [04:57<02:47, 64.33it/s]

Added batch of 100 documents


 66%|██████▌   | 20450/31102 [04:59<02:32, 69.62it/s]

Added batch of 100 documents


 66%|██████▌   | 20541/31102 [05:00<02:26, 71.91it/s]

Added batch of 100 documents


 67%|██████▋   | 20695/31102 [05:02<01:39, 104.49it/s]

Added batch of 100 documents


 67%|██████▋   | 20785/31102 [05:03<01:55, 89.02it/s] 

Added batch of 100 documents


 67%|██████▋   | 20874/31102 [05:05<02:12, 77.19it/s]

Added batch of 100 documents


 67%|██████▋   | 20962/31102 [05:06<02:16, 74.24it/s]

Added batch of 100 documents


 68%|██████▊   | 21026/31102 [05:08<03:20, 50.21it/s]

Added batch of 100 documents


 68%|██████▊   | 21166/31102 [05:10<02:34, 64.18it/s]

Added batch of 100 documents


 68%|██████▊   | 21246/31102 [05:12<02:30, 65.42it/s]

Added batch of 100 documents


 69%|██████▊   | 21349/31102 [05:13<02:06, 76.80it/s]

Added batch of 100 documents


 69%|██████▉   | 21423/31102 [05:15<02:30, 64.48it/s]

Added batch of 100 documents


 69%|██████▉   | 21552/31102 [05:16<01:58, 80.91it/s]

Added batch of 100 documents


 70%|██████▉   | 21648/31102 [05:18<02:00, 78.21it/s]

Added batch of 100 documents


 70%|██████▉   | 21746/31102 [05:19<02:00, 77.71it/s]

Added batch of 100 documents


 70%|███████   | 21843/31102 [05:20<01:56, 79.66it/s]

Added batch of 100 documents


 71%|███████   | 21934/31102 [05:22<02:00, 75.91it/s]

Added batch of 100 documents


 71%|███████   | 22050/31102 [05:24<02:00, 75.36it/s]

Added batch of 100 documents


 71%|███████   | 22145/31102 [05:25<01:54, 78.11it/s]

Added batch of 100 documents


 71%|███████▏  | 22236/31102 [05:26<02:00, 73.37it/s]

Added batch of 100 documents


 72%|███████▏  | 22386/31102 [05:28<01:32, 93.92it/s] 

Added batch of 100 documents


 72%|███████▏  | 22465/31102 [05:29<01:45, 81.62it/s]

Added batch of 100 documents


 73%|███████▎  | 22549/31102 [05:31<02:02, 70.04it/s]

Added batch of 100 documents


 73%|███████▎  | 22648/31102 [05:32<01:50, 76.27it/s]

Added batch of 100 documents


 73%|███████▎  | 22745/31102 [05:34<01:48, 77.21it/s]

Added batch of 100 documents


 73%|███████▎  | 22851/31102 [05:35<01:40, 82.26it/s]

Added batch of 100 documents


 74%|███████▍  | 22946/31102 [05:37<01:44, 78.07it/s]

Added batch of 100 documents


 74%|███████▍  | 23052/31102 [05:38<01:41, 78.99it/s]

Added batch of 100 documents


 74%|███████▍  | 23146/31102 [05:40<01:41, 78.39it/s]

Added batch of 100 documents


 75%|███████▍  | 23244/31102 [05:41<01:36, 81.32it/s]

Added batch of 100 documents


 75%|███████▌  | 23344/31102 [05:42<01:36, 80.45it/s]

Added batch of 100 documents


 75%|███████▌  | 23426/31102 [05:44<01:45, 72.75it/s]

Added batch of 100 documents


 76%|███████▌  | 23569/31102 [05:45<01:23, 90.08it/s] 

Added batch of 100 documents


 76%|███████▌  | 23643/31102 [05:47<01:41, 73.69it/s]

Added batch of 100 documents


 76%|███████▋  | 23791/31102 [05:48<01:26, 84.46it/s] 

Added batch of 100 documents


 77%|███████▋  | 23884/31102 [05:50<01:29, 80.44it/s]

Added batch of 100 documents


 77%|███████▋  | 23972/31102 [05:52<01:44, 68.28it/s]

Added batch of 100 documents


 77%|███████▋  | 24059/31102 [05:54<01:49, 64.11it/s]

Added batch of 100 documents


 78%|███████▊  | 24140/31102 [05:55<01:49, 63.32it/s]

Added batch of 100 documents


 78%|███████▊  | 24286/31102 [05:57<01:32, 73.72it/s]

Added batch of 100 documents


 78%|███████▊  | 24380/31102 [05:58<01:26, 77.77it/s]

Added batch of 100 documents


 79%|███████▊  | 24470/31102 [06:00<01:24, 78.22it/s]

Added batch of 100 documents


 79%|███████▉  | 24550/31102 [06:01<01:25, 76.24it/s]

Added batch of 100 documents


 79%|███████▉  | 24641/31102 [06:02<01:23, 77.70it/s]

Added batch of 100 documents


 80%|███████▉  | 24738/31102 [06:04<01:23, 76.01it/s]

Added batch of 100 documents


 80%|███████▉  | 24870/31102 [06:06<01:19, 78.25it/s] 

Added batch of 100 documents


 80%|████████  | 24951/31102 [06:07<01:22, 74.24it/s]

Added batch of 100 documents


 81%|████████  | 25057/31102 [06:09<01:16, 79.36it/s]

Added batch of 100 documents


 81%|████████  | 25156/31102 [06:10<01:13, 80.79it/s]

Added batch of 100 documents


 81%|████████  | 25243/31102 [06:11<01:14, 78.13it/s]

Added batch of 100 documents


 82%|████████▏ | 25368/31102 [06:13<01:01, 93.52it/s]

Added batch of 100 documents


 82%|████████▏ | 25447/31102 [06:14<01:12, 77.77it/s]

Added batch of 100 documents


 82%|████████▏ | 25545/31102 [06:16<01:09, 79.64it/s]

Added batch of 100 documents


 83%|████████▎ | 25674/31102 [06:17<00:59, 91.42it/s] 

Added batch of 100 documents


 83%|████████▎ | 25764/31102 [06:18<01:02, 85.17it/s]

Added batch of 100 documents


 83%|████████▎ | 25850/31102 [06:20<01:07, 78.39it/s]

Added batch of 100 documents


 83%|████████▎ | 25946/31102 [06:21<01:03, 81.67it/s]

Added batch of 100 documents


 84%|████████▍ | 26048/31102 [06:23<01:08, 74.07it/s]

Added batch of 100 documents


 84%|████████▍ | 26141/31102 [06:24<01:05, 75.47it/s]

Added batch of 100 documents


 84%|████████▍ | 26277/31102 [06:26<00:56, 85.76it/s] 

Added batch of 100 documents


 85%|████████▍ | 26334/31102 [06:27<01:18, 60.55it/s]

Added batch of 100 documents


 85%|████████▌ | 26490/31102 [06:29<00:52, 88.03it/s]

Added batch of 100 documents


 85%|████████▌ | 26583/31102 [06:30<00:54, 82.45it/s]

Added batch of 100 documents


 86%|████████▌ | 26670/31102 [06:32<00:56, 78.52it/s]

Added batch of 100 documents


 86%|████████▌ | 26761/31102 [06:33<00:53, 81.20it/s]

Added batch of 100 documents


 86%|████████▋ | 26850/31102 [06:35<00:55, 76.26it/s]

Added batch of 100 documents


 87%|████████▋ | 26936/31102 [06:36<01:04, 64.43it/s]

Added batch of 100 documents


 87%|████████▋ | 27084/31102 [06:38<00:50, 80.20it/s]

Added batch of 100 documents


 87%|████████▋ | 27179/31102 [06:40<00:47, 83.16it/s]

Added batch of 100 documents


 88%|████████▊ | 27275/31102 [06:41<00:46, 81.72it/s]

Added batch of 100 documents


 88%|████████▊ | 27378/31102 [06:42<00:43, 85.39it/s]

Added batch of 100 documents


 88%|████████▊ | 27479/31102 [06:44<00:41, 86.44it/s]

Added batch of 100 documents


 89%|████████▊ | 27575/31102 [06:46<00:46, 75.95it/s]

Added batch of 100 documents


 89%|████████▉ | 27674/31102 [06:47<00:41, 82.14it/s]

Added batch of 100 documents


 89%|████████▉ | 27754/31102 [06:49<00:49, 68.26it/s]

Added batch of 100 documents


 90%|████████▉ | 27851/31102 [06:50<00:45, 71.69it/s]

Added batch of 100 documents


 90%|████████▉ | 27950/31102 [06:52<00:42, 74.76it/s]

Added batch of 100 documents


 90%|█████████ | 28039/31102 [06:54<00:46, 66.21it/s]

Added batch of 100 documents


 91%|█████████ | 28186/31102 [06:55<00:33, 87.92it/s]

Added batch of 100 documents


 91%|█████████ | 28276/31102 [06:56<00:33, 84.22it/s]

Added batch of 100 documents


 91%|█████████ | 28360/31102 [06:58<00:34, 79.65it/s]

Added batch of 100 documents


 91%|█████████▏| 28454/31102 [06:59<00:33, 79.66it/s]

Added batch of 100 documents


 92%|█████████▏| 28545/31102 [07:01<00:33, 75.82it/s]

Added batch of 100 documents


 92%|█████████▏| 28679/31102 [07:02<00:28, 83.83it/s] 

Added batch of 100 documents


 92%|█████████▏| 28762/31102 [07:04<00:31, 74.51it/s]

Added batch of 100 documents


 93%|█████████▎| 28840/31102 [07:05<00:31, 72.09it/s]

Added batch of 100 documents


 93%|█████████▎| 28936/31102 [07:06<00:34, 62.66it/s] 

Added batch of 100 documents


 93%|█████████▎| 29022/31102 [07:08<00:34, 60.34it/s]

Added batch of 100 documents


 94%|█████████▎| 29146/31102 [07:10<00:26, 74.17it/s]

Added batch of 100 documents


 94%|█████████▍| 29244/31102 [07:11<00:25, 73.79it/s]

Added batch of 100 documents


 95%|█████████▍| 29393/31102 [07:13<00:18, 91.72it/s] 

Added batch of 100 documents


 95%|█████████▍| 29489/31102 [07:14<00:17, 90.77it/s]

Added batch of 100 documents


 95%|█████████▌| 29584/31102 [07:15<00:17, 87.16it/s]

Added batch of 100 documents


 95%|█████████▌| 29665/31102 [07:17<00:19, 73.55it/s]

Added batch of 100 documents


 96%|█████████▌| 29752/31102 [07:18<00:18, 74.98it/s]

Added batch of 100 documents


 96%|█████████▌| 29855/31102 [07:20<00:16, 77.38it/s]

Added batch of 100 documents


 96%|█████████▋| 29950/31102 [07:21<00:14, 78.59it/s]

Added batch of 100 documents


 97%|█████████▋| 30043/31102 [07:23<00:13, 77.40it/s]

Added batch of 100 documents


 97%|█████████▋| 30146/31102 [07:24<00:11, 80.65it/s]

Added batch of 100 documents


 97%|█████████▋| 30251/31102 [07:26<00:10, 82.18it/s]

Added batch of 100 documents


 98%|█████████▊| 30387/31102 [07:27<00:07, 100.96it/s]

Added batch of 100 documents


 98%|█████████▊| 30473/31102 [07:29<00:07, 79.30it/s] 

Added batch of 100 documents


 98%|█████████▊| 30557/31102 [07:30<00:06, 78.69it/s]

Added batch of 100 documents


 99%|█████████▊| 30649/31102 [07:31<00:05, 79.26it/s]

Added batch of 100 documents


 99%|█████████▉| 30754/31102 [07:33<00:04, 80.52it/s]

Added batch of 100 documents


 99%|█████████▉| 30857/31102 [07:35<00:03, 77.27it/s]

Added batch of 100 documents


 99%|█████████▉| 30946/31102 [07:36<00:02, 75.07it/s]

Added batch of 100 documents


100%|█████████▉| 31046/31102 [07:38<00:00, 74.68it/s]

Added batch of 100 documents


100%|█████████▉| 31100/31102 [07:39<00:00, 57.46it/s]

Added batch of 100 documents


100%|██████████| 31102/31102 [07:39<00:00, 67.63it/s]

Added batch of 2 documents





In [60]:
# Re-open the stored collection with the query embedding function and fetch the
# five nearest verses for a sample question; the raw result dict is displayed.
collection = client_db.get_collection(
    name="bible_wbt",
    embedding_function=google_ef,
)
collection.query(query_texts=["Who made the world?"], n_results=5)

{'ids': [['9031012', '9031009', '13010009', '9023027', '9017038']],
 'distances': [[1.8080245, 1.8217556, 1.8232312, 1.8242537, 1.8303963]],
 'embeddings': None,
 'metadatas': [[{'chapter_summary': "This chapter describes the defeat of the Israelites by the Philistines in a battle on Mount Gilboa. King Saul's sons, Jonathan, Abinadab, and Malchi-shua, were killed. Saul himself was severely wounded by archers. He asked his armor-bearer to kill him to prevent capture and abuse by the Philistines, but the armor-bearer refused out of fear. Saul then took his own sword and fell upon it, killing himself. His armor-bearer followed suit and died with him. Saul and his three sons died that same day.\n\nThe Philistines stripped the slain and found Saul and his sons on Mount Gilboa. They cut off Saul's head, stripped off his armor, and sent these trophies to their land to be displayed in their temples and among their people. Saul's armor was placed in the house of Ashtaroth, and his body was fast