### Oracle AI Vector Search: adding more documents to the Vector Store

In [1]:
import logging
from glob import glob

import oracledb

# to compute embedding vectors
from oci_cohere_embeddings_utils import OCIGenAIEmbeddingsWithBatch
from langchain_community.vectorstores import oraclevs
from langchain_community.vectorstores.oraclevs import OracleVS
from langchain_community.vectorstores.utils import DistanceStrategy

# project helper modules and configuration
from chunk_index_utils import load_book_and_split
from utils import enable_tracing
from config import OCI_EMBED_MODEL, ENDPOINT
from config_private import COMPARTMENT_ID, DB_USER, DB_PWD, DB_HOST_IP, DB_SERVICE

#### Setup

In [2]:
# ---------------------------------------------------------------
# Notebook configuration
# ---------------------------------------------------------------

# folder that holds the Knowledge base files
BOOKS_DIR = "./books"

# DB credentials and connect string, in the form host:port/service
username, password = DB_USER, DB_PWD
dsn = f"{DB_HOST_IP}:1521/{DB_SERVICE}"

# INFO-level logging; each record carries timestamp, level and message
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Client used to compute the embeddings.
# API_KEY authentication is used here (API keys must be provided);
# when running inside OCI DS, auth_type="RESOURCE_PRINCIPAL" can be
# used instead.
embed_model = OCIGenAIEmbeddingsWithBatch(
    compartment_id=COMPARTMENT_ID,
    service_endpoint=ENDPOINT,
    model_id=OCI_EMBED_MODEL,
    auth_type="API_KEY",
)

enable_tracing()

In [3]:
# Names of the Knowledge base files to add in this run;
# edit this list to load different files.
file_list = ["oracle-ai-vector-search-users-guide.pdf"]

# Summary line followed by one file name per line, in a single call
print(
    f"There are {len(file_list)} files to be loaded...",
    *file_list,
    sep="\n",
)

There are 1 files to be loaded...
oracle-ai-vector-search-users-guide.pdf


#### Load all files and split them into chunks

In [4]:
# Accumulates the chunks produced for every file in file_list
docs = []

for book_name in file_list:
    # file names in file_list are relative to BOOKS_DIR
    book_path = f"{BOOKS_DIR}/{book_name}"
    docs += load_book_and_split(book_path)

2024-05-14 17:18:53,852 - Loaded 351 chunks...


#### Create the Vector Store and load the documents + embeddings into the DB

In [5]:
# Connect to the DB, then load the documents into the vector store.
# The two phases are handled separately so that a failure while building
# the vector store or adding documents is not reported as a connection
# failure.
try:
    connection = oracledb.connect(user=username, password=password, dsn=dsn)
except Exception as e:
    print("Connection failed!")
    print(e)
else:
    print("Connection successful!")

    try:
        # Vector store bound to the ORACLE_KNOWLEDGE table, using cosine
        # distance and embed_model as embedding function.
        # NOTE: the connection is intentionally left open, since v_store
        # keeps it as its client.
        v_store = OracleVS(
            client=connection,
            table_name="ORACLE_KNOWLEDGE",
            distance_strategy=DistanceStrategy.COSINE,
            embedding_function=embed_model,
        )

        # add the documents
        v_store.add_documents(docs)

    except Exception as e:
        print("Loading documents failed!")
        print(e)

Connection successful!


  0%|          | 0/4 [00:00<?, ?it/s]

#### Run a test query