## Setup Oracle Vector Store

Oracle Database を Vector Store として活用するための設定を行います。

In [None]:
import os
from dotenv import load_dotenv, find_dotenv
import glob

import oracledb

from langfuse import Langfuse
from langfuse.callback import CallbackHandler

from langchain_community.vectorstores import OracleVS
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores.utils import DistanceStrategy
from langchain_community.document_loaders.oracleai import OracleTextSplitter
from langchain_community.embeddings.oci_generative_ai import OCIGenAIEmbeddings

必要な環境変数を `.env` から読み込みます。

In [None]:
# Load the key/value pairs from the nearest .env file into the process
# environment. The return value is not needed, hence the throwaway name.
_ = load_dotenv(find_dotenv())

# --- Oracle Database connection settings ---
un = os.environ.get("ORACLE_USERNAME")
pw = os.environ.get("ORACLE_PASSWORD")
dsn = os.environ.get("ORACLE_DSN")
# The same directory is passed as both config_dir and wallet_location when
# connecting.
wallet_location = "/tmp/wallet"
config_dir = wallet_location
wallet_password = os.environ.get("WALLET_PASSWORD")
# Name of the table used by the vector store.
table_name = os.environ.get("TABLE_NAME")

# --- OCI settings (passed to the embedding client) ---
compartment_id = os.environ.get("COMPARTMENT_ID")
service_endpoint = os.environ.get("SERVICE_ENDPOINT")

# --- Langfuse settings ---
secret_key = os.environ.get("LANGFUSE_SECRET_KEY")
public_key = os.environ.get("LANGFUSE_PUBLIC_KEY")
langfuse_host = os.environ.get("LANGFUSE_HOST")

Langfuse のクライアントを初期化します。

In [None]:
# The Langfuse client and the callback handler are configured with the same
# credentials and host, so gather them once and unpack into both constructors.
_langfuse_settings = {
    "secret_key": secret_key,
    "public_key": public_key,
    "host": langfuse_host,
}
langfuse = Langfuse(**_langfuse_settings)
langfuse_handler = CallbackHandler(**_langfuse_settings)

初回実行時は、以下のセルも実行してください（既存のテーブルを削除し、ドキュメントを再登録します）。

In [None]:
# First-run setup: recreate the vector table, then split the source text
# files into chunks and store them in the Oracle vector store.

# Fail before touching the database when the table name is missing; otherwise
# the DROP statement below would be built with the literal text "None".
if not table_name:
    raise ValueError("TABLE_NAME is not set; cannot (re)create the vector table.")

# The glob is relative to the current working directory. Check it up front so
# an empty match does not drop the existing table and then load nothing.
files = glob.glob("../data/thinkit/*.txt")
if not files:
    raise FileNotFoundError("No source files matched '../data/thinkit/*.txt'.")

with oracledb.connect(
    user=un,
    password=pw,
    dsn=dsn,
    config_dir=config_dir,
    wallet_location=wallet_location,
    wallet_password=wallet_password
) as connection:
    # Drop the table first so that re-running this cell is idempotent.
    # The cursor is closed as soon as the statement has run.
    with connection.cursor() as cursor:
        cursor.execute(
            statement=f"""
                drop table if exists {table_name}
            """
        )

    embeddings = OCIGenAIEmbeddings(
        auth_type="INSTANCE_PRINCIPAL",
        model_id="cohere.embed-multilingual-v3.0",
        service_endpoint=service_endpoint,
        compartment_id=compartment_id,
    )
    oracle_vs = OracleVS(
        client=connection,
        embedding_function=embeddings,
        table_name=table_name,
        distance_strategy=DistanceStrategy.COSINE,
    )

    # Load every matched text file into one flat list of documents.
    # NOTE(review): no encoding is passed to TextLoader; confirm the files
    # decode correctly with this host's default encoding.
    documents = []
    for file in files:
        documents.extend(TextLoader(file_path=file).load())

    # Chunking parameters handed to the database-side splitter.
    # see: https://docs.oracle.com/en/database/oracle/oracle-database/23/sqlrf/vector_chunks.html#SQLRF-GUID-5927E2FA-6419-4744-A7CB-3E62DBB027AD
    params = {
        "split": "recursively",
        "max": 400,
        "by": "characters",
        "overlap": 80,
        "normalize": "all",
        "language": "JAPANESE"
    }
    splitter = OracleTextSplitter(conn=connection, params=params)

    chunks = splitter.split_documents(documents=documents)
    oracle_vs.add_documents(documents=chunks)

In [None]:
# Run a sample similarity search against the vector store.

# Build the embedding client here rather than relying on the first-run cell,
# which is skipped on later runs. The settings match the ones used for
# ingestion in that cell.
embeddings = OCIGenAIEmbeddings(
    auth_type="INSTANCE_PRINCIPAL",
    model_id="cohere.embed-multilingual-v3.0",
    service_endpoint=service_endpoint,
    compartment_id=compartment_id,
)

with oracledb.connect(
    user=un,
    password=pw,
    dsn=dsn,
    config_dir=config_dir,
    wallet_location=wallet_location,
    wallet_password=wallet_password
) as connection:
    oracle_vs = OracleVS(
        client=connection,
        embedding_function=embeddings,
        # Query the same table the ingestion cell writes to (TABLE_NAME from
        # .env) instead of a hard-coded name.
        table_name=table_name,
        distance_strategy=DistanceStrategy.COSINE,
        # NOTE(review): presumably a probe text OracleVS uses at
        # initialisation; confirm against the LangChain documentation.
        query="What is a Oracle Database"
    )
    # Fetch the 5 chunks closest to the question and print them.
    result = oracle_vs.similarity_search(
        query="OCHaCafeってなんですか？",
        k=5
    )
    print(result)