In [33]:
import logging
import sys

from typing import List, Any, Optional, Dict, Tuple
from llama_index.vector_stores.types import (
    VectorStore,
    VectorStoreQuery,
    VectorStoreQueryResult,
)
from llama_index.node_parser import (
    HierarchicalNodeParser,
    SentenceSplitter,
)

from llama_index import StorageContext, VectorStoreIndex, ServiceContext
from llama_index.storage.docstore import SimpleDocumentStore
from llama_index.schema import TextNode, BaseNode, Document, NodeRelationship
from llama_index import SimpleDirectoryReader
from llama_index.retrievers.auto_merging_retriever import AutoMergingRetriever
from llama_index.query_engine import RetrieverQueryEngine
from llama_index.node_parser import get_leaf_nodes, get_root_nodes
from llama_index.response.notebook_utils import display_source_node

import oci
import ads
from ads.llm import GenerativeAIEmbeddings, GenerativeAI
import oracledb

from config import INPUT_FILES
from config_private import COMPARTMENT_OCID, ENDPOINT

from oci_utils import load_oci_config

# OracleVectorStore is the custom class written for the integration with LlamaIndex
from oracle_vector_db import OracleVectorStore

In [2]:
# Setup: OCI credentials and the embedding model.
oci_config = load_oci_config()

# Build the ADS API-key auth object once; it is passed to the embedding
# client below instead of being constructed a second time.
api_keys_config = ads.auth.api_keys(oci_config)

# English-only embedding model; for other languages use the multilingual one.
MODEL_NAME = "cohere.embed-english-v3.0"

embed_model = GenerativeAIEmbeddings(
    compartment_id=COMPARTMENT_OCID,
    model=MODEL_NAME,
    auth=api_keys_config,
    # Optional keyword arguments for the OCI client, e.g. service_endpoint.
    client_kwargs={"service_endpoint": ENDPOINT},
)

In [3]:
# Load the source PDF through a directory reader restricted to one file.
# NOTE(review): config.INPUT_FILES is imported at the top of the notebook but
# the path is hard-coded here — confirm which one is intended.
pdf_reader = SimpleDirectoryReader(
    input_files=["./oracle-database-23c-new-features-guide.pdf"]
)
pages = pdf_reader.load_data()

In [4]:
# Concatenate the content of every loaded page (blank line between pages)
# and wrap the result in a single Document.
doc_text = "\n\n".join(page.get_content() for page in pages)
docs = [Document(text=doc_text)]

In [5]:
# Chunk sizes handed to the hierarchical parser, largest first.
chunk_sizes = [2048, 512, 256]

# Parse the single merged document into nodes.
node_parser = HierarchicalNodeParser.from_defaults(chunk_sizes=chunk_sizes)
nodes = node_parser.get_nodes_from_documents(docs)

In [6]:
# Total number of nodes produced by the hierarchical parser.
len(nodes)

319

In [7]:
# Inspect the first node, including its relationships dict.
nodes[0]

TextNode(id_='5856c646-d92e-41ff-a632-599d3ddf717b', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='abad93b4-4cef-40bd-86d1-25c5709f8e0e', node_type=<ObjectType.DOCUMENT: '4'>, metadata={}, hash='b6a01ee6acd5b2346caee0a25f4a5ef83e9cfa20eeeedc0728f3f0176bf5e72d'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='4f4e7c07-a5ce-489f-a648-8306436030a3', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='2ee2b9fbfa585a3f05e4370aac3df369e66c184cdf328320c07bf5f42fef9fb3'), <NodeRelationship.CHILD: '5'>: [RelatedNodeInfo(node_id='3aafc0d1-7d04-41ff-bab1-9ebf674d385f', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='3a586e9a0ec6052ef464abbfd1797afe0b8900ae8d1781ef3662f1b2fa5eeb45'), RelatedNodeInfo(node_id='61315c34-a4c1-464e-bb04-94db081e1657', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='396a3403315b83b92d024bf1f8163d3890f68dd423112b053d7e66709bc18ffd'), R

In [13]:
# Print the text of the first child of nodes[0].
# The child id is read from the CHILD relationship instead of being
# hard-coded: node ids are assigned at parse time (presumably fresh UUIDs on
# every run), so a literal id from an earlier session would match nothing.
child_infos = nodes[0].relationships.get(NodeRelationship.CHILD, [])
first_child_id = child_infos[0].node_id if child_infos else None

for node in nodes:
    if node.id_ == first_child_id:
        print(node.text)

Oracle® Database
Oracle Database New Features
Release 23c
F48428-15
October 2023


Oracle Database Oracle Database New Features, Release 23c
F48428-15
Copyright © 2022, 2023, Oracle and/or its affiliates.
This software and related documentation are provided under a license agreement containing restrictions on
use and disclosure and are protected by intellectual property laws. Except as expressly permitted in your
license agreement or allowed by law, you may not use, copy, reproduce, translate, broadcast, modify, license,
transmit, distribute, exhibit, perform, publish, or display any part, in any form, or by any means. Reverse
engineering, disassembly, or decompilation of this software, unless required by law for interoperability, is
prohibited.
The information contained herein is subject to change without notice and is not warranted to be error-free. If
you find any errors, please report them to us in writing.
If this is software, software documentation, data (as defined in the Federa

In [16]:
# Extract the leaf nodes (as returned by get_leaf_nodes) from the parsed nodes.
leaf_nodes = get_leaf_nodes(nodes)

In [15]:
# Show the text of the first leaf node.
print(leaf_nodes[0].text)

Oracle® Database
Oracle Database New Features
Release 23c
F48428-15
October 2023


Oracle Database Oracle Database New Features, Release 23c
F48428-15
Copyright © 2022, 2023, Oracle and/or its affiliates.
This software and related documentation are provided under a license agreement containing restrictions on
use and disclosure and are protected by intellectual property laws. Except as expressly permitted in your
license agreement or allowed by law, you may not use, copy, reproduce, translate, broadcast, modify, license,
transmit, distribute, exhibit, perform, publish, or display any part, in any form, or by any means. Reverse
engineering, disassembly, or decompilation of this software, unless required by law for interoperability, is
prohibited.
The information contained herein is subject to change without notice and is not warranted to be error-free. If
you find any errors, please report them to us in writing.
If this is software, software documentation, data (as defined in the Federa

In [36]:
# Id of the first leaf node's parent, read from its PARENT relationship.
leaf_nodes[0].relationships[NodeRelationship.PARENT].node_id

'3aafc0d1-7d04-41ff-bab1-9ebf674d385f'

In [11]:
# Id of the first leaf node itself.
leaf_nodes[0].id_

'1916bc4b-aee6-4576-a867-59b43ea26b8c'

In [None]:
# Same node, accessed through node_id.
# NOTE(review): presumably an alias of id_ — confirm.
leaf_nodes[0].node_id

In [None]:
# Check the concrete class of the leaf nodes.
type(leaf_nodes[0])

In [None]:
# Instantiate the project's OracleVectorStore (from oracle_vector_db) with verbose=True.
v_store = OracleVectorStore(verbose=True)

In [None]:
# LLM client used for answer generation.
# NOTE(review): unlike embed_model, no explicit `auth` is passed here —
# presumably ADS falls back to its default auth; confirm.
llm_client_kwargs = {"service_endpoint": ENDPOINT}

llm_oci = GenerativeAI(
    compartment_id=COMPARTMENT_OCID,
    max_tokens=1024,
    # Optional keyword arguments for the OCI client, e.g. service_endpoint.
    client_kwargs=llm_client_kwargs,
)

In [None]:
# Storage side: the custom vector store, plus a docstore that receives every
# parsed node (all of `nodes`, not only the leaves).
storage_context = StorageContext.from_defaults(vector_store=v_store)
storage_context.docstore.add_documents(nodes)

# Model side: the LLM and the embedding model.
service_context = ServiceContext.from_defaults(llm=llm_oci, embed_model=embed_model)

In [None]:
# Build the vector index over the leaf nodes only, using the storage and
# service contexts created earlier in the notebook.
base_index = VectorStoreIndex(
    leaf_nodes,
    storage_context=storage_context,
    service_context=service_context,
    show_progress=True,
)

In [None]:
# Inspect the relationships stored for one indexed node. The key is taken
# from leaf_nodes rather than a literal id copied from an earlier session,
# which would raise KeyError once the nodes are re-parsed.
# NOTE(review): assumes node_dict is keyed by node id — confirm against OracleVectorStore.
base_index.vector_store.node_dict[leaf_nodes[0].node_id].relationships

In [None]:
# Inspect one full node entry held by the vector store. The key is taken from
# leaf_nodes rather than a literal id copied from an earlier session, which
# would raise KeyError once the nodes are re-parsed.
# NOTE(review): assumes node_dict is keyed by node id — confirm against OracleVectorStore.
base_index.vector_store.node_dict[leaf_nodes[-1].node_id]

In [None]:
# The vector store must be persisted before running any query.
# NOTE(review): presumably persist() is what writes the embeddings to the
# database — confirm against OracleVectorStore.
base_index.vector_store.persist()

In [None]:
# Number of nearest matches requested from the vector index.
SIMILARITY_TOP_K = 10

# Plain vector retriever over the leaf-node index.
base_retriever = base_index.as_retriever(
    similarity_top_k=SIMILARITY_TOP_K,
    verbose=True,
)

# Auto-merging retriever built on the base retriever and the storage context
# (whose docstore holds all parsed nodes).
retriever = AutoMergingRetriever(
    base_retriever,
    storage_context,
    verbose=True,
)

In [None]:
# Query engine that answers questions using the auto-merging retriever and
# the models configured in service_context.
query_engine = RetrieverQueryEngine.from_args(
    retriever, service_context=service_context
)

In [None]:
%%time

question = "Write an email where you highlight advantage coming from the adoption of Data Guard for Disaster Recovery?"

response = query_engine.query(question)

print(f"Question: {question}")
print(response.response)
print("")

In [None]:
# Retrieve with the auto-merging retriever for the same question.
# NOTE(review): this rebinds `nodes`, which earlier held the parser output
# from get_nodes_from_documents; re-running earlier cells after this one will
# see the retrieved results instead.
nodes = retriever.retrieve(question)

In [None]:
# Render every result of the auto-merging retriever (source_length=1000).
for merged_result in nodes:
    display_source_node(merged_result, source_length=1000)

In [None]:
# Retrieve with the plain base retriever for the same question, for comparison.
base_nodes = base_retriever.retrieve(question)

In [None]:
# Render every result of the base retriever (source_length=10000).
for base_result in base_nodes:
    display_source_node(base_result, source_length=10000)

In [37]:
# Reference: list the available NodeRelationship members and their values.
help(NodeRelationship)

Help on class NodeRelationship in module llama_index.schema:

class NodeRelationship(builtins.str, enum.Enum)
 |  NodeRelationship(value, names=None, *, module=None, qualname=None, type=None, start=1)
 |  
 |  Node relationships used in `BaseNode` class.
 |  
 |  Attributes:
 |      SOURCE: The node is the source document.
 |      PREVIOUS: The node is the previous node in the document.
 |      NEXT: The node is the next node in the document.
 |      PARENT: The node is the parent node in the document.
 |      CHILD: The node is a child node in the document.
 |  
 |  Method resolution order:
 |      NodeRelationship
 |      builtins.str
 |      enum.Enum
 |      builtins.object
 |  
 |  Data and other attributes defined here:
 |  
 |  CHILD = <NodeRelationship.CHILD: '5'>
 |  
 |  NEXT = <NodeRelationship.NEXT: '3'>
 |  
 |  PARENT = <NodeRelationship.PARENT: '4'>
 |  
 |  PREVIOUS = <NodeRelationship.PREVIOUS: '2'>
 |  
 |  SOURCE = <NodeRelationship.SOURCE: '1'>
 |  
 |  ------------