In [1]:
import os
import warnings

# Azure OpenAI connection settings.
#
# The API key is a secret and must come from the environment (or a secrets
# manager); it is deliberately NOT written in this notebook. Export
# OPENAI_API_KEY before starting the kernel.
DEFAULT_AZURE_ENDPOINT = "https://prod-open-ai-service.openai.azure.com/"

# Non-secret settings read by the OpenAI client libraries.
os.environ["OPENAI_API_TYPE"] = "azure"
os.environ["OPENAI_API_VERSION"] = "2023-05-15"

# Endpoint: an explicit OPENAI_BASE wins, then an already exported
# OPENAI_API_BASE, then the project default. The resolved value is written
# back so the environment and the explicit arguments used later always agree.
openai_base = (
    os.getenv("OPENAI_BASE")
    or os.getenv("OPENAI_API_BASE")
    or DEFAULT_AZURE_ENDPOINT
)
os.environ["OPENAI_API_BASE"] = openai_base

# Secret: taken from the environment only, never assigned from a literal.
openai_key = os.getenv("OPENAI_API_KEY")
if not openai_key:
    warnings.warn(
        "OPENAI_API_KEY is not set; export it before running this notebook, "
        "otherwise every Azure OpenAI request will fail to authenticate.",
        stacklevel=2,
    )

# Optional name of the Azure deployment, when provided by the environment.
openai_deployment = os.getenv("AZURE_OPEN_AI_MODEL")

In [2]:
import logging
import sys

# Send INFO-level log records to stdout so they show up in the cell output.
# basicConfig already installs a stdout handler on the root logger; adding a
# second StreamHandler for the same stream made every record print twice.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

from llama_index import (
    VectorStoreIndex,
    SimpleKeywordTableIndex,
    SimpleDirectoryReader,
    ServiceContext,
    StorageContext,
)
from IPython.display import Markdown, display

INFO:numexpr.utils:Note: NumExpr detected 12 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
Note: NumExpr detected 12 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
INFO:numexpr.utils:NumExpr defaulting to 8 threads.
NumExpr defaulting to 8 threads.


In [3]:
from llama_index import SimpleDirectoryReader

# Directory (relative to this notebook) that holds the source documents.
DATA_PATH = "../data-legal"

# Read every file found under DATA_PATH into llama_index documents.
documents = SimpleDirectoryReader(input_dir=DATA_PATH).load_data()

In [5]:
from llama_index import (
    VectorStoreIndex,
    ServiceContext,
    StorageContext,
    SimpleDirectoryReader,
)
from llama_index.llms import AzureOpenAI
from langchain.embeddings import OpenAIEmbeddings
from llama_index.llms import LangChainLLM
from llama_index import LangchainEmbedding

# Connection arguments shared by the chat model and the embedding model.
azure_kwargs = dict(
    openai_api_type="azure",
    openai_api_key=openai_key,
    openai_api_base=openai_base,
    openai_api_version="2023-05-15",
)

# Chat/completion model served by the Azure deployment named in `engine`.
llm = AzureOpenAI(
    engine="PROD-GPT-16K-TURBO",
    temperature=0,
    **azure_kwargs,
)

# Embedding model: a LangChain OpenAIEmbeddings client wrapped so llama_index
# can use it.
# NOTE(review): in LangChain, `chunk_size` on OpenAIEmbeddings is documented as
# the maximum number of texts per embedding request, not a text-splitting
# size - confirm 1500 is what is intended for this deployment.
langchain_embeddings = OpenAIEmbeddings(
    deployment="PROD-text-embedding-ada-002",
    chunk_size=1500,
    **azure_kwargs,
)
embedding_llm = LangchainEmbedding(langchain_embeddings, embed_batch_size=16)

# Bundle the LLM and the embedding model; no chunk size is overridden here, so
# the node parser below runs with the library defaults.
service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embedding_llm,
)

# Split the loaded documents into nodes with the service context's parser.
node_parser = service_context.node_parser
nodes = node_parser.get_nodes_from_documents(documents)

In [6]:
# Create a default storage context and register the parsed nodes with its
# document store; both indexes below are built on this storage context.
storage_context = StorageContext.from_defaults()
docstore = storage_context.docstore
docstore.add_documents(nodes)

In [9]:
# Build both indexes over the same nodes and storage context.
# service_context must be passed explicitly: without it the indexes fall back
# to the library's default LLM/embedding model instead of the Azure
# deployments configured above, and the embedding requests are rejected.
vector_index = VectorStoreIndex(
    nodes,
    storage_context=storage_context,
    service_context=service_context,
)
keyword_index = SimpleKeywordTableIndex(
    nodes,
    storage_context=storage_context,
    service_context=service_context,
)

RetryError: RetryError[<Future at 0x19f79635180 state=finished raised InvalidRequestError>]

## Custom Retriever

from llama_index import QueryBundle

# import NodeWithScore
from llama_index.schema import NodeWithScore

# Retrievers
from llama_index.retrievers import (
    BaseRetriever,
    VectorIndexRetriever,
    KeywordTableSimpleRetriever,
)

from typing import List

In [None]:
class CustomRetriever(BaseRetriever):
    """Retriever that combines a vector (semantic) and a keyword retriever.

    Both underlying retrievers are queried with the same query bundle and
    their hits are merged by node id:

    * ``mode="AND"`` keeps only nodes returned by both retrievers.
    * ``mode="OR"`` keeps nodes returned by either retriever.

    Results come back in a stable order: vector hits first, in the order the
    vector retriever returned them, followed (in "OR" mode) by keyword-only
    hits in keyword-retriever order. When a node is found by both retrievers,
    the vector retriever's ``NodeWithScore`` is the one returned, so its
    score is preserved.
    """

    def __init__(
        self,
        vector_retriever: VectorIndexRetriever,
        keyword_retriever: KeywordTableSimpleRetriever,
        mode: str = "AND",
    ) -> None:
        """Store the two retrievers and the combination mode.

        Args:
            vector_retriever: Retriever used for semantic search.
            keyword_retriever: Retriever used for keyword lookup.
            mode: "AND" (intersection of hits) or "OR" (union of hits).

        Raises:
            ValueError: If ``mode`` is neither "AND" nor "OR".
        """
        if mode not in ("AND", "OR"):
            raise ValueError("Invalid mode.")

        self._vector_retriever = vector_retriever
        self._keyword_retriever = keyword_retriever
        self._mode = mode
        # Let the base class run whatever initialisation it defines.
        super().__init__()

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Retrieve nodes for ``query_bundle`` according to the configured mode.

        Args:
            query_bundle: The query passed unchanged to both retrievers.

        Returns:
            The merged hits, deduplicated by node id, in stable order.
        """
        vector_nodes = self._vector_retriever.retrieve(query_bundle)
        keyword_nodes = self._keyword_retriever.retrieve(query_bundle)

        # Dicts keep insertion order, so building the result from a dict
        # (rather than iterating a set of ids) makes the output order
        # reproducible. setdefault keeps the first entry seen for an id.
        merged = {}
        for hit in vector_nodes:
            merged.setdefault(hit.node.node_id, hit)

        if self._mode == "AND":
            keyword_ids = {hit.node.node_id for hit in keyword_nodes}
            return [hit for node_id, hit in merged.items() if node_id in keyword_ids]

        # "OR": append keyword hits that the vector retriever did not return.
        for hit in keyword_nodes:
            merged.setdefault(hit.node.node_id, hit)
        return list(merged.values())

## Running queries

from llama_index import get_response_synthesizer
from llama_index.query_engine import RetrieverQueryEngine

# define custom retriever
vector_retriever = VectorIndexRetriever(index=vector_index, similarity_top_k=2)
keyword_retriever = KeywordTableSimpleRetriever(index=keyword_index)
custom_retriever = CustomRetriever(vector_retriever, keyword_retriever)

# define response synthesizer
response_synthesizer = get_response_synthesizer()

# assemble query engine
custom_query_engine = RetrieverQueryEngine(
    retriever=custom_retriever,
    response_synthesizer=response_synthesizer,
)

# vector query engine
vector_query_engine = RetrieverQueryEngine(
    retriever=vector_retriever,
    response_synthesizer=response_synthesizer,
)
# keyword query engine
keyword_query_engine = RetrieverQueryEngine(
    retriever=keyword_retriever,
    response_synthesizer=response_synthesizer,
)

In [None]:
# Ask the combined (vector + keyword) engine a question and print the answer.
question = "What is the name of the author?"
response = custom_query_engine.query(question)
print(response)
