In [1]:
# Standard library
import os
import shutil

# Third-party
import nest_asyncio
from dotenv import load_dotenv

# Patch asyncio so nested event loops are allowed inside the notebook kernel.
nest_asyncio.apply()

# Load variables from a .env file into the environment. Kept as the last
# expression so the cell still displays the value returned by load_dotenv().
load_dotenv()

True

In [2]:
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    SummaryIndex
)

from llama_index.core.settings import Settings
from llama_index.core.agent import ReActAgent
from llama_index.core.schema import IndexNode

from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

from llama_index.llms.openai import OpenAI

# --- Model configuration -------------------------------------------------
# The OpenAI key is read from the process environment (None if it is unset).
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

llm = OpenAI(
    model="gpt-4o-mini",
    api_key=OPENAI_API_KEY,
)
embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5",
)

# Register the models and the chunk size on the shared Settings object.
Settings.llm = llm
Settings.embed_model = embed_model
Settings.chunk_size = 512

# --- Shared notebook state -----------------------------------------------
# Nodes collected by later cells (one entry is appended per indexed file).
objects = []
saved_files = {}

# Query engine shared across cells; stays None until an index has been built.
global_query_engine = None

[nltk_data] Downloading package punkt_tab to
[nltk_data]     /opt/anaconda3/envs/COS243/lib/python3.11/site-
[nltk_data]     packages/llama_index/core/_static/nltk_cache...
[nltk_data]   Package punkt_tab is already up-to-date!


In [28]:
def search_query(query, history):
    """Answer a user query with the shared module-level query engine.

    Args:
        query: The user's question text.
        history: Prior conversation context; it is converted with ``str()``
            and placed in front of the user input in the prompt.

    Returns:
        dict: A chat message with ``'role'`` set to ``"assistant"``. Its
        ``'content'`` is the engine's answer text, a notice asking the user to
        upload documents when no engine exists yet, or an error description
        when the query raised an exception.
    """
    # The global is only read, never rebound, so no `global` statement is needed.
    engine = global_query_engine

    # No engine means no documents have been indexed yet.
    if engine is None:
        return {
            'role': "assistant",
            'content': "Upload documents first! Ready to assist you. 📚"
        }

    prompt = f"CONTEXT: {str(history)}\n USER INPUT: {query}"

    try:
        response = engine.query(prompt)
        # Debug aid: show which nodes backed the answer.
        print(response.source_nodes)

        return {
            'role': "assistant",
            # Response objects render their answer text via str(); they have
            # no `.text` attribute, so accessing it would always raise here.
            'content': str(response)
        }
    except Exception as e:
        # Best-effort: report the failure as a chat message instead of raising.
        return {
            'role': "assistant",
            'content': f"An error occurred: {str(e)}"
        }


In [None]:
# Names of the regular files directly inside ./upload (sub-directories are skipped).
# A missing upload folder is treated as "no documents yet" instead of raising,
# and the names are sorted because os.listdir() returns entries in arbitrary order.
upload_dir = './upload'
files = sorted(
    name
    for name in (os.listdir(upload_dir) if os.path.isdir(upload_dir) else [])
    if os.path.isfile(os.path.join(upload_dir, name))
)

In [29]:
from CustomCitationEngine import CustomCitationQueryEngine

In [30]:
# Build one ReAct agent per uploaded file, wrap each agent in an IndexNode, and
# then build a single top-level index over all agent nodes.

# Files that already have a node in `objects` are skipped, so re-running this
# cell does not append duplicate agents.
already_indexed = {node.index_id for node in objects}

for file in files:
    if file in already_indexed:
        continue

    destination_path = os.path.join("./upload", file)
    docs = SimpleDirectoryReader(input_files=[destination_path]).load_data()
    vector_index = VectorStoreIndex.from_documents(docs)
    summary_index = SummaryIndex.from_documents(docs)

    # define query engines
    vector_query_engine = CustomCitationQueryEngine.from_args(
        vector_index, similarity_top_k=3, citation_chunk_size=512,
    )
    summary_query_engine = CustomCitationQueryEngine.from_args(
        summary_index, similarity_top_k=3, citation_chunk_size=512,
    )

    # define tools
    query_engine_tools = [
        QueryEngineTool(
            query_engine=vector_query_engine,
            metadata=ToolMetadata(
                name="vector_tool",
                description=(
                    f"Useful for retrieving specific context from {file}."
                ),
            ),
        ),
        QueryEngineTool(
            query_engine=summary_query_engine,
            metadata=ToolMetadata(
                name="summary_tool",
                description=(
                    "Useful for summarization questions related to"
                    f" {file}."
                ),
            ),
        ),
    ]
    agent = ReActAgent.from_tools(
        query_engine_tools,
        llm=llm,
        verbose=True,
    )
    # The trailing space keeps the two sentences apart after implicit
    # string concatenation.
    book_summary = (f"This content contains the full book '{file}'. "
                    f"Use this index if you need to lookup specific facts related to '{file}'")
    node = IndexNode(text=book_summary, index_id=file, obj=agent)
    objects.append(node)

# Build the top-level index once, after every agent node exists, instead of
# rebuilding it on each loop iteration. With no nodes the engine is left as is.
if objects:
    # NOTE: `vector_index` is rebound here from the last per-file index to the
    # index over all agent nodes.
    vector_index = VectorStoreIndex(
        objects=objects,
    )

    global_query_engine = vector_index.as_query_engine(similarity_top_k=1, verbose=True)

In [31]:
# Run a sample question through the shared query engine built above.
response = global_query_engine.query(
    "Tell me about interaction between Alan Turing and Claude Shannon "
    "and how this impact Shannon's work on information theory"
)

[1;3;38;2;11;159;203mRetrieval entering The Theory That Would Not Die How Bayes Rule Cracked the Enigma Code Hunted Down Russian Submarines and Emerged Triumphant from Two Centuries of Controversy by Sharon Bertsch McGrayne.pdf: ReActAgent
[0m[1;3;38;2;237;90;200mRetrieving from object ReActAgent with query Tell me about interaction between Alan Turing and Claude Shannon and how this impact Shannon's work on information theory
[0m> Running step 394bd8a7-695e-46fb-b796-79fb83b9b7a7. Step input: Tell me about interaction between Alan Turing and Claude Shannon and how this impact Shannon's work on information theory
[1;3;38;5;200mThought: The current language of the user is: English. I need to use a tool to help me answer the question.
Action: vector_tool
Action Input: {'input': "interaction between Alan Turing and Claude Shannon and its impact on Shannon's work on information theory"}
[0m[1;3;34mObservation: Error: type object 'TextNode' has no attribute 'model_validate'
[0m> Run

KeyboardInterrupt: 