In [25]:
from llama_index.readers.smart_pdf_loader import SmartPDFLoader
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core import get_response_synthesizer, DocumentSummaryIndex
from llama_index.llms.openai import OpenAI
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.indices.document_summary import DocumentSummaryIndexEmbeddingRetriever
import nest_asyncio
import os
import openai
import logging
import sys
import nest_asyncio

# Patch asyncio so that an already-running event loop can be re-entered.
nest_asyncio.apply()

# Route log records at WARNING and above to stdout.
# NOTE: basicConfig() installs its own stdout handler when the root logger has
# none, so the extra handler added below can make each record appear twice.
logging.basicConfig(stream=sys.stdout, level=logging.WARNING)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

# Secrets must not live in source control: the key is read from the environment.
# Any key that was previously hard-coded here should be treated as compromised
# and rotated.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it in the environment before running this cell."
    )
openai.api_key = os.environ["OPENAI_API_KEY"]

def get_pdf_urls(base_path):
    """Recursively collect the full path of every visible file under ``base_path``.

    Files whose names start with ``.`` or ``~$`` are skipped. No check on the
    file extension is performed, so non-PDF files are returned as well.

    Args:
        base_path: Root directory handed to ``os.walk``.

    Returns:
        A list of paths (directory joined with file name) in ``os.walk`` order.
    """
    ignored_prefixes = ('.', '~$')
    collected = []
    for directory, _subdirs, names in os.walk(base_path):
        for name in names:
            # str.startswith accepts a tuple: one call covers both prefixes.
            if name.startswith(ignored_prefixes):
                continue
            collected.append(os.path.join(directory, name))
    return collected

def pdf_reader(base_path):
    """Parse every file found under ``base_path`` into documents with SmartPDFLoader.

    Args:
        base_path: Directory searched recursively by ``get_pdf_urls``.

    Returns:
        One list holding the documents loaded from all files, in the order the
        files were discovered. The list is empty when no file is found.
    """
    llmsherpa_api_url = "https://readers.llmsherpa.com/api/document/developer/parseDocument?renderFormat=all"
    pdf_loader = SmartPDFLoader(llmsherpa_api_url=llmsherpa_api_url)

    # Accumulate across files. Rebinding the result on each iteration would keep
    # only the last file's documents and leave the name unbound for an empty
    # directory.
    documents = []
    for pdf in get_pdf_urls(base_path):
        documents.extend(pdf_loader.load_data(pdf))
    return documents

# Repeats the nest_asyncio.apply() call already made right after the imports.
nest_asyncio.apply()

def initialize_document_summary_index(documents):
    """Build a DocumentSummaryIndex over ``documents``.

    The index is created with an OpenAI ``gpt-3.5-turbo`` model at temperature 0,
    a SentenceSplitter using ``chunk_size=1024`` as the only transformation, and
    a ``tree_summarize`` response synthesizer with ``use_async=True``.

    Args:
        documents: The documents passed to ``DocumentSummaryIndex.from_documents``.

    Returns:
        The index returned by ``DocumentSummaryIndex.from_documents``.
    """
    llm = OpenAI(model="gpt-3.5-turbo", temperature=0)
    sentence_splitter = SentenceSplitter(chunk_size=1024)
    summarizer = get_response_synthesizer(
        response_mode="tree_summarize",
        use_async=True,
    )

    return DocumentSummaryIndex.from_documents(
        documents,
        llm=llm,
        transformations=[sentence_splitter],
        response_synthesizer=summarizer,
        show_progress=True,
    )

def create_retriever(doc_summary_index):
    """Wrap ``doc_summary_index`` in an embedding retriever with ``similarity_top_k=1``.

    Args:
        doc_summary_index: The index handed to DocumentSummaryIndexEmbeddingRetriever.

    Returns:
        The newly created DocumentSummaryIndexEmbeddingRetriever.
    """
    return DocumentSummaryIndexEmbeddingRetriever(
        doc_summary_index,
        similarity_top_k=1,
    )

def setup_query_engine(
    base_path="/Users/skylerwilson/Desktop/PartsWise/co-pilot-v1/data/knowledge_database",
):
    """Load the knowledge-base files, index them, and return a ready query engine.

    Args:
        base_path: Directory that is scanned recursively for files to load.
            Defaults to the location that used to be hard-coded in this
            function, so calls without arguments behave as before.

    Returns:
        A RetrieverQueryEngine combining an embedding retriever over the
        document summary index with a ``tree_summarize`` response synthesizer.
    """
    documents = pdf_reader(base_path)
    doc_summary_index = initialize_document_summary_index(documents)
    retriever = create_retriever(doc_summary_index)

    # A separate synthesizer is created for answering queries; the one used
    # while building the index is not reused here.
    response_synthesizer = get_response_synthesizer(response_mode="tree_summarize")
    query_engine = RetrieverQueryEngine(
        retriever=retriever,
        response_synthesizer=response_synthesizer,
    )
    return query_engine


# Build the query engine once at module level so that later cells can reuse it.
query_engine = setup_query_engine()



Parsing nodes: 100%|██████████| 11/11 [00:00<00:00, 1929.46it/s]
Summarizing documents:   0%|          | 0/11 [00:00<?, ?it/s]

current doc id: 06d26960-2688-4b03-a7d6-03357412de2f


Summarizing documents:   9%|▉         | 1/11 [00:01<00:19,  1.93s/it]

current doc id: 7b6c0058-79ba-441d-8e48-5b37ac528ece


Summarizing documents:  18%|█▊        | 2/11 [00:03<00:14,  1.64s/it]

current doc id: 8f61f5f8-951b-4a0a-a03c-d2d7d400b275


Summarizing documents:  27%|██▋       | 3/11 [00:05<00:15,  1.97s/it]

current doc id: 84c444d2-9576-48ca-95d5-9009eb1c82bd


Summarizing documents:  36%|███▋      | 4/11 [00:07<00:13,  1.96s/it]

current doc id: 1610809f-3d0c-4aa3-997f-3bf01be411bb


Summarizing documents:  45%|████▌     | 5/11 [00:09<00:11,  1.88s/it]

current doc id: 2c3f47ad-ff38-4dbb-8d05-34c1227c9e88


Summarizing documents:  55%|█████▍    | 6/11 [00:11<00:09,  1.87s/it]

current doc id: b251c88e-64c8-420e-b46c-817c08202c60


Summarizing documents:  64%|██████▎   | 7/11 [00:12<00:07,  1.79s/it]

current doc id: edb89d55-6fee-45c6-8ad3-53cb0b50586b


Summarizing documents:  73%|███████▎  | 8/11 [00:14<00:05,  1.84s/it]

current doc id: 1e234b03-171e-453c-b8da-5488b3841f42


Summarizing documents:  82%|████████▏ | 9/11 [00:16<00:03,  1.91s/it]

current doc id: 9bf35d9e-79ae-457a-91c0-27c15d788941


Summarizing documents:  91%|█████████ | 10/11 [00:18<00:01,  1.68s/it]

current doc id: a004518b-a5d7-4a48-8168-01238ab40918


Summarizing documents: 100%|██████████| 11/11 [00:19<00:00,  1.75s/it]
Generating embeddings: 100%|██████████| 11/11 [00:00<00:00, 31.29it/s]


In [26]:
# Run a query against the engine built in the previous cell and print the answer.
response = query_engine.query("How can I improve the turnover of my parts department?")
print(response)

You can improve the turnover of your parts department by establishing a culture of continuous improvement within your dealership's inventory management processes. Encourage feedback from staff members involved in inventory handling, regularly review and refine inventory management procedures based on performance metrics and feedback. By continuously seeking ways to enhance efficiency and adapt to evolving market dynamics, you can reduce the risk of surplus auto parts inventory and improve overall operational effectiveness.
