In [23]:
from llama_index.readers.smart_pdf_loader import SmartPDFLoader
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core import get_response_synthesizer, DocumentSummaryIndex
from llama_index.llms.openai import OpenAI
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.indices.document_summary import DocumentSummaryIndexEmbeddingRetriever
import nest_asyncio
import os
import openai
import logging
import sys
import nest_asyncio

# Patch asyncio so event loops can be nested; the notebook kernel already runs
# a loop and the summary index below is built with use_async=True.
nest_asyncio.apply()

# Send log records at WARNING and above to stdout so they show up in cell output.
logging.basicConfig(stream=sys.stdout, level=logging.WARNING)
# Also attach an explicit stdout handler to the root logger.
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

# SECURITY: the API key must never be written into the notebook. It is read
# from the environment instead; any key that was previously committed here
# should be treated as compromised and revoked.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it in the environment that launches "
        "Jupyter (or load it from a secrets manager) before running this cell."
    )
openai.api_key = os.environ["OPENAI_API_KEY"]

def get_pdf_urls(base_path):
    """Recursively list the files stored under ``base_path``.

    Files whose name starts with ``.`` or with ``~$`` are left out. No check
    is made on the file extension, so every other file is returned.

    Args:
        base_path: Directory to walk.

    Returns:
        A list of paths (each directory joined with the file name), in the
        order ``os.walk`` yields them.
    """
    skipped_prefixes = ('.', '~$')
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(base_path)
        for name in names
        if not name.startswith(skipped_prefixes)
    ]

def pdf_reader(
    base_path,
    llmsherpa_api_url="https://readers.llmsherpa.com/api/document/developer/parseDocument?renderFormat=all",
):
    """Parse every file found under ``base_path`` and return all documents.

    Args:
        base_path: Directory that is walked recursively via ``get_pdf_urls``.
        llmsherpa_api_url: Parsing endpoint handed to ``SmartPDFLoader``.
            Defaults to the endpoint this notebook has always used.

    Returns:
        One list holding the documents loaded from all files, in discovery
        order. The list is empty when no files are found.
    """
    pdf_loader = SmartPDFLoader(llmsherpa_api_url=llmsherpa_api_url)
    documents = []
    for pdf in get_pdf_urls(base_path):
        # Accumulate across files: assigning the result of load_data directly
        # would keep only the last file's documents and would leave the name
        # unbound when the directory contains no files.
        documents.extend(pdf_loader.load_data(pdf))
    return documents

# NOTE(review): nest_asyncio.apply() is already called right after the imports
# at the top of this cell; this second call looks redundant — confirm and keep
# only one of them.
nest_asyncio.apply()

def initialize_document_summary_index(documents):
    """Build a DocumentSummaryIndex over ``documents`` and return it.

    The index is created with:
      * an OpenAI ``gpt-3.5-turbo`` LLM at temperature 0,
      * a ``SentenceSplitter`` with ``chunk_size=1024`` as the only transformation,
      * a ``tree_summarize`` response synthesizer with ``use_async=True``,
      * progress display enabled.

    Args:
        documents: The documents to index.

    Returns:
        The DocumentSummaryIndex produced by ``DocumentSummaryIndex.from_documents``.
    """
    summary_llm = OpenAI(model="gpt-3.5-turbo", temperature=0)
    chunker = SentenceSplitter(chunk_size=1024)
    summarizer = get_response_synthesizer(
        use_async=True,
        response_mode="tree_summarize",
    )
    return DocumentSummaryIndex.from_documents(
        documents,
        show_progress=True,
        response_synthesizer=summarizer,
        transformations=[chunker],
        llm=summary_llm,
    )

def create_retriever(doc_summary_index):
    """Return an embedding-based retriever over ``doc_summary_index``.

    The retriever is configured with ``similarity_top_k=1``.
    """
    return DocumentSummaryIndexEmbeddingRetriever(
        doc_summary_index,
        similarity_top_k=1,
    )

def setup_query_engine(
    base_path="/Users/skylerwilson/Desktop/PartsWise/co-pilot-v1/data/knowledge_database",
):
    """Load the knowledge base, index it, and return a ready-to-use query engine.

    Steps: read all documents under ``base_path``, build a document summary
    index from them, wrap the index in an embedding retriever, and combine
    that retriever with a ``tree_summarize`` response synthesizer.

    Args:
        base_path: Directory holding the knowledge-base files. The default is
            the location this notebook was originally written against; pass a
            different directory to run it on another machine.

    Returns:
        A ``RetrieverQueryEngine`` built on the retriever and synthesizer.

    Raises:
        FileNotFoundError: If ``base_path`` is not an existing directory.
    """
    # Fail early with a clear message instead of silently indexing nothing
    # (os.walk yields no entries for a missing directory).
    if not os.path.isdir(base_path):
        raise FileNotFoundError(f"Knowledge base directory not found: {base_path!r}")

    documents = pdf_reader(base_path)
    doc_summary_index = initialize_document_summary_index(documents)
    retriever = create_retriever(doc_summary_index)

    response_synthesizer = get_response_synthesizer(response_mode="tree_summarize")
    return RetrieverQueryEngine(
        retriever=retriever,
        response_synthesizer=response_synthesizer,
    )


# Build the query engine when this cell runs: the call reads the documents,
# builds the summary index, and wires up the retriever (see the progress
# output recorded below). The result is kept in a notebook-level name.
query_engine = setup_query_engine()



Parsing nodes: 100%|██████████| 11/11 [00:00<00:00, 2485.71it/s]
Summarizing documents:   0%|          | 0/11 [00:00<?, ?it/s]

current doc id: 161fa426-161a-4993-bb37-e731fb9833d8


Summarizing documents:   9%|▉         | 1/11 [00:01<00:16,  1.61s/it]

current doc id: 0f07aa9d-ca5d-4f25-828c-5c56e80e0c5c


Summarizing documents:  18%|█▊        | 2/11 [00:02<00:12,  1.34s/it]

current doc id: 3a7700da-fbc5-47ff-8514-877af7c98f18


Summarizing documents:  27%|██▋       | 3/11 [00:04<00:11,  1.42s/it]

current doc id: 19c051a6-76ff-4433-9fcf-b54f5a924ca1


Summarizing documents:  36%|███▋      | 4/11 [00:06<00:10,  1.55s/it]

current doc id: a22bdf00-8cbd-46a6-bd5b-d2ae5c05c9ee


Summarizing documents:  45%|████▌     | 5/11 [00:07<00:10,  1.69s/it]

current doc id: 223fc3bf-4335-4df9-ad1a-19e9c5372421


Summarizing documents:  55%|█████▍    | 6/11 [00:09<00:08,  1.67s/it]

current doc id: a527be08-dc3e-4f85-8ac0-1caeea198fdf


Summarizing documents:  64%|██████▎   | 7/11 [00:10<00:05,  1.43s/it]

current doc id: b6b17861-1ad8-4bae-98a6-35edbb5578ae


Summarizing documents:  73%|███████▎  | 8/11 [00:12<00:04,  1.61s/it]

current doc id: c105ace6-ac22-49bf-b2a8-c5b0d617a4dd


Summarizing documents:  82%|████████▏ | 9/11 [00:15<00:03,  1.95s/it]

current doc id: 2ffead00-851a-4eb8-845b-29c307547668


Summarizing documents:  91%|█████████ | 10/11 [00:17<00:01,  1.92s/it]

current doc id: 40e7186c-ea33-49a6-8941-3b0ac3d4b6b0


Summarizing documents: 100%|██████████| 11/11 [00:18<00:00,  1.64s/it]
Generating embeddings: 100%|██████████| 11/11 [00:00<00:00, 31.19it/s]


In [24]:
# Now, use query_engine to perform queries.
# NOTE(review): the question text contains typos ("i", "impreve"). It is left
# untouched here because the recorded output below was produced from this
# exact string; correct it the next time the cell is re-run.
response = query_engine.query("How can i impreve the turnover of my parts department?")
print(response)

You can improve the turnover of your parts department by establishing a culture of continuous improvement within your dealership's inventory management processes. Encourage feedback from staff members involved in inventory handling, regularly review and refine inventory management procedures based on performance metrics and feedback. By continuously seeking ways to enhance efficiency and adapt to evolving market dynamics, you can reduce the risk of surplus auto parts inventory and improve overall operational effectiveness.
