INITIALIZE PACKAGE INSTALL AND GOOGLE DRIVE CONNECTION

In [None]:
!pip install langchain
!pip install openai openai==0.28.1
!pip install chromadb

!pip install pypdf
!pip install tiktoken
!pip install faiss-cpu

In [None]:
# Attach Google Drive to the Colab runtime so the sample PDFs can be read by path.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Mounted at /content/drive


ADD OPENAI API KEY

In [None]:
import getpass
import os

# Prompt for the key with hidden input (so it is never echoed into the notebook)
# and store it in the OPENAI_API_KEY environment variable for this session.
os.environ.update({'OPENAI_API_KEY': getpass.getpass('Paste API key:')})

Paste API key:··········


LOAD SINGLE DOC AND CREATE VECTOR STORE/EMBEDDINGS

In [None]:
# SINGLE DOC IMPLEMENTATION
# Read one PDF from Drive and split it into Document chunks ready for indexing.
from langchain.document_loaders import PyPDFLoader

single_doc_path = "/content/drive/MyDrive/SamplePDF/SampleCATmanual.pdf"
loader = PyPDFLoader(single_doc_path)
pages = loader.load_and_split()

In [None]:
# SINGLE DOC IMPLEMENTATION
# Embed the chunks into a FAISS index, preview a plain similarity search,
# then expose the index as an MMR retriever for the chains below.
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS

faiss_index = FAISS.from_documents(pages, OpenAIEmbeddings())

# Preview: page number plus the first 100 characters of each of the top 3 matches.
docs = faiss_index.similarity_search(
    "Where can I find safety warnings for Material Handling Arm?",
    k=3,
)
for doc in docs:
    page_label = f'{doc.metadata["page"]}:'
    print(page_label, doc.page_content[:100])

# MMR retrieval returning 6 chunks per query.
# NOTE(review): per the LangChain docs, lambda_mult closer to 0 favours diversity — confirm.
mmr_settings = {'k': 6, 'lambda_mult': 0.25}
retriever = faiss_index.as_retriever(search_type='mmr', search_kwargs=mmr_settings)


91: 92 SEBU8407-06
Operation Section
Operation
i01964204
Material Handling Arm
Operation
SMCS Code: 6400
17: 18 SEBU8407-06
Safety Section
Safety Messages
Industrial Grapple Bucket (1)
19: 20 SEBU8407-06
Safety Section
Safety Messages
Angle Blade (5)


'\nQ1\ndoc 1 --> 3\ndoc 2 --> 5\ndoc 4 --> 0\ndoc 5\n\n10 chunks\n\ndoc 1 --> 10\n\nQ2\ndoc1 --> 7\ndoc2 --> 6\n'

Create chatbot using SINGLE DOC WITHOUT CHAT MEMORY

In [None]:
# SINGLE DOC IMPLEMENTATION W/O CHAT MEM
# RetrievalQA suits one-off questions: no conversation history is kept between calls.
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI

llm = ChatOpenAI(model='gpt-3.5-turbo-16k', temperature=0.7)

# Chain settings: "stuff" chain type over the single-doc MMR retriever, with verbose tracing on.
qa_chain_settings = dict(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    verbose=True,
)
qa_stuff = RetrievalQA.from_chain_type(**qa_chain_settings)

In [None]:
# SINGLE DOC IMPLEMENTATION
# Smoke-test the memory-less chain with a single question and print its answer.
query = "How can I attach a load related to Material Handling Arm?"
response = qa_stuff.run(query)

print(response)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
To attach a load to the Material Handling Arm, follow these steps:

1. Verify that the load does not exceed the weight limit. Refer to the Operation and Maintenance Manual, "Material Handling Arm Rated Load" for the rated load capacities.

2. Keep all personnel out of the work area at all times, except when you are attaching or removing a load.

3. Enter the machine and start the engine.

4. Disengage the parking brake.

5. Keep the loader arms in the fully lowered position. Slowly position the material handling arm until either lifting point 1 or lifting point 2 is directly above the load.

6. Tilt the material handling arm forward until the hook is slightly higher than the load to minimize swinging.

7. Stop the engine.

8. Wait as a second person securely attaches the load to the hook, ensuring that the hook clasp is locked in place.

9. Ensure that all personnel have left the work area.

10. Start the engine a

Create chatbot using SINGLE DOC WITH CHAT MEMORY

In [None]:
# SINGLE DOC IMPLEMENTATION W/ CHAT MEM
# ConversationalRetrievalChain suits multi-turn questioning: the buffer memory
# stores the running conversation under the "chat_history" key.
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory(return_messages=True, memory_key="chat_history")

qa = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)

In [None]:
# SINGLE DOC QUERY W/ CHAT MEM
# First turn of the conversation: ask for the SMCS code.
query = "What is the SMCS code for the Material Handling Arm?"
result = qa({"question": query})
answer = result['answer']
print(answer)

The SMCS code for the Material Handling Arm is 6400; 6700; 7000.


In [None]:
# SINGLE DOC QUERY W/ CHAT MEM CONT.
# Follow-up turn: "it" is only resolvable through the stored chat history.
query = "How can I attach it?"
result = qa({"question": query})
answer = result['answer']
print(answer)

To attach the Material Handling Arm, follow these steps:

1. Verify that the load does not exceed the weight limit. Refer to the Operation and Maintenance Manual for the rated load capacities.

2. Keep all personnel out of the work area, except when attaching or removing the load.

3. Enter the machine and start the engine.

4. Disengage the parking brake.

5. Keep the loader arms in the fully lowered position. Slowly position the material handling arm until either lifting point 1 or lifting point 2 is directly above the load.

6. Tilt the material handling arm forward until the hook is slightly higher than the load to minimize swinging.

7. Stop the engine.

8. Wait for a second person to securely attach the load to the hook, ensuring that the hook clasp is in the locked position.

9. Ensure that all personnel have left the work area.

10. Start the engine and disengage the parking brake.

11. Slowly tilt back the material handling arm until it is fully tilted back.

12. Stop the engi

In [None]:
# SINGLE DOC QUERY W/ CHAT MEM CONT.
# Another follow-up turn in the same conversation.
query = "Will I need two people for attachment?"
result = qa({"question": query})
answer = result['answer']
print(answer)

Yes, according to the information provided, it is recommended to have two people to attach the load to the Material Handling Arm. This is mentioned in the context under the "Two Person Operation" section of the Operation section.


LOAD MULTI DOC AND CREATE VECTOR STORE/EMBEDDINGS


In [None]:
# MULTI DOC IMPLEMENTATION
# Load several PDFs, pool their chunks into one list, and build a single FAISS
# vector store over all of them.
# Not applicable to current data, but helps define where chunks are being pulled from.

# NOTE: `loaders` holds PDF file paths (not loader objects); the name is kept
# so any other cell referring to it keeps working.
loaders = ["/content/drive/MyDrive/SamplePDF/SampleCATmanual.pdf", "/content/drive/MyDrive/SamplePDF/SampleCATRP3600.pdf"]

all_documents = []
for pdf_path in loaders:
    # Loop-local names are used on purpose: the original loop rebound the global
    # `loader` and `pages` from the single-doc cells, so re-running the single-doc
    # indexing cell afterwards would silently index the last PDF of this list.
    pdf_loader = PyPDFLoader(pdf_path)
    all_documents.extend(pdf_loader.load_and_split())

# One embedding pass over the pooled chunks; `vectorstore` is the combined index.
embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(all_documents, embeddings)

In [None]:
# MULTI DOC IMPLEMENTATION
# Run a similarity search over the combined index and show which chunks get pulled,
# then expose the combined index as an MMR retriever.
import os

from langchain.vectorstores import FAISS
from langchain.embeddings.openai import OpenAIEmbeddings

# Reuse the index built over `all_documents` in the previous cell instead of
# embedding the same documents a second time (each embedding pass is a paid API call).
faiss_index = vectorstore

docs = faiss_index.similarity_search("Where can I find the instructions for Engine Valve inspection and replacement?", k=3)
for doc in docs:
    # With several PDFs in one index a bare page number is ambiguous, so the
    # source file name is printed alongside it.
    source_name = os.path.basename(doc.metadata.get("source", "unknown source"))
    print(f'{source_name} | {doc.metadata["page"]}:', doc.page_content[:100])

retriever = faiss_index.as_retriever(search_type='mmr', search_kwargs={'k': 6, 'lambda_mult': 0.25})

12: 13 
 
Engine Valve – Inspect/Replace  
 
Remove the muffler, air filter assembly, recoil 
starter, s
13: 14 
 Make sure that the reamer is centered over the 
valve guide. Coat the reamer and valve guide 
w
2: 3 
 Table of Contents 


IMPLEMENT MULTI RETRIEVER TEST:
1. https://python.langchain.com/docs/use_cases/question_answering/multi_retrieval_qa_router
  - dynamically choose a retriever

In [None]:
from langchain.chains.router import MultiRetrievalQAChain
from langchain.llms import OpenAI

from langchain.embeddings import OpenAIEmbeddings
from langchain.document_loaders import TextLoader
from langchain.vectorstores import FAISS

from langchain.document_loaders import PyPDFLoader
from langchain.vectorstores import FAISS
from langchain.embeddings.openai import OpenAIEmbeddings

In [None]:
# CREATE MULTIPLE RETRIEVERS
# Includes both single-doc and multi-doc retrievers to demonstrate applicability to both cases.
# search_type should remain consistent; search_kwargs are adaptable to the data within each retriever.


def _load_pdf_chunks(pdf_paths):
    """Load every PDF in ``pdf_paths`` and return all of their chunks as one flat list.

    Args:
        pdf_paths: Iterable of PDF file paths.

    Returns:
        A list containing the Documents produced by ``PyPDFLoader.load_and_split``
        for each path, in the order the paths were given.
    """
    chunks = []
    for pdf_path in pdf_paths:
        chunks.extend(PyPDFLoader(pdf_path).load_and_split())
    return chunks


def _build_mmr_retriever(documents, k=6, lambda_mult=0.25):
    """Embed ``documents`` into a new FAISS index and wrap it in an MMR retriever.

    Args:
        documents: Non-empty list of Documents to index.
        k: Number of chunks the retriever returns per query.
        lambda_mult: MMR trade-off value passed through in ``search_kwargs``.

    Returns:
        A ``(index, retriever)`` tuple: the FAISS index and its MMR retriever.

    Raises:
        ValueError: If ``documents`` is empty, so the problem is reported clearly
            here rather than as an opaque failure while building the index.
    """
    if not documents:
        raise ValueError(
            "No documents to index; check the PDF paths and that the PDFs contain extractable text."
        )
    index = FAISS.from_documents(documents, OpenAIEmbeddings())
    mmr_retriever = index.as_retriever(
        search_type='mmr',
        search_kwargs={'k': k, 'lambda_mult': lambda_mult},
    )
    return index, mmr_retriever


# Single doc1
loader1 = PyPDFLoader("/content/drive/MyDrive/SamplePDF/SampleCATmanual.pdf")
pages1 = loader1.load_and_split()
faiss_index1, retriever1 = _build_mmr_retriever(pages1)


# Single doc2
loader2 = PyPDFLoader("/content/drive/MyDrive/SamplePDF/SampleCATRP3600.pdf")
pages2 = loader2.load_and_split()
faiss_index2, retriever2 = _build_mmr_retriever(pages2)


# Multi docs1
# NOTE: `loaders1` / `loaders2` hold file paths, not loader objects; names kept for compatibility.
loaders1 = ["/content/drive/MyDrive/SamplePDF/Excavator_Manuals/Engine_Manuals/CAT_C15_Engine_Manual.pdf",
           "/content/drive/MyDrive/SamplePDF/Excavator_Manuals/Engine_Manuals/CAT_C18_Engine_Manual.pdf"]

# The helper keeps its loop variables local, so the global `loader` / `pages`
# from the single-doc cells are no longer overwritten by this cell.
all_documents1 = _load_pdf_chunks(loaders1)
multi_faiss_index1, multi_retriever1 = _build_mmr_retriever(all_documents1)


# Multi docs2
loaders2 = ["/content/drive/MyDrive/SamplePDF/Excavator_Manuals/Excavator_Manuals/CAT_Excavator_Model_304.pdf",
           "/content/drive/MyDrive/SamplePDF/Excavator_Manuals/Excavator_Manuals/CAT_Excavator_Model_3015.pdf"]

all_documents2 = _load_pdf_chunks(loaders2)
multi_faiss_index2, multi_retriever2 = _build_mmr_retriever(all_documents2)

In [None]:
# PROVIDE INFO REGARDING EACH RETRIEVER
# Each entry pairs a retriever with a name and a description; the multi-retrieval
# chain uses the descriptions to decide which retriever should handle a question.
# NOTE(review): the earlier comment said the choice is based on cosine similarity to
# the description; LangChain's router appears to be LLM-prompt based — confirm.
# A default chain can be set to include all data.

retriever_specs = [
    (
        "Compact Track Loaders",
        "Good for answering questions about the Operation and Maintenance for Compact Track Loaders",
        retriever1,
    ),
    (
        "RP3600 Portable Generator",
        "Good for answering questions about the Service for RP3600 Portable Generators",
        retriever2,
    ),
    (
        "CAT Engine Manuals",
        "Good for answering questions about CAT Engines C15 and C18",
        multi_retriever1,
    ),
    (
        "CAT Excavator Model Manuals",
        "Good for answering questions about CAT Excavators 304 and 3015",
        multi_retriever2,
    ),
]

retriever_infos = [
    {"name": spec_name, "description": spec_description, "retriever": spec_retriever}
    for spec_name, spec_description, spec_retriever in retriever_specs
]

In [None]:
# CREATE MULTI-RETRIEVAL CHAIN
# Build the routing chain over the retrievers described in `retriever_infos`;
# verbose=True prints which destination each question is routed to.
router_llm = OpenAI()
chain = MultiRetrievalQAChain.from_retrievers(
    llm=router_llm,
    retriever_infos=retriever_infos,
    verbose=True,
)

In [None]:
# Ask the multi-retrieval chain a C18 engine question and print its answer.
answer = chain.run("Can you give me the details for the C18 Engine Configuration?")
print(answer)



[1m> Entering new MultiRetrievalQAChain chain...[0m




CAT Engine Manuals: {'query': 'Can you give me the details for the C18 Engine Configuration?'}
[1m> Finished chain.[0m
 The C18 Engine Configuration is an in-line 6, 4-stroke-cycle diesel with a bore of 145 mm (5.7 in), a stroke of 183 mm (7.2 in), and a displacement of 18.1 L (1104.5 in³). It has a 16.0:1 compression ratio, and is series twin turbocharged-aftercooled (TTA, >560 kW).


In [None]:
# Ask the multi-retrieval chain a compact-track-loader question and print its answer.
answer = chain.run("What is the SMCS code for the Material Handling Arm in compact track loaders?")
print(answer)



[1m> Entering new MultiRetrievalQAChain chain...[0m




Compact Track Loaders: {'query': 'What is the SMCS code for the Material Handling Arm in compact track loaders?'}
[1m> Finished chain.[0m
 6400; 6700; 7000


In [None]:
# Ask the multi-retrieval chain a C15 engine question and print its answer.
answer = chain.run("What are the engine specifications of the C15 engine?")
print(answer)



[1m> Entering new MultiRetrievalQAChain chain...[0m




CAT Engine Manuals: {'query': 'What are the engine specifications of the C15 engine?'}
[1m> Finished chain.[0m
 The C15 engine has 6 cylinders with a bore of 145 mm (5.7 in) and a stroke of 183 mm (7.2 in). The displacement is 18.1 L (1104.5 in3) and the compression ratio is 16.0:1. It is a turbocharged direct injection 4-stroke-cycle diesel engine with a counterclockwise rotation from the flywheel end. The cooling system capacity is 27 L (28 qts) and the lube system refill is 40-74 L (10.5-19.5 gal).
