In [1]:
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import GPT4All
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from pdf2image import convert_from_path
from langchain.vectorstores.chroma import Chroma

In [2]:
# Load the vehicle owner's manual PDF and turn it into a list of Document chunks.
PDF_PATH = "Altok10.pdf"  # resolved relative to the notebook's working directory
loader = PyPDFLoader(PDF_PATH)
documents = loader.load_and_split()

# Trailing expression: how many Document objects the loader produced.
len(documents)

176

In [3]:
# Eyeball the raw extracted text of the first chunk to judge PDF extraction quality.
first_document = documents[0]
print(first_document.page_content)

0-1
82PH0-74EFOREWORD
This manual is an essential part of your
vehicle and should remain with the vehicle
when resold or otherwise transferred to anew owner or operator. Please read thismanual carefully before operating yournew MARUTI SUZUKI and review themanual from time to time. It contains
important information on safety, operation
and maintenance. Y ou are invited to availthe three Free Inspection Services asdescribed in the manual. Three freeinspection coupons are attached to thismanual. Please show this manual to your
dealer while you take your MARUTI
SUZUKI for any Service.To prolong the life of your vehicle andreduce maintenance cost, the periodicmaintenance must be carried out accord-ing to “PERIODIC MAINTENANCE
SCHEDULE” described in “INSPECTION
AND MAINTENANCE” section of this man-ual. It is essential for preventing troubleand accidents to ensure your satisfactionand safety.Daily inspection and care as per “DAIL YINSPECTION CHECKLIST” described in
the “INSPECTION AND MAINTEN

In [4]:
# Splitter settings kept as named constants so they are easy to tune.
CHUNK_SIZE = 1024
CHUNK_OVERLAP = 64

# Split the loaded documents into smaller, overlapping chunks for embedding.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)
texts = text_splitter.split_documents(documents)

# Trailing expression: number of chunks after splitting.
len(texts)

350

In [5]:
# Embedding model used to vectorise the chunks before they are indexed.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

In [6]:
pip install --upgrade jupyter


Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [7]:
pip install --upgrade ipywidgets


Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [8]:
# Rebuild the embedding object after the package upgrades in the preceding cells.
# This repeats the earlier `embeddings` cell with the same model name.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [9]:
# Show only the first couple of chunks; echoing the whole list dumps the repr of
# every Document into the notebook output and buries the rest of the narrative.
texts[:2]

[Document(page_content='0-1\n82PH0-74EFOREWORD\nThis manual is an essential part of your\nvehicle and should remain with the vehicle\nwhen resold or otherwise transferred to anew owner or operator. Please read thismanual carefully before operating yournew MARUTI SUZUKI and review themanual from time to time. It contains\nimportant information on safety, operation\nand maintenance. Y ou are invited to availthe three Free Inspection Services asdescribed in the manual. Three freeinspection coupons are attached to thismanual. Please show this manual to your\ndealer while you take your MARUTI\nSUZUKI for any Service.To prolong the life of your vehicle andreduce maintenance cost, the periodicmaintenance must be carried out accord-ing to “PERIODIC MAINTENANCE\nSCHEDULE” described in “INSPECTION\nAND MAINTENANCE” section of this man-ual. It is essential for preventing troubleand accidents to ensure your satisfactionand safety.Daily inspection and care as per “DAIL YINSPECTION CHECKLIST” descri

In [10]:
# Embed every chunk in `texts` with `embeddings` and index the vectors in a Chroma
# store whose data directory is "db" (relative to the working directory).
# NOTE(review): re-running this cell against an existing "db" directory presumably
# adds the same chunks again - confirm, and clear the directory if duplicates appear.
db = Chroma.from_documents(texts, embeddings, persist_directory="db")

# Flush the index to disk now. In a notebook the store object stays alive in the
# kernel, so relying on an implicit write when the object is destroyed is unreliable.
db.persist()

Using embedded DuckDB with persistence: data will be stored in: db


In [11]:
# Local GPT4All model weights, expected next to the notebook.
MODEL_PATH = "./ggml-gpt4all-j-v1.3-groovy.bin"

# Language model that generates the answers; no context-size override is passed,
# so the wrapper's own default applies.
llm = GPT4All(model=MODEL_PATH, backend="gptj", verbose=False)

In [12]:
# Retriever over the Chroma index, configured with k=3 in its search arguments.
retriever = db.as_retriever(search_kwargs={"k": 3})

# Question-answering chain that combines the retriever with the local LLM.
# Source documents are requested alongside the answer; the query cells below
# read the answer text from the "result" key of the returned mapping.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    verbose=False,
)

In [13]:
# Ask for an overview of the manual. The query text is what gets embedded for
# retrieval and shown to the LLM, so the spelling is corrected here; the string
# has no placeholders, so it is a plain literal rather than an f-string.
res = qa("""
    Detailed explanation of the Altok10 pdf given?
""")

# Print only the generated answer text.
print(res["result"])



The given PDF is a technical manual for the ALTO K10, a portable, battery-operated, high-intensity, low-voltage, high-frequency, high-power, and high-resolution flash lamp. The manual provides detailed information on the various features and functions of the device, including its operating procedures, safety precautions, and troubleshooting tips. It also includes illustrations and photographs to help users understand the device's components and features. Overall, the manual is a valuable resource for anyone who wants to learn more about the ALTO K10 and its capabilities.


In [14]:
# Ask the chain about the warranty policy and print only the answer text.
question = """
    What is WARRANTY POLICY?
"""
res = qa(question)
print(res["result"])

In [None]:
# Ask the chain about warnings on vehicle modification and print only the answer text.
question = """
    Warning for modification?
"""
res = qa(question)
print(res["result"])