In [15]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_ollama import OllamaEmbeddings
from langchain.chains import RetrievalQA
from langchain_ollama.llms import OllamaLLM
import os
from langchain.chains import create_retrieval_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.output_parsers import StrOutputParser

https://python.langchain.com/api_reference/ollama/llms/langchain_ollama.llms.OllamaLLM.html#langchain_ollama.llms.OllamaLLM.model

In [16]:
# Load PDF files from a folder
def load_pdfs(folder_path):
    documents = []
    for file in os.listdir(folder_path):
        if file.endswith(".pdf"):
            loader = PyPDFLoader(os.path.join(folder_path, file))
            documents.extend(loader.load())
    return documents

In [17]:
# Split text into chunks
def split_documents(documents):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=5000, chunk_overlap=50
    )
    return text_splitter.split_documents(documents)

In [18]:
# Initialize Chroma vector database
def setup_chroma(docs, persist_directory="./chroma_db"):
    embeddings = OllamaEmbeddings(model="all-minilm")  # Using model for embeddings
    db = Chroma.from_documents(docs, embeddings, persist_directory=persist_directory)
    return db

In [19]:
# Initialize retrieval and QA system
def get_qa_chain(db):
    retriever = db.as_retriever()
    llm = OllamaLLM(model="llama3.2:1b", num_gpu = -1)


    system_prompt = (
    "Use the given context to answer the question. "
    "If you don't know the answer, say you don't know. "
    "Use three sentence maximum and keep the answer concise. "
    "Context: {context}"
    )
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{input}"),
        ]
    )
    question_answer_chain = create_stuff_documents_chain(llm, prompt)
    chain = create_retrieval_chain(retriever, question_answer_chain)
    
    qa_chain = RetrievalQA.from_chain_type(llm, retriever=retriever)
    return qa_chain

In [20]:
folder_path = "./LocalDocs"  # Change this to your actual folder
documents = load_pdfs(folder_path)
split_docs = split_documents(documents)
db = setup_chroma(split_docs)

In [21]:
embeddings = OllamaEmbeddings(model="all-minilm")  # Using model for embeddings
persist_directory = "./chroma_db"
db = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
qa_chain = get_qa_chain(db)

In [22]:
retriever = db.as_retriever()
docs = retriever.invoke("safety")
docs[0]

Document(id='4a6ba2f0-7278-417c-a8f7-8555c9dce8e4', metadata={'page': 12, 'page_label': '13', 'source': './LocalDocs\\MIL-STD-882E.pdf'}, page_content='MIL-STD-882E \n7 \n \nand sustainment of the system/product/equipment to meet the user’s operational needs.  The PM \nis accountable for credible cost, schedule, and performance reporting to the Milestone Decision \nAuthority. \n \n3.2.27  Re-use items.  Items previously developed under another program or for a \nseparate application that are used in a program. \n \n3.2.28  Risk.  A combination of the severity of the mishap and the probability that the \nmishap will occur. \n \n3.2.29  Risk level.  The characterization of risk as either High, Serious, Medium, or Low. \n \n3.2.30  Safety.  Freedom from conditions that can cause death, injury, occupational \nillness, damage to or loss of equipment or property, or damage to the environment. \n \n3.2.31  Safety-critical.  A term applied to a condition, event, operation, process, or item \nw

In [23]:
query = "define software safety"
response = qa_chain.invoke(query)
print("AI Response:", response)

AI Response: {'query': 'define software safety', 'result': 'Software safety refers to the design, development, testing, and maintenance of software systems that are free from faults, errors, and hazards. It involves ensuring that software meets specific safety standards, requirements, and regulations to prevent accidents, injuries, or damage to people or property.\n\nIn other words, software safety is about designing and developing software that is:\n\n1. **Fault-tolerant**: able to recover from faults and errors\n2. **Error-free**: free from defects and bugs\n3. **Safe**: safe for users, operators, and the environment\n4. **Robust**: able to withstand various forms of stress and testing\n5. **Secure**: protected against unauthorized access, modification, or tampering\n\nSoftware safety is an essential aspect of software engineering, as it ensures that software systems are reliable, maintainable, and efficient. It also contributes to reducing the risk of errors, defects, and accidents,