In [5]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters.character import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import SKLearnVectorStore
from langchain_ollama import ChatOllama, OllamaEmbeddings
from langchain_core.prompts.prompt import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

from os import listdir
from os.path import join

In [6]:
# Prompt used for the answer-generation step. It tells the model to use the
# supplied documents, to say so when it does not know the answer, and to keep
# the answer to at most three sentences.
# NOTE: the leading spaces on each line inside the triple-quoted string are
# part of the template text itself.
prompt = PromptTemplate(
    template="""You are an assistant for question-answering tasks.
    Use the following documents to answer the question.
    If you don't know the answer, just say that you don't know.
    Use three sentences maximum and keep the answer concise:
    Question: {question}
    Documents: {documents}
    Answer:
    """,
    # Names of the two placeholders ({question}, {documents}) in the template.
    input_variables=["question", "documents"],
)

def load_documents(folder):
    """Load every PDF located directly inside ``folder``.

    Args:
        folder: Path of the directory to scan (not recursive). Only entries
            whose name ends with ".pdf" are loaded.

    Returns:
        One flat list holding, in file-name order, everything returned by
        ``PyPDFLoader(path).load()`` for each PDF. Empty if no PDF is found.

    Raises:
        FileNotFoundError: If ``folder`` does not exist (raised by ``listdir``).
    """
    # List the folder that was actually requested. The directory name used to
    # be hard-coded to "library", so any other ``folder`` value was listed
    # from the wrong place and joined into non-existent paths.
    # Sorting keeps the document order independent of the OS listing order.
    pdf_names = sorted(name for name in listdir(folder) if name.endswith(".pdf"))

    documents = []
    for name in pdf_names:
        # Each loader call returns a list; extend to keep the result flat.
        documents.extend(PyPDFLoader(join(folder, name)).load())
    return documents

In [7]:
class RAGApplication:
    """Retrieval-augmented question answering over a folder of PDFs.

    Constructing an instance loads the PDFs, splits them into chunks, embeds
    the chunks into an ``SKLearnVectorStore`` and prepares a retriever.
    ``run`` then retrieves chunks for a question and feeds them, together
    with the question, through the prompt and the chat model.
    """

    def __init__(self, folder="library"):
        """Create the models, build the index and set up the retriever.

        Args:
            folder: Directory handed to ``load_documents`` to find the PDFs.
        """
        self.embedder = OllamaEmbeddings(model="qwen3-embedding:8b", temperature=0)
        self.llm = ChatOllama(model="llama3.1:8b", temperature=0)

        # Pipeline: fill the prompt -> call the chat model -> parse to a string.
        self.rag_chain = prompt | self.llm | StrOutputParser()
        self.text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
            chunk_size=250,
            chunk_overlap=0,
        )

        # Order matters: the vector store is built from ``docs_list`` and the
        # retriever is derived from the vector store.
        self.docs_list = load_documents(folder=folder)
        self.vectorstore = self.create_vectorstore()
        self.retriever = self.vectorstore.as_retriever(
            search_type="mmr",
            search_kwargs={
                "k": 5,
            },
        )

    def create_vectorstore(self):
        """Split ``self.docs_list`` into chunks and index them.

        Returns:
            An ``SKLearnVectorStore`` built from the chunks using
            ``self.embedder``.
        """
        chunks = self.text_splitter.split_documents(self.docs_list)
        return SKLearnVectorStore.from_documents(
            documents=chunks,
            embedding=self.embedder,
        )

    def run(self, question):
        """Answer ``question`` from the retrieved document chunks.

        Args:
            question: The question text; it is used both as the retrieval
                query and as the ``question`` prompt variable.

        Returns:
            Whatever ``self.rag_chain.invoke`` returns for the filled prompt.
        """
        retrieved = self.retriever.invoke(question)

        # Separate chunks with a real newline. The join string used to be an
        # escaped backslash followed by "n", which put those two literal
        # characters between chunks instead of a line break.
        context = "\n".join(doc.page_content for doc in retrieved)

        return self.rag_chain.invoke({
            "question": question,
            "documents": context,
        })

In [None]:
# Build the pipeline with the default folder ("library"). The constructor
# loads the PDFs and builds the vector store, so this cell does the heavy work.
rag_app = RAGApplication()

In [None]:
# Sample query (German: "What requirements are there for bachelor theses?").
# The value returned by run() is displayed as the cell output.
rag_app.run("Welche Anforderungen an Bachelorarbeiten gibt es?")