In [1]:
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_ollama import ChatOllama
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
import sys

In [2]:
def ingest(
    file_path: str = "TVM.pdf",
    persist_directory: str = "./chroma_db",
    chunk_size: int = 1024,
    chunk_overlap: int = 100,
):
    """Load a PDF, split it into chunks, embed them and persist them in Chroma.

    Args:
        file_path: Path of the PDF to index.
        persist_directory: Directory where the Chroma collection is stored.
        chunk_size: Maximum number of characters per chunk.
        chunk_overlap: Number of characters shared by consecutive chunks.

    Raises:
        ValueError: If no text chunks could be produced from the PDF.

    Note:
        Entries written by an earlier run that used auto-generated ids are
        not removed; delete ``persist_directory`` once to start clean.
    """
    loader = PyPDFLoader(file_path=file_path)
    # load() yields one Document per PDF page. load_and_split() would run a
    # default text splitter first, so the text would be split twice and the
    # "documents" count below would not be the page count.
    pages = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        add_start_index=True,
    )

    chunks = text_splitter.split_documents(pages)
    print(f"Split {len(pages)} documents into {len(chunks)} chunks.")

    if not chunks:
        # An empty batch cannot be written to the vector store; fail with a
        # clear message instead (e.g. a scanned PDF without a text layer).
        raise ValueError(f"No text could be extracted from {file_path!r}.")

    # Stable, per-file ids so that re-running this cell overwrites the same
    # entries rather than appending a second copy of every chunk.
    ids = [f"{file_path}-{position}" for position in range(len(chunks))]

    embedding = FastEmbedEmbeddings()

    Chroma.from_documents(
        documents=chunks,
        embedding=embedding,
        ids=ids,
        persist_directory=persist_directory,
    )

In [3]:
# Run the ingestion pipeline: load TVM.pdf, chunk it and persist the
# embeddings under ./chroma_db so the retrieval cells below can query them.
ingest()

Split 20 documents into 56 chunks.


In [4]:
from huggingface_hub import login
import os
from dotenv import load_dotenv

# Read HF_TOKEN (and any other settings) from a local .env file, if present.
load_dotenv()

# Only log in when a token is actually configured. Passing token=None makes
# login() fall back to an interactive prompt, which stalls "Run All".
hf_token = os.getenv("HF_TOKEN")
if hf_token:
    login(token=hf_token)
else:
    print("HF_TOKEN is not set; skipping Hugging Face login.")

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [5]:
def rag_chain(
    model_name: str = "llama3",
    persist_directory: str = "./chroma_db",
    k: int = 3,
    score_threshold: float = 0.5,
):
    """Build a retrieval-augmented generation chain over the persisted store.

    Args:
        model_name: Name of the Ollama chat model used to answer.
        persist_directory: Directory of the Chroma collection to query.
        k: Maximum number of chunks passed to the model as context.
        score_threshold: Minimum relevance score a chunk needs to be kept.

    Returns:
        A chain whose ``invoke({"input": question})`` result contains the
        generated ``"answer"`` and the retrieved ``"context"`` documents.
    """
    model = ChatOllama(model=model_name)

    # The fallback sentence is meant to be echoed verbatim by the model, so
    # its spelling matters ("available").
    prompt = PromptTemplate.from_template(
        """
        <s> [Instructions] You are a friendly assistant. Answer the question based only on the following context. 
        If you don't know the answer, then reply, No Context available for this question {input}. [/Instructions] </s> 
        [Instructions] Question: {input} 
        Context: {context} 
        Answer: [/Instructions]
        """
    )

    # Must be the same embedding model that was used when the store was built.
    embedding = FastEmbedEmbeddings()
    vector_store = Chroma(
        persist_directory=persist_directory,
        embedding_function=embedding,
    )

    retriever = vector_store.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={
            "k": k,
            "score_threshold": score_threshold,
        },
    )

    # Stuff the retrieved chunks into {context}, then wire retrieval in front.
    document_chain = create_stuff_documents_chain(model, prompt)
    chain = create_retrieval_chain(retriever, document_chain)

    return chain

In [6]:
def ask(query: str, chain=None):
    """Answer ``query`` with the RAG chain and print the answer and sources.

    Args:
        query: The question to ask.
        chain: Optional chain previously built with ``rag_chain()``. Passing
            one avoids re-creating the embedding model and vector-store
            client for every question. A new chain is built when omitted.
    """
    if chain is None:
        chain = rag_chain()

    result = chain.invoke({"input": query})
    print(result["answer"])

    for doc in result["context"]:
        # Not every document is guaranteed to carry a "source" entry; fall
        # back to a placeholder rather than raising KeyError.
        print("Source: ", doc.metadata.get("source", "unknown"))

In [7]:
# Example query: prints the generated answer followed by the source of each
# retrieved context chunk.
ask("What are Single Payment Problems?")

Single Payment Problems refer to financial problems that involve a single payment at a future date or a present value problem where the goal is to find the interest rate or the number of periods (n). These problems can be solved using the formula FV = PV (1 + r)^n, where FV is the future value, PV is the present value, and r is the interest rate. The problems can be classified into three types:

* Single payment, future value: Given the present value and the number of periods, find the future value.
* Single payment, present value: Given the future value and the number of periods, find the present value.
* Single payment, interest rate: Given the present value and the future value (or both), find the interest rate.
Source:  TVM.pdf
Source:  TVM.pdf
Source:  TVM.pdf
