In [10]:
import os
from dotenv import load_dotenv
# Load variables from a local .env file (if one exists) into the process environment.
load_dotenv()

# Fail fast with an actionable message when the key is missing or empty.
# The previous `os.environ[...] = os.getenv(...)` was a no-op when the key was set
# and raised an opaque `TypeError: str expected, not NoneType` when it was not.
if not os.getenv("OPENAI_API_KEY"):
    raise EnvironmentError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )

### Step 1: Load documents from Markdown files
[Document Loader Link](https://python.langchain.com/docs/how_to/#document-loaders)

In [11]:

# Imported from langchain_community: the `langchain.document_loaders` path is a
# deprecated re-export of the same loader.
from langchain_community.document_loaders import UnstructuredMarkdownLoader

# Directory containing the source documents.
# NOTE: the variable name is historical -- only Markdown (`.md`) files are loaded from it.
pdf_directory = './docs'

# Collect every Markdown file in the directory. `os.listdir` returns entries in
# arbitrary order, so sort to make the load order reproducible across runs.
file_paths = sorted(
    os.path.join(pdf_directory, file)
    for file in os.listdir(pdf_directory)
    if file.endswith('.md')
)

# An empty corpus would only surface later as an obscure error while building the
# vector index, so stop here with a clear message instead.
if not file_paths:
    raise FileNotFoundError(f"No .md files found in {pdf_directory!r}")

# Accumulates the Document objects produced by each loader.
documents = []

# Load each Markdown file using UnstructuredMarkdownLoader
for file_path in file_paths:
    loader = UnstructuredMarkdownLoader(file_path)
    documents.extend(loader.load())

### Step 2: Split the documents into chunks
[Text Split Link](https://python.langchain.com/docs/how_to/#text-splitters)

In [12]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Splitting is required because LLMs have limited context sizes, and a vector
# database works best when documents are stored as smaller chunks.
# Each chunk is at most 1000 characters and overlaps its neighbour by 200.
splitter_options = {"chunk_size": 1000, "chunk_overlap": 200}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

text_splitted_document = text_splitter.split_documents(documents)

### Step 3: Create embeddings and store in vector database
[Embedding Link](https://python.langchain.com/docs/how_to/#embedding-models)

In [13]:
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings

# Embedding model used to turn each chunk into a vector.
embeddings = OpenAIEmbeddings()

# Embed all chunks and index the resulting vectors in a FAISS store.
vectorstore = FAISS.from_documents(
    documents=text_splitted_document,
    embedding=embeddings,
)

### Step 4: Query processing and data retrieval
[Retrieving Link](https://python.langchain.com/docs/how_to/#retrievers)

In [14]:
# Expose the vector store as a similarity retriever that returns the top 3 chunks,
# then try it on a sample query.
query = "Fixed mortgage payment"
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs=dict(k=3))

result = retriever.invoke(query)

# Number of chunks retrieved, followed by the text of the first one.
print(len(result))
result[0].page_content

3


'Taxable Investments\n\nBrokerage Account: $64,230\n\nGrowth YTD: 6.8%\n\nAllocation: 65% individual stocks, 35% ETFs\n\nCryptocurrency: $5,890\n\nGrowth YTD: -12.3%\n\nHoldings: BTC, ETH, SOL\n\nDebt Overview\n\nMortgage: $298,400 remaining\n\nInterest Rate: 3.25%\n\nMonthly Payment: $1,850 (including escrow)\n\nPayoff Date: November 2049\n\nCar Loan: $18,650 remaining\n\nInterest Rate: 4.1%\n\nMonthly Payment: $385\n\nPayoff Date: March 2027\n\nCredit Cards: $0 (paid in full monthly)\n\nFinancial Goals Progress\n\nShort-term Goals (1 year)\n\nEmergency Fund: $18,000 target, currently at $15,000 (83%)\n\nHome Repair Fund: $10,000 target, currently at $7,500 (75%)\n\nVacation Fund: $3,500 target, currently at $2,800 (80%)\n\nMedium-term Goals (1-5 years)\n\nNew Vehicle Purchase: $35,000 target, currently at $8,000 (23%)\n\nHome Renovation: $25,000 target, currently at $0 (0%)\n\nLong-term Goals\n\nRetirement: On track based on current contribution rate\n\nCollege Fund for Kids: Behind 

In [18]:
from langchain.chains import RetrievalQA
from langchain_openai import OpenAI

# Build a retrieval QA chain: the retriever supplies the context chunks and the
# LLM (temperature 0) writes the answer. The retrieved source documents are
# returned alongside the answer.
llm = OpenAI(temperature=0)
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, return_source_documents=True)

In [23]:
import textwrap

# Ask a question through the QA chain and echo it back.
query = "how is my health status"
result = qa_chain.invoke({"query": query})
print("\nQuestion:", query)

# Wrap the answer at 80 columns so it stays readable in the cell output
# (adjust the width as needed).
wrapped_text = textwrap.TextWrapper(width=80).fill(result["result"])
print("\nAnswer:\n", wrapped_text)

# The retrieved chunks are also available in result["source_documents"];
# each one carries a `metadata` dict that can be printed to list the sources.


Question: how is my health status

Answer:
  Based on the provided documents, your health status appears to be generally
good. You have a history of seasonal allergies and mild penicillin sensitivity,
but no other significant health concerns. Your blood pressure and cholesterol
levels have improved over the past year, and you are actively working towards
your fitness goals. You are also taking steps to improve your nutrition and
overall wellness through regular exercise and supplements. However, it is
important to continue monitoring your blood pressure and cholesterol levels, as
well as following your doctor's recommendations for reducing sodium intake and
maintaining a healthy weight. Source: Personal Health Journal - 2023-2024,
Medication Log, Exercise Routine, Recent Doctor Visits, Fitness Goals for 2024,
Nutrition Notes.


In [29]:
from langchain.prompts import PromptTemplate

# Custom prompt for the QA chain. `{context}` and `{question}` are the two
# placeholders declared as input variables below.
template = """
You are a helpful assistant that answers questions based on provided documents.
Provide proper formatted answer with proper heading and sub headings and paragraphs

Context information from documents:
{context}

Question: {question}

Answer the question based only on the provided context. If you don't know the answer or cannot find it in the context, say "I couldn't find this information in the provided documents." Include specific details and cite the sources of information.
"""

prompt = PromptTemplate(input_variables=["context", "question"], template=template)

In [30]:
# Rebuild the QA chain so it uses the custom prompt, passed through
# `chain_type_kwargs`; everything else matches the earlier chain.
llm = OpenAI(temperature=0)
chain_type_kwargs = {"prompt": prompt}
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
)

In [31]:
# Ask the same question again, this time through the custom-prompt chain.
query = "how is my health status"
result = qa_chain.invoke({"query": query})

print("\nQuestion:", query)

# The answer is printed as-is (no textwrap.fill) so the line breaks in the
# model's formatted response are kept.
print("\nAnswer:", result["result"])


Question: how is my health status

Answer: 
Health Status:

Based on the provided documents, your health status appears to be generally good. You are 42 years old with a height of 5'10" and a blood type of O+. You have a history of seasonal allergies and mild penicillin sensitivity, but no other significant health concerns.

Vital Statistics:

Your vital statistics record shows that your weight has fluctuated between 181-188 lbs over the past year. Your blood pressure has also varied, but has generally been within a healthy range. Your resting heart rate has remained consistent at around 70-75 bpm. Your cholesterol levels have improved since your last annual physical, but are still slightly high. Your blood sugar levels have also been within a healthy range.

Medication:

You are currently taking Lisinopril (10mg) daily for blood pressure management, as well as Zyrtec (10mg) as needed for seasonal allergies. You also take a daily supplement of Vitamin D (2000 IU) and a multivitamin.

