In [8]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.llms import Ollama
from langchain.chains import RetrievalQA
import os, re
from langchain_community.vectorstores import Chroma
from langchain.schema import Document
from langchain_experimental.text_splitter import SemanticChunker


In [87]:
# 1. Load multiple PDFs
pdf_dir = "./data"  # e.g. ./pdfs
all_docs = []
c = 1
for file in os.listdir(pdf_dir):
    if file.endswith(".pdf"):
        loader = PyPDFLoader(os.path.join(pdf_dir, file))
        all_docs.extend(loader.load())
        print(c)
        c+=1



1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57


In [88]:
# Step 2: Clean text function
def clean_text(text):
    text = re.sub(r'\s+', ' ', text)               # Remove extra spaces/newlines
    text = re.sub(r'[^\x00-\x7F]+', '', text)      # Remove non-ASCII characters
    return text.strip()

In [89]:
# Step 3: Remove empty pages and clean content
all_docs = [
    Document(page_content=clean_text(doc.page_content), metadata=doc.metadata)
    for doc in all_docs if doc.page_content.strip()
]


In [4]:

from langchain.embeddings import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


In [None]:

text_splitter = SemanticChunker(embeddings, breakpoint_threshold_type="percentile")
chunks = text_splitter.split_documents(all_docs)

In [92]:
# 4. Create  vector store
vectorstore = FAISS.from_documents(chunks, embeddings)

# Save locally to folder
vectorstore.save_local("faiss_index")


In [5]:
from langchain.vectorstores import FAISS

vectorstore = FAISS.load_local(
    "faiss_index",
    embeddings,
    allow_dangerous_deserialization=True  # ⚠️ Only if file is safe
)


In [6]:
system_prompt= """You are a highly knowledgeable and ethical Financial Analyst.
Your primary responsibility is to provide accurate, factual, and concise answers to user questions based *solely* on the financial books and documents provided in the "Context" section.

---
Context:
{retrieved_context}
---

User's Question: {user_question}

Instructions:
1.  **Strictly adhere to the provided "Context".** Do not use any outside knowledge, personal opinions, or make assumptions.
2.  **Focus on factual information.** Extract and synthesize relevant data directly from the context.
3.  **Maintain a professional and objective tone.**
4.  **Do NOT provide financial advice, recommendations, or predictions.** Your role is to inform based on the text, not to advise on investments or financial decisions.
5.  If the "User's Question" cannot be answered using *any* information within the provided "Context", state clearly: "I cannot answer this question based on the provided financial documents."
6.  If the context contains conflicting information, state that the information is conflicting and provide both perspectives if possible, citing the relevant parts of the context.
7.  Format your answer clearly, using bullet points or numbered lists where appropriate for readability.

Answer:"""

In [9]:
# 6. Initialize free local LLM via Ollama
llm = Ollama(model="mistral")

  llm = Ollama(model="mistral")


In [10]:
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 5},  # Number of top chunks to return
    chain_type="stuff",
    chain_type_kwargs={"prompt": system_prompt}
)

# 7. RetrievalQA chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm, retriever=retriever, return_source_documents=True
)

# 8. Ask a question
query = "one easy way to make money"
result = qa_chain(query)

print("Answer:", result["result"])


  result = qa_chain(query)


Answer:  One easy way to potentially make money, as mentioned in the context, is by getting involved with Bitcoin transactions. This includes setting up a Bitcoin wallet, accepting Bitcoin as a form of payment for goods or services, and buying Bitcoins through recognized exchanges like Coinbase, Bitstamp, or Bittylicious. However, it's important to note that investing in any form carries risk and it's always recommended to do thorough research before making any financial decisions.


In [None]:
# Answer:  One key to becoming successful, as mentioned in the text, is the ability to communicate effectively with others, which includes writing, speaking, and negotiating skills. The text also emphasizes that constant effort in expanding this skill through courses or educational resources can be beneficial.