In [9]:
from langchain.llms import Ollama
from langchain.document_loaders import PyPDFLoader
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA

# Retrieval-augmented question answering over a single PDF:
# load -> split -> embed -> index -> retrieve -> answer with an Ollama model.

# Step 1: Load the PDF into LangChain documents
loader = PyPDFLoader("job.pdf")  # Replace with your PDF's filename
documents = loader.load()

# Step 2: Split the documents into overlapping chunks so each retrieved
# passage is short; the overlap keeps some context across chunk boundaries.
# NOTE(review): sizes are presumably measured in characters (the splitter's
# default length function) -- confirm against the installed version.
splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20)
chunks = splitter.split_documents(documents)

# Step 3: Embed chunks using a local sentence transformer model
embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Step 4: Build a FAISS vector index from the chunk embeddings
vectorstore = FAISS.from_documents(chunks, embedding)

# Step 5: Expose the index as a retriever (library-default search settings)
retriever = vectorstore.as_retriever()

# Step 6: Load the Ollama model
# NOTE(review): presumably requires an Ollama server with "gemma3:4b"
# already available -- confirm in the target environment.
llm = Ollama(model="gemma3:4b")

# Step 7: Wrap retriever + LLM in a RetrievalQA chain.
# return_source_documents=False means the response carries only the query
# and the generated answer, not the retrieved chunks.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=False,
)

# Step 8: Ask a question
# Alternative queries to try:
# query = "what are preferred qualifications for this job posting?"
# query = "how many years of experience is required for this job posting?"
query = "which compnay has done this job posting and what exactly are they looking for in a suitable candidate?"

# Use invoke() instead of calling the chain object directly: Chain.__call__
# is deprecated in LangChain >= 0.1. The result is the same dict, with the
# 'query' and 'result' keys.
response = qa_chain.invoke({"query": query})
print(response)


{'query': 'which compnay has done this job posting and what exactly are they looking for in a suitable candidate?', 'result': 'Based on the provided text, the company that has done this job posting is D. E. Shaw India. They are looking for a highly experienced software developer to join their “Strike” team. The ideal candidate should have a master’s or bachelor’s degree in Computer Science or a related field.'}
