In [1]:
# Create a single trace that covers the whole process, unlike the previous version.

In [2]:
# pip install -U langchain langchain-openai langchain-community faiss-cpu pypdf python-dotenv langsmith

import os
from dotenv import load_dotenv

from langsmith import traceable

from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser


In [3]:
# Set the project name that tracing reads from the LANGCHAIN_PROJECT environment
# variable. This runs before load_dotenv() in the next cell; with python-dotenv's
# default of not overriding existing variables, this value should take precedence
# over any LANGCHAIN_PROJECT entry in .env (NOTE(review): confirm that is intended).
os.environ['LANGCHAIN_PROJECT'] = "RAG Chatbot"

In [4]:
# Load variables from a .env file into the process environment. Presumably this is
# where the API keys used by the model, embeddings and tracing come from -- confirm
# against the local .env file.
load_dotenv()


True

In [5]:
# Path of the PDF to index; relative, so it is resolved against the working directory.
PDF_PATH: str = "resume.pdf"


In [6]:
# ----------------- helpers (each traced as its own child run) -----------------
@traceable(name="load_pdf")
def load_pdf(path: str):
    """Load the PDF at ``path`` with ``PyPDFLoader``.

    Args:
        path: Location of the PDF file to read.

    Returns:
        Whatever ``PyPDFLoader(path).load()`` returns (a list of documents).
    """
    return PyPDFLoader(path).load()

In [7]:
@traceable(name="split_documents")
def split_documents(docs, chunk_size=1000, chunk_overlap=150):
    """Split documents into chunks with ``RecursiveCharacterTextSplitter``.

    Args:
        docs: Documents to split.
        chunk_size: Forwarded to the splitter as ``chunk_size``.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.

    Returns:
        The result of the splitter's ``split_documents(docs)`` call.
    """
    splitter_options = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap}
    return RecursiveCharacterTextSplitter(**splitter_options).split_documents(docs)

In [8]:
@traceable(name="build_vectorstore")
def build_vectorstore(splits):
    """Embed the document chunks and index them in a FAISS vector store.

    Args:
        splits: Document chunks to embed and index.

    Returns:
        The vector store returned by ``FAISS.from_documents``.

    Raises:
        ValueError: If ``splits`` is empty, so there is nothing to embed.
    """
    # Fail early with a clear message: with no chunks, FAISS.from_documents has no
    # embedding to size the index from and fails with an opaque error from inside
    # the library instead.
    if not splits:
        raise ValueError(
            "build_vectorstore received no document chunks; "
            "the source PDF may contain no extractable text."
        )

    # Embedding requests are sent to the OpenRouter endpoint rather than the
    # default OpenAI base URL.
    emb = OpenAIEmbeddings(model="text-embedding-3-small", base_url="https://openrouter.ai/api/v1")
    return FAISS.from_documents(splits, emb)

In [9]:
# ----------------- parent setup function (traced) -----------------
@traceable(name="setup_pipeline", tags=["setup"])
def setup_pipeline(pdf_path: str, chunk_size=1000, chunk_overlap=150):
    """Run the load -> split -> index steps under a single traced parent.

    Args:
        pdf_path: Path of the PDF to index.
        chunk_size: Passed through to ``split_documents``.
        chunk_overlap: Passed through to ``split_documents``.

    Returns:
        The vector store produced by ``build_vectorstore``.
    """
    # The three helper calls below are grouped under this function's trace.
    pages = load_pdf(pdf_path)
    chunks = split_documents(pages, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    return build_vectorstore(chunks)

In [10]:
# ----------------- model, prompt, and run -----------------
# Chat model used to answer questions. temperature=0 asks for the least random
# sampling; requests go to the OpenRouter endpoint instead of the default base URL.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0, base_url="https://openrouter.ai/api/v1")

In [11]:
# Two-message chat prompt: the system message restricts the model to the supplied
# context, and the human message carries the {question} and {context} variables
# that the chain fills in.
prompt = ChatPromptTemplate.from_messages([
    ("system", "Answer ONLY from the provided context. If not found, say you don't know."),
    ("human", "Question: {question}\n\nContext:\n{context}")
])

In [12]:
def format_docs(docs):
    """Join the ``page_content`` of each document into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The page contents separated by a blank line; ``""`` when ``docs`` is empty.
    """
    page_texts = [doc.page_content for doc in docs]
    return "\n\n".join(page_texts)


In [13]:
# ----------------- one top-level (root) run -----------------
@traceable(name="pdf_rag_full_run")
def setup_pipeline_and_query(pdf_path: str, question: str):
    """Index the PDF and answer one question inside a single root trace.

    Args:
        pdf_path: Path of the PDF to index; forwarded to ``setup_pipeline``.
        question: The question passed to the chain.

    Returns:
        The chain's output after ``StrOutputParser``.
    """
    # Setup runs as a child of this root run.
    store = setup_pipeline(pdf_path, chunk_size=1000, chunk_overlap=150)
    retriever = store.as_retriever(search_type="similarity", search_kwargs={"k": 4})

    # Retrieved documents are flattened into a single context string, while the
    # incoming question is forwarded unchanged.
    context_branch = retriever | RunnableLambda(format_docs)
    chain_inputs = RunnableParallel({
        "context": context_branch,
        "question": RunnablePassthrough(),
    })
    rag_chain = chain_inputs | prompt | llm | StrOutputParser()

    # Invoked from inside this traced function, so the LangChain run is recorded
    # under the same root; run_name labels it.
    return rag_chain.invoke(question, config={"run_name": "pdf_rag_query"})

In [14]:
# ----------------- CLI -----------------
if __name__ == "__main__":
    # Ask for a single question and run the full traced pipeline on it.
    print("PDF RAG ready. Ask a question (or Ctrl+C to exit).")
    try:
        q = input("\nQ: ").strip()
    except (KeyboardInterrupt, EOFError):
        # The banner advertises Ctrl+C as the way out, so leave quietly instead of
        # surfacing a traceback.
        q = None
        print("\nExiting.")

    if q:
        ans = setup_pipeline_and_query(PDF_PATH, q)
        print("\nA:", ans)
    elif q is not None:
        # A blank question would still load, embed and query the whole PDF for
        # nothing, so skip the run.
        print("No question entered; skipping the pipeline run.")
    

PDF RAG ready. Ask a question (or Ctrl+C to exit).



Q:  my skills



A: Your skills include:

- **Programming Languages**: Python, C/C++
- **Data Manipulation**: Pandas, NumPy, Data Wrangling, Data Cleaning, Feature Engineering
- **Data Visualization**: Matplotlib, Seaborn, Plotly
- **Machine Learning**: Scikit-learn, Model Deployment, Model Evaluation
- **Natural Language Processing (NLP)**: Text Preprocessing, Sentiment Analysis, Named Entity Recognition (NER), Word Embeddings (Word2Vec), SpaCy
- **Model Deployment**: Flask API, Docker, Heroku, Vercel, Streamlit, Netlify
- **Data Stores**: MySQL, PostgreSQL, MongoDB
- **Version Control**: Git, GitHub, GitLab
- **Operating Systems**: Windows, Linux (Ubuntu, Kali Linux), macOS
- **Tools & Libraries**: Selenium, BeautifulSoup, Jupyter Notebooks, VS Code, Colab

Additionally, you have experience in building intelligent systems, real-time monitoring applications, and working with machine learning models and data analysis. You have also participated in various hackathons and events, showcasing your problem