✅ Multiple resume upload
✅ JD input box
✅ Cosine similarity scoring
✅ Ranked candidate output with scores

In [None]:
import os
import tempfile
import streamlit as st
from pinecone import Pinecone
from langchain_pinecone import PineconeVectorStore
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import TextLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sklearn.metrics.pairwise import cosine_similarity

# --- SETUP ---
# Credentials must come from the environment, never from source control.
# Both the Pinecone client and the LangChain vector store read
# PINECONE_API_KEY from os.environ, so it only needs to be exported before
# launching the app (e.g. `export PINECONE_API_KEY=...`).
# NOTE(review): a real key was previously committed on this line; treat it as
# leaked and rotate it in the Pinecone console.
if not os.environ.get("PINECONE_API_KEY"):
    raise RuntimeError(
        "PINECONE_API_KEY is not set. Export it in the environment before starting the app."
    )
pc = Pinecone()
index_name = "resume-index"


# Streamlit re-executes this whole script on every widget interaction.
# Caching the heavyweight model objects keeps them loaded once per server
# process instead of once per rerun. The spinner is disabled so that no
# Streamlit element is emitted before st.set_page_config() runs further down.
@st.cache_resource(show_spinner=False)
def _load_embedding_model():
    """Load the sentence-embedding model used for indexing and querying."""
    return HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")


@st.cache_resource(show_spinner=False)
def _load_generation_pipeline(hf_model_name):
    """Load tokenizer, seq2seq model and generation pipeline for *hf_model_name*.

    Returns:
        A ``(tokenizer, model, pipe)`` tuple.
    """
    hf_tokenizer = AutoTokenizer.from_pretrained(hf_model_name)
    hf_model = AutoModelForSeq2SeqLM.from_pretrained(hf_model_name)
    hf_pipe = pipeline("text2text-generation", model=hf_model, tokenizer=hf_tokenizer, max_length=256)
    return hf_tokenizer, hf_model, hf_pipe


# Embeddings & vectorstore
embedding_model = _load_embedding_model()
vectorstore = PineconeVectorStore(index_name=index_name, embedding=embedding_model)

# HuggingFace LLM
model_name = "google/flan-t5-base"
tokenizer, model, pipe = _load_generation_pipeline(model_name)
llm = HuggingFacePipeline(pipeline=pipe)

# RAG QA Chain: retrieve resume chunks from Pinecone and "stuff" them into a
# single prompt for the LLM; source documents are returned for display.
retriever = vectorstore.as_retriever()
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, return_source_documents=True, chain_type="stuff")

# --- STREAMLIT UI ---
st.set_page_config(page_title="AI Resume Matcher", page_icon="🧠")
st.title("🧠 AI Resume Matcher with HuggingFace + Pinecone")

# 📤 Upload Multiple Resumes
st.markdown("### 📤 Upload Resumes")
uploaded_files = st.file_uploader("Upload one or more .txt or .pdf resumes", type=["txt", "pdf"], accept_multiple_files=True)


def _index_resume(uploaded_file, splitter):
    """Parse one uploaded resume, chunk it and insert the chunks into Pinecone.

    The upload is written to a temporary file because the loaders read from a
    path; the file is removed as soon as loading finishes, even on failure.

    Args:
        uploaded_file: A Streamlit uploaded-file object (uses ``name`` and
            ``getvalue()``).
        splitter: Text splitter applied to the loaded documents.

    Returns:
        The number of chunks that were indexed (0 if no text was extracted).
    """
    # splitext strips only the final extension, so names such as
    # "jane.pdf.final.pdf" keep their stem intact.
    candidate, extension = os.path.splitext(uploaded_file.name)
    extension = extension.lower()  # treat "CV.TXT" the same as "cv.txt"

    with tempfile.NamedTemporaryFile(delete=False, suffix=extension) as tmp_file:
        tmp_file.write(uploaded_file.getvalue())
        file_path = tmp_file.name

    try:
        loader = TextLoader(file_path) if extension == ".txt" else PyPDFLoader(file_path)
        docs = loader.load()
    finally:
        # delete=False was needed so the loader could reopen the path;
        # clean up explicitly so temp files do not accumulate.
        os.remove(file_path)

    for doc in docs:
        doc.metadata["candidate_name"] = candidate

    chunks = splitter.split_documents(docs)
    if chunks:
        PineconeVectorStore.from_documents(
            documents=chunks,
            embedding=embedding_model,
            index_name=index_name
        )
    return len(chunks)


if uploaded_files:
    # Streamlit reruns the script on every interaction while the uploader
    # still holds the files; remember what was already indexed in this
    # session so the same vectors are not inserted again on each rerun.
    if "indexed_resumes" not in st.session_state:
        st.session_state["indexed_resumes"] = set()
    indexed_resumes = st.session_state["indexed_resumes"]

    # The splitter is stateless, so one instance serves every file.
    resume_splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)

    for uploaded_file in uploaded_files:
        # NOTE(review): (name, size) is a cheap identity; two different files
        # with the same name and size would be treated as one.
        file_key = (uploaded_file.name, uploaded_file.size)
        if file_key not in indexed_resumes:
            if _index_resume(uploaded_file, resume_splitter) == 0:
                st.warning(f"⚠️ `{uploaded_file.name}` contains no extractable text and was skipped.")
                continue
            indexed_resumes.add(file_key)

        st.success(f"✅ `{uploaded_file.name}` uploaded and indexed.")

# 🔍 Free-text search: ask the RAG chain a question about the indexed resumes
st.markdown("### 🔍 Search Candidate Profiles")
query = st.text_input("Enter a skill, tool, or experience to search resumes:")

if query:
    # One call performs both retrieval and answer generation.
    response = qa_chain.invoke({"query": query})

    # Generated answer first ...
    st.markdown("### 🤖 Answer:")
    answer_text = response["result"]
    st.write(answer_text)

    # ... then the resume chunks the answer was built from.
    st.markdown("### 📂 Matched Resumes:")
    supporting_chunks = response["source_documents"]
    for position, chunk in enumerate(supporting_chunks, start=1):
        candidate_label = chunk.metadata.get('candidate_name')
        st.markdown(f"**{position}. Candidate:** `{candidate_label}`")
        snippet_panel = st.expander("📄 Snippet")
        with snippet_panel:
            st.write(chunk.page_content.strip())

# 📄 JD-to-Candidate Matching
st.markdown("### 📄 Match Candidates by Job Description")
jd_text = st.text_area("Paste a job description here:")

# Ignore whitespace-only input so a stray space or newline does not trigger a search.
if jd_text and jd_text.strip():
    # Embed the JD
    jd_vector = embedding_model.embed_query(jd_text)

    # Retrieve the closest resume chunks. The store's own score is discarded
    # and cosine similarity is recomputed locally so the displayed number does
    # not depend on the metric the index was created with.
    search_results = vectorstore.similarity_search_with_score(jd_text, k=5)
    matched_docs = [doc for doc, _ in search_results]

    st.markdown("### 🧠 Top Matching Candidates")

    if not matched_docs:
        st.info("No indexed resume content matched this job description. Upload resumes first.")
    else:
        # Embed all retrieved chunks in one batched call instead of one model
        # invocation per chunk, then score them against the JD in a single
        # matrix operation (row 0 holds the JD-vs-chunk similarities).
        doc_vectors = embedding_model.embed_documents([doc.page_content for doc in matched_docs])
        scores = cosine_similarity([jd_vector], doc_vectors)[0]

        # Sort by similarity score, best match first.
        # NOTE: ranking is per chunk, so one candidate may appear more than once.
        scored_candidates = sorted(zip(matched_docs, scores), key=lambda pair: pair[1], reverse=True)

        for rank, (doc, score) in enumerate(scored_candidates, start=1):
            st.markdown(f"**{rank}. Candidate:** `{doc.metadata.get('candidate_name')}` — 🟩 Score: `{score:.3f}`")
            with st.expander("📄 Snippet"):
                st.write(doc.page_content.strip())

🔁 1. Uploading Resumes
	•	Input: .txt or .pdf files (one or many)
	•	Processing:
	•	Saved temporarily
	•	Loaded using TextLoader or PyPDFLoader
	•	Each document tagged with candidate_name (from filename)
	•	Split into smaller text chunks (300 characters with a 50-character overlap)
	•	Chunks embedded via all-MiniLM-L6-v2
	•	Inserted into Pinecone under index name resume-index
	•	Output: Each resume is now stored in vector format, searchable with metadata.

🔍 2. Direct Search Interface
	•	Input: A user types a phrase like "Streamlit and LangChain"
	•	Processing:
	•	The phrase is passed to RetrievalQA chain (with HuggingFace LLM)
	•	Pinecone retrieves top-matching resume chunks
	•	The LLM generates a natural language answer using those chunks
	•	Output:
	•	🧠 Answer (e.g., “Ali Rahman worked on Streamlit…”)
	•	📂 List of matched resumes (with candidate_name)
	•	🔎 Expandable view of matched resume snippets


📄 3. JD-to-Resume Matching
	•	Input: A full job description pasted into the textarea
	•	Processing:
	•	Job description is converted into an embedding
	•	Pinecone retrieves top 5 matching chunks
	•	Each retrieved chunk is compared to the JD embedding using cosine similarity
	•	Each chunk is scored and ranked from most to least relevant
	•	Output:
	•	✅ Ranked list of candidates
	•	🟩 Score (cosine similarity between JD and resume chunk)
	•	📄 Expandable snippet from the matched part of resume

In [2]:
"""
with tempfile.NamedTemporaryFile(delete=False, suffix=...) as tmp_file:
    tmp_file.write(uploaded_file.getvalue())
    file_path = tmp_file.name
"""

'\nwith tempfile.NamedTemporaryFile(delete=False, suffix=...) as tmp_file:\n    tmp_file.write(uploaded_file.getvalue())\n    file_path = tmp_file.name\n'

💡 What this does:

1.	tempfile.NamedTemporaryFile(...)
→ Creates a temporary file on disk
→ Unique name and system-controlled location (like /tmp/... on Unix/Mac)
2.	tmp_file.write(...)
→ Saves the uploaded file’s content into that temp file
3.	file_path = tmp_file.name
→ Stores the full file path to use in the next step (loading, parsing, embedding)


📍 Cleanup:

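These temp files aren’t deleted automatically (the code passes delete=False), so they accumulate unless you:
1. Manually delete them later (e.g. call os.remove(file_path) once the loader has finished reading), or
2. Use delete=True — but then the file is removed as soon as it is closed at the end of the with block, so a loader that opens it by path afterwards will not find it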