1. Multi-resume upload
2. Semantic search
3. JD-based ranking with scores
4. Pinecone cleanup button with confirmation + password

You're now fully equipped with:
1. Candidate resume intake
2. JD-based match scoring
3. Admin control panel for reset

In [None]:
import hashlib
import hmac
import os
import tempfile

import streamlit as st
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader, PyPDFLoader
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# --- PAGE CONFIG ---
# Page configuration is issued before any other Streamlit command, including
# the cached loaders below (their loading spinners render page elements).
st.set_page_config(page_title="AI Resume Matcher", page_icon="🧠")
st.title("🧠 AI Resume Matcher with HuggingFace + Pinecone")

# --- SETUP ---
# The API key is taken from the environment only. Never hard-code it: a key
# committed to source control is exposed to every reader of the repository
# and has to be rotated.
if not os.environ.get("PINECONE_API_KEY"):
    st.error("PINECONE_API_KEY is not set. Export it in the environment before launching the app.")
    st.stop()

pc = Pinecone()
index_name = "resume-index"


@st.cache_resource
def _load_embedding_model(name):
    """Return the HuggingFace embedding model called *name*.

    Streamlit re-executes the whole script on every interaction; caching the
    model as a resource avoids reloading its weights on each rerun.
    """
    return HuggingFaceEmbeddings(model_name=name)


@st.cache_resource
def _load_llm_components(name):
    """Return ``(tokenizer, model, pipe, llm)`` for the seq2seq model *name*.

    The text2text-generation pipeline is capped at 256 output tokens and is
    wrapped in a LangChain ``HuggingFacePipeline``. Cached for the same
    reason as the embedding model.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(name)
    loaded_model = AutoModelForSeq2SeqLM.from_pretrained(name)
    generation_pipe = pipeline(
        "text2text-generation",
        model=loaded_model,
        tokenizer=loaded_tokenizer,
        max_length=256,
    )
    return loaded_tokenizer, loaded_model, generation_pipe, HuggingFacePipeline(pipeline=generation_pipe)


# Load embedding model and vectorstore
embedding_model = _load_embedding_model("all-MiniLM-L6-v2")
# The vector store and chain are rebuilt on each rerun on purpose: they are
# cheap to construct, and a cached handle could outlive an index that the
# admin reset deletes and recreates.
vectorstore = PineconeVectorStore(index_name=index_name, embedding=embedding_model)

# HuggingFace LLM setup (Flan-T5)
model_name = "google/flan-t5-base"
tokenizer, model, pipe, llm = _load_llm_components(model_name)

# Retrieval QA chain: "stuff" puts all retrieved chunks into a single prompt;
# source documents are returned so the UI can show which resumes matched.
retriever = vectorstore.as_retriever()
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, return_source_documents=True, chain_type="stuff")

# Upload resumes: each uploaded file is loaded, tagged with a candidate name,
# split into chunks, embedded, and upserted into the Pinecone index.
st.markdown("### 📤 Upload Resumes")
uploaded_files = st.file_uploader("Upload one or more .txt or .pdf resumes", type=["txt", "pdf"], accept_multiple_files=True)

if uploaded_files:
    # The splitter settings never vary per file, so build it once.
    splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)

    for uploaded_file in uploaded_files:
        # splitext removes only the final extension (str.replace would strip
        # every occurrence); lower-casing makes "CV.TXT" load as text.
        candidate, extension = os.path.splitext(uploaded_file.name)
        extension = extension.lower()

        # The loaders take a filesystem path, so spill the upload to disk.
        with tempfile.NamedTemporaryFile(delete=False, suffix=extension) as tmp_file:
            tmp_file.write(uploaded_file.getvalue())
            file_path = tmp_file.name

        try:
            if extension == ".txt":
                loader = TextLoader(file_path)
            else:
                loader = PyPDFLoader(file_path)
            docs = loader.load()
        finally:
            # delete=False leaves cleanup to us; remove the file even when
            # loading fails so temp files do not accumulate.
            os.unlink(file_path)

        # Tag every page/document with the candidate name
        for doc in docs:
            doc.metadata["candidate_name"] = candidate

        # Chunk + embed
        chunks = splitter.split_documents(docs)
        if not chunks:
            st.warning(f"`{uploaded_file.name}` contains no extractable text and was skipped.")
            continue

        # Streamlit reruns this script on every interaction while the files
        # stay in the uploader. Deterministic IDs turn each re-upsert into an
        # overwrite instead of adding duplicate vectors to the index.
        chunk_ids = [
            hashlib.sha256(
                f"{uploaded_file.name}\x00{position}\x00{chunk.page_content}".encode("utf-8")
            ).hexdigest()
            for position, chunk in enumerate(chunks)
        ]

        PineconeVectorStore.from_documents(
            documents=chunks,
            embedding=embedding_model,
            index_name=index_name,
            ids=chunk_ids,
        )

        st.success(f"✅ `{uploaded_file.name}` uploaded and indexed.")

# Direct search: run the free-text query through the retrieval QA chain and
# show the generated answer followed by the resume chunks it was based on.
st.markdown("### 🔍 Search Candidate Profiles")
query = st.text_input("Enter a skill, tool, or experience to search resumes:")

# Whitespace-only input is treated as empty so it never triggers a
# retrieval + LLM call.
if query and query.strip():
    response = qa_chain.invoke({"query": query})
    st.markdown("### 🤖 Answer:")
    st.write(response["result"])
    st.markdown("### 📂 Matched Resumes:")
    for rank, doc in enumerate(response["source_documents"], start=1):
        # Chunks without a candidate_name in their metadata would otherwise
        # be displayed as "None".
        candidate_name = doc.metadata.get("candidate_name", "Unknown")
        st.markdown(f"**{rank}. Candidate:** `{candidate_name}`")
        with st.expander("📄 Snippet"):
            st.write(doc.page_content.strip())

# JD-based candidate ranking: retrieve the resume chunks closest to the job
# description, score them by cosine similarity, and list each candidate once
# with their best-matching chunk.
st.markdown("### 📄 Match Candidates by Job Description")
jd_text = st.text_area("Paste a job description here:")

# Whitespace-only input is treated as empty.
if jd_text and jd_text.strip():
    jd_vector = embedding_model.embed_query(jd_text)
    search_results = vectorstore.similarity_search_with_score(jd_text, k=5)

    scored_candidates = []
    st.markdown("### 🧠 Top Matching Candidates")

    if not search_results:
        st.info("No indexed resume content matched this job description.")
    else:
        matched_docs = [doc for doc, _ in search_results]

        # Embed all hits in one batched call and score them with a single
        # similarity computation instead of one model call per chunk.
        doc_vectors = embedding_model.embed_documents([doc.page_content for doc in matched_docs])
        scores = cosine_similarity([jd_vector], doc_vectors)[0]

        # Results are chunks, so one resume can appear several times. Keep
        # only the highest-scoring chunk per candidate; chunks without a
        # candidate name cannot be grouped and are kept individually.
        # The list may therefore hold fewer than k entries.
        best_by_candidate = {}
        unnamed = []
        for doc, score in zip(matched_docs, scores):
            name = doc.metadata.get("candidate_name")
            if name is None:
                unnamed.append((doc, score))
            elif name not in best_by_candidate or score > best_by_candidate[name][1]:
                best_by_candidate[name] = (doc, score)

        scored_candidates = sorted(
            [*best_by_candidate.values(), *unnamed],
            key=lambda pair: pair[1],
            reverse=True,
        )

    for rank, (doc, score) in enumerate(scored_candidates, start=1):
        candidate_name = doc.metadata.get("candidate_name", "Unknown")
        st.markdown(f"**{rank}. Candidate:** `{candidate_name}` — 🟩 Score: `{score:.3f}`")
        with st.expander("📄 Snippet"):
            st.write(doc.page_content.strip())

# Admin cleanup (reset): deletes and recreates the Pinecone index after an
# explicit confirmation checkbox plus an admin password.
st.sidebar.markdown("## 🧹 Cleanup Tools")
with st.sidebar.expander("⚠️ Reset Pinecone Resume Index"):
    st.warning("This will permanently delete all stored resumes from Pinecone and reset the index.\n\nUse only if you're starting fresh or testing.")

    confirm = st.checkbox("I understand and want to proceed")
    password = st.text_input("Enter admin password to confirm:", type="password")

    # The password is configurable through RESUME_ADMIN_PASSWORD.
    # NOTE(review): the "123" fallback is kept only so existing setups keep
    # working; it is not a real secret — set the variable in any shared
    # deployment.
    expected_password = os.environ.get("RESUME_ADMIN_PASSWORD", "123")

    if confirm and password:
        # compare_digest is a constant-time comparison; bytes are used
        # because it rejects non-ASCII str arguments.
        if hmac.compare_digest(password.encode("utf-8"), expected_password.encode("utf-8")):
            if st.button("❌ Confirm Reset"):
                with st.spinner("Resetting Pinecone index..."):
                    pc.delete_index(index_name)
                    pc.create_index(
                        name=index_name,
                        # Must equal the embedding model's output size
                        # (384 for all-MiniLM-L6-v2).
                        dimension=384,
                        metric="cosine",
                        # The dict form of spec must be nested under the
                        # "serverless" (or "pod") key; a flat cloud/region
                        # dict is rejected, which would leave the index
                        # deleted but never recreated.
                        spec={"serverless": {"cloud": "aws", "region": "us-east-1"}},
                    )
                    st.success("✅ Pinecone index cleared. Ready to upload fresh resumes.")
        else:
            st.error("🚫 Incorrect password. Reset aborted.")