# Version that supports uploading multiple files at the same time:

# In [None]:
import os
import tempfile
import streamlit as st
from pinecone import Pinecone
from langchain_pinecone import PineconeVectorStore
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from langchain.chains import RetrievalQA
from langchain_community.document_loaders import TextLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# --- CONFIG & SETUP ---
# Keep a PINECONE_API_KEY that is already exported in the environment; only
# fall back to the in-file placeholder when none is set. Prefer supplying the
# real key via the environment rather than committing it to this file.
os.environ.setdefault("PINECONE_API_KEY", "your-pinecone-api-key")  # ← replace!

index_name = "resume-index"
model_name = "google/flan-t5-base"


# Streamlit re-executes this script on every user interaction. The loaders
# below are wrapped in st.cache_resource so the heavyweight models are built
# once and reused across reruns instead of being reloaded each time.
# show_spinner=False keeps the cache from rendering anything before
# st.set_page_config() runs further down in the script.
@st.cache_resource(show_spinner=False)
def _load_embedding_model(name):
    """Return the HuggingFace embedding model identified by ``name``."""
    return HuggingFaceEmbeddings(model_name=name)


@st.cache_resource(show_spinner=False)
def _load_llm_components(name):
    """Load the seq2seq LLM identified by ``name``.

    Returns:
        A ``(tokenizer, model, pipe, llm)`` tuple: the HF tokenizer, the
        seq2seq model, the text2text-generation pipeline built from them
        (``max_length=256``), and the LangChain wrapper around that pipeline.
    """
    hf_tokenizer = AutoTokenizer.from_pretrained(name)
    hf_model = AutoModelForSeq2SeqLM.from_pretrained(name)
    hf_pipe = pipeline(
        "text2text-generation",
        model=hf_model,
        tokenizer=hf_tokenizer,
        max_length=256,
    )
    return hf_tokenizer, hf_model, hf_pipe, HuggingFacePipeline(pipeline=hf_pipe)


# Load embedding model and Pinecone vectorstore
embedding_model = _load_embedding_model("all-MiniLM-L6-v2")
vectorstore = PineconeVectorStore(index_name=index_name, embedding=embedding_model)

# Load HF LLM (same module-level names as before, now served from the cache)
tokenizer, model, pipe, llm = _load_llm_components(model_name)

# Retrieval chain setup: "stuff" the retrieved chunks into a single prompt and
# return the source documents alongside the generated answer.
retriever = vectorstore.as_retriever()
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    return_source_documents=True,
    chain_type="stuff",
)

# --- STREAMLIT UI ---
# Page chrome: browser-tab title/icon, then the visible page heading.
st.set_page_config(page_icon="🧠", page_title="AI Resume Matcher")
st.title("🧠 AI Resume Matcher with HuggingFace + Pinecone")

# 📤 Resume upload widget: accepts several .txt / .pdf files in one selection;
# whatever the user picked is stored in `uploaded_files`.
st.markdown("### 📤 Upload Resumes")
_RESUME_FILE_TYPES = ["txt", "pdf"]
uploaded_files = st.file_uploader(
    label="Upload one or more .txt or .pdf resumes",
    type=_RESUME_FILE_TYPES,
    accept_multiple_files=True,
)

def _index_resume(uploaded_file, splitter):
    """Load, chunk and upsert one uploaded resume into the Pinecone index.

    Args:
        uploaded_file: An item returned by ``st.file_uploader``; its ``name``
            and ``getvalue()`` are read.
        splitter: Text splitter used to chunk the loaded documents.

    Returns:
        The number of chunks sent to Pinecone (0 when no text was extracted,
        in which case nothing is upserted).
    """
    # splitext handles the extension case-insensitively and strips only the
    # real suffix, so "CV.TXT" is read as text and "a.pdf.notes.pdf" keeps
    # its inner ".pdf" in the candidate name.
    candidate, ext = os.path.splitext(uploaded_file.name)
    ext = ext.lower()

    # The loaders need a filesystem path, so spill the upload to a temp file.
    with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp_file:
        tmp_file.write(uploaded_file.getvalue())
        file_path = tmp_file.name

    try:
        # Anything that is not .txt is treated as a PDF (the uploader only
        # offers those two types).
        loader = TextLoader(file_path) if ext == ".txt" else PyPDFLoader(file_path)
        docs = loader.load()
    finally:
        # delete=False means nobody else cleans this up; remove it even when
        # loading fails so temp files do not accumulate.
        os.remove(file_path)

    for doc in docs:
        doc.metadata["candidate_name"] = candidate

    # Chunk and embed
    chunks = splitter.split_documents(docs)
    if not chunks:
        return 0

    # Upsert to Pinecone
    PineconeVectorStore.from_documents(
        documents=chunks,
        embedding=embedding_model,
        index_name=index_name,
    )
    return len(chunks)


if uploaded_files:
    # Streamlit reruns the whole script on every interaction (e.g. typing a
    # search query) while the uploader still holds the same files. Remember
    # what this session already indexed so reruns do not upsert duplicates.
    if "indexed_resumes" not in st.session_state:
        st.session_state["indexed_resumes"] = set()
    already_indexed = st.session_state["indexed_resumes"]

    # Loop-invariant: one splitter serves every file.
    splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)

    for uploaded_file in uploaded_files:
        file_key = (uploaded_file.name, uploaded_file.size)

        if file_key not in already_indexed:
            if _index_resume(uploaded_file, splitter) == 0:
                st.warning(
                    f"⚠️ `{uploaded_file.name}` contained no extractable text "
                    "and was not indexed."
                )
                continue
            already_indexed.add(file_key)

        st.success(f"✅ `{uploaded_file.name}` uploaded and indexed.")

# 🔍 Semantic Resume Search
def _show_match(position, match):
    """Render one retrieved chunk: a numbered candidate line and its snippet.

    Args:
        position: 1-based rank shown in front of the candidate name.
        match: Retrieved document; its ``metadata`` and ``page_content`` are read.
    """
    candidate_name = match.metadata.get('candidate_name')
    st.markdown(f"**{position}. Candidate:** `{candidate_name}`")
    # The chunk text is tucked into a collapsible section.
    with st.expander("📄 Snippet"):
        st.write(match.page_content.strip())


st.markdown("### 🔍 Search Candidate Profiles")
query = st.text_input("Enter a skill, tool, or experience to search resumes:")

# Only run the retrieval chain once the user has typed something.
if query:
    response = qa_chain.invoke({"query": query})

    # Generated answer from the chain.
    st.markdown("### 🤖 Answer:")
    st.write(response["result"])

    # Source chunks the chain returned alongside the answer.
    st.markdown("### 📂 Matched Resumes:")
    for position, match in enumerate(response["source_documents"], start=1):
        _show_match(position, match)