This app integrates the following features:


1.	ConversationSummaryMemory for persistent, summarized chat history
2.	A “Clear Chat History” button
3.	Full conversational UI with sources, summary, and history

In [None]:
import os
import tempfile
import streamlit as st
import pandas as pd
from pinecone import Pinecone
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
from langchain_pinecone import PineconeVectorStore
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationSummaryMemory
from langchain_community.document_loaders import TextLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sklearn.metrics.pairwise import cosine_similarity

# ─── CONFIG & SETUP ────────────────────────────────────────────────────────────
# Only fall back to the placeholder when no key is configured; assigning
# unconditionally would clobber a real key exported in the environment.
os.environ.setdefault("PINECONE_API_KEY", "your-pinecone-api-key")  # ← replace!
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

RESUME_INDEX = "resume-index"
JD_INDEX     = "jd-index"

# Connect to existing Pinecone indexes
# Both stores share one embedding model so resume and JD vectors are comparable.
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
resume_store   = PineconeVectorStore(index_name=RESUME_INDEX, embedding=embedding_model)
jd_store       = PineconeVectorStore(index_name=JD_INDEX,     embedding=embedding_model)

# ─── LLM & QA CHAIN w/ SUMMARY MEMORY ──────────────────────────────────────────
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
model     = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base")
pipe      = pipeline("text2text-generation", model=model, tokenizer=tokenizer, max_length=256)
llm       = HuggingFacePipeline(pipeline=pipe)

# Summarizing memory for conversational QA
# Streamlit re-executes this script on every interaction, so the memory object
# is kept in session state; rebuilding it here each run would discard the
# conversation after every question.
if "summary_memory" not in st.session_state:
    st.session_state["summary_memory"] = ConversationSummaryMemory(
        llm=llm,
        memory_key="chat_history",
        return_messages=True,
        # The chain below returns both "answer" and "source_documents"; the
        # memory must be told which output to record or saving context fails.
        output_key="answer",
    )
summary_memory = st.session_state["summary_memory"]

# Conversational RetrievalQA chain (memory-aware)
qa_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=resume_store.as_retriever(),
    memory=summary_memory,
    return_source_documents=True
)

# ─── SKILL EXTRACTION ──────────────────────────────────────────────────────────
# Shared NER pipeline; its results are consumed by extract_skills_hf via the
# "word" and "entity_group" fields.
ner_model = pipeline(
    "ner",
    model="dslim/bert-base-NER",
    aggregation_strategy="simple",
)

def extract_skills_hf(text):
    """Extract candidate skill names from *text* with the NER pipeline.

    Entities tagged ``ORG`` or ``MISC`` are treated as skills.

    Args:
        text: Free text (resume or job description). ``None``, empty, or
            whitespace-only input yields an empty list without calling the model.

    Returns:
        A de-duplicated, alphabetically sorted list of entity strings. Sorting
        keeps the order stable across processes (set iteration order is not),
        so UI defaults derived from the first items do not change between runs.
    """
    if not text or not text.strip():
        return []
    ents = ner_model(text)
    return sorted({e["word"] for e in ents if e["entity_group"] in ("ORG", "MISC")})

def extract_skills_from_text(text):
    """Return the skills found in *text*; delegates to ``extract_skills_hf``."""
    skills = extract_skills_hf(text)
    return skills

# ─── STREAMLIT UI ───────────────────────────────────────────────────────────────
st.set_page_config(
    page_title="AI Resume Matcher",
    page_icon="🧠",
    layout="wide",
)
st.title("🧠 AI Resume Matcher with Memory & Bidirectional RAG")

# Mode switch: the chosen label decides which workflow is rendered below.
mode = st.radio(
    "🧭 Select Mode:",
    options=[
        "Recruiter: JD → Resumes",
        "Candidate: Resume → JDs",
    ],
    horizontal=True,
)

# Sidebar: Reset indexes & global skill filter
with st.sidebar:
    st.markdown("## 🧹 Cleanup Tools")
    with st.expander("⚠️ Reset Indexes"):
        st.warning("This deletes and recreates both Pinecone indexes!")
        confirm = st.checkbox("I understand")
        pwd     = st.text_input("Admin password:", type="password")
        # Allow the password to be supplied via the environment; "123" stays
        # as the fallback so existing deployments keep working.
        admin_pwd = os.getenv("RESET_ADMIN_PASSWORD", "123")
        if confirm and pwd:
            # Check the password first and the button second: a correct
            # password with the button not yet clicked must not be reported
            # as "Wrong password".
            if pwd != admin_pwd:
                st.error("Wrong password")
            elif st.button("Confirm Reset"):
                with st.spinner("Resetting..."):
                    existing = set(pc.list_indexes().names())
                    for idx in [RESUME_INDEX, JD_INDEX]:
                        # Deleting a missing index raises, which would abort
                        # the reset half-way; only delete what exists.
                        if idx in existing:
                            pc.delete_index(idx)
                        # dimension=384 matches the all-MiniLM-L6-v2 embeddings.
                        # A dict spec must be wrapped under "serverless".
                        pc.create_index(name=idx, dimension=384, metric="cosine",
                                        spec={"serverless": {"cloud": "aws",
                                                             "region": "us-east-1"}})
                    st.success("Indexes reset.")

    st.markdown("## 🎯 Filter by Skill (Global)")
    ALL_SKILLS     = ["Python","Docker","Streamlit","LangChain","Pinecone","Transformers","Kubernetes"]
    selected_skill = st.selectbox("Show only candidates with:", ["Show All"] + ALL_SKILLS)

# ─── RECRUITER MODE: JD → Resumes ───────────────────────────────────────────────
if mode == "Recruiter: JD → Resumes":
    # Streamlit re-executes this script on every widget interaction. Remember
    # which uploads were already indexed so a rerun does not upsert the same
    # chunks into Pinecone again.
    if "indexed_uploads" not in st.session_state:
        st.session_state["indexed_uploads"] = set()
    indexed_uploads = st.session_state["indexed_uploads"]
    splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)

    st.header("📥 Upload & Index Resumes")
    uploaded = st.file_uploader("Upload .txt/.pdf resumes", type=["txt","pdf"], accept_multiple_files=True)
    for f in uploaded or []:
        # Prefer the per-upload id when available so re-uploading a file is
        # treated as a new upload; fall back to name + size otherwise.
        upload_key = ("resume", getattr(f, "file_id", None) or (f.name, f.size))
        if upload_key in indexed_uploads:
            continue
        # The loaders need a real path, so spill the upload to a temp file and
        # always remove it once loaded.
        with tempfile.NamedTemporaryFile(delete=False, suffix="." + f.name.split(".")[-1]) as tmp:
            tmp.write(f.getvalue())
            path = tmp.name
        try:
            is_text = f.name.lower().endswith(".txt")  # ".TXT" must not go to the PDF loader
            loader = TextLoader(path) if is_text else PyPDFLoader(path)
            docs = loader.load()
        finally:
            os.unlink(path)
        candidate = f.name.rsplit(".", 1)[0]
        for doc in docs:
            doc.metadata["candidate_name"] = candidate
            # Skills are extracted from the first 1000 characters only.
            doc.metadata["skills"]         = extract_skills_hf(doc.page_content[:1000])
        chunks = splitter.split_documents(docs)
        PineconeVectorStore.from_documents(documents=chunks, embedding=embedding_model,
                                           index_name=RESUME_INDEX)
        indexed_uploads.add(upload_key)
        st.success(f"Indexed {f.name}")

    st.header("📥 Upload & Index Job Descriptions")
    jd_files = st.file_uploader("Upload .txt JDs", type=["txt"], accept_multiple_files=True, key="jd_up")
    if jd_files:
        from langchain.schema import Document
        for f in jd_files:
            upload_key = ("jd", getattr(f, "file_id", None) or (f.name, f.size))
            if upload_key in indexed_uploads:
                continue
            # getvalue() is independent of the read position; undecodable
            # bytes are replaced instead of aborting the whole upload.
            text = f.getvalue().decode("utf-8", errors="replace")
            doc  = Document(page_content=text, metadata={"jd_name": f.name})
            chunks = splitter.split_documents([doc])
            PineconeVectorStore.from_documents(documents=chunks, embedding=embedding_model,
                                               index_name=JD_INDEX)
            indexed_uploads.add(upload_key)
            st.success(f"Indexed JD {f.name}")

    # JD → Resume matching
    st.header("📄 Match Candidates by Job Description")
    jd_text = st.text_area("Paste a Job Description")
    if jd_text:
        if st.button("🧠 Extract Skills from JD"):
            # The button is only True during the run in which it was clicked;
            # keep the result in session state so it survives later reruns.
            st.session_state["jd_skills"] = extract_skills_from_text(jd_text)
            st.success("Extracted: " + ", ".join(st.session_state["jd_skills"]))
        jd_skills = st.session_state.get("jd_skills", [])
        custom    = st.text_input("✏️ Edit JD skills (comma-separated):")
        if custom:
            jd_skills = [s.strip() for s in custom.split(",") if s.strip()]

        st.subheader("🧪 Skill Filtering")
        must = st.multiselect("✅ Must-Have", jd_skills, default=jd_skills[:2])
        nice = st.multiselect("👍 Nice-to-Have", [s for s in jd_skills if s not in must])
        must_set, nice_set = set(must), set(nice)

        # embed + search
        jd_vec  = embedding_model.embed_query(jd_text)
        results = resume_store.similarity_search_with_score(jd_text, k=10)

        scored = []
        for doc, _ in results:
            # Metadata stores skills as a list; convert once so the set
            # operations below are valid (set & list raises TypeError).
            ds = set(doc.metadata.get("skills") or [])
            if must_set and not must_set.issubset(ds):
                continue  # filter before paying for the embedding below
            cos = cosine_similarity([jd_vec],
                                    [embedding_model.embed_query(doc.page_content)])[0][0]
            if must_set or nice_set:
                # Must-have hits count fully, nice-to-have hits count half.
                ov = (len(must_set & ds) + 0.5 * len(nice_set & ds)) / (len(must) + len(nice))
            else:
                ov = 0
            final = 0.7 * cos + 0.3 * ov
            scored.append((doc, final))

        scored.sort(key=lambda x: x[1], reverse=True)

        # Top-3 dashboard
        st.subheader("🏆 Top 3 Matches")
        for i, (doc, score) in enumerate(scored[:3]):
            name   = doc.metadata.get("candidate_name", "—")
            skills = doc.metadata.get("skills") or []
            st.markdown(f"**{i+1}. {name}** — Score: {score:.3f}")
            st.markdown(f"🔧 Skills: {', '.join(skills)}")
            st.markdown("---")

        # Full list + CSV
        st.subheader("📂 All Matches")
        export = []
        for i, (doc, score) in enumerate(scored):
            name = doc.metadata.get("candidate_name", "—")
            ds   = doc.metadata.get("skills") or []
            if selected_skill != "Show All" and selected_skill not in ds:
                continue
            st.markdown(f"**{i+1}. {name}** — Score: {score:.3f}")
            st.markdown(f"🔧 Skills: {', '.join(ds)}")
            with st.expander("📄 Snippet"):
                st.write(doc.page_content.strip())
            export.append({
                "Rank": i + 1,
                "Candidate": name,
                "Skills": ",".join(ds),
                "Score": f"{score:.3f}",
                "Snippet": doc.page_content[:200]
            })
        if export:
            df  = pd.DataFrame(export)
            csv = df.to_csv(index=False).encode("utf-8")
            st.download_button("📥 Download CSV", csv, "matches.csv", "text/csv")

# ─── CANDIDATE MODE: Resume → JDs ───────────────────────────────────────────────
else:
    st.header("🔁 Upload Your Resume to Find Matching Jobs")
    rev = st.file_uploader("Upload .txt/.pdf", type=["txt","pdf"], key="rev")
    if rev:
        with tempfile.NamedTemporaryFile(delete=False, suffix="."+rev.name.split(".")[-1]) as tmp:
            tmp.write(rev.getvalue()); path=tmp.name
        loader = TextLoader(path) if rev.name.endswith(".txt") else PyPDFLoader(path)
        text   = loader.load()[0].page_content
        jd_res = jd_store.similarity_search_with_score(text, k=10)
        st.subheader("🏅 Top Matching JDs")
        for i,(jd_doc,score) in enumerate(jd_res[:5]):
            st.markdown(f"**{i+1}. {jd_doc.metadata['jd_name']}** — Score: {score:.3f}")
            with st.expander("📄 Preview JD"):
                st.write(jd_doc.page_content.strip())

# ─── CONVERSATIONAL RAG: Memory + Chat UI ──────────────────────────────────────
st.header("💬 Conversational QA with Memory")
# Clear chat
if st.button("🧹 Clear Chat History"):
    summary_memory.clear()
    # Also empty the input box and drop the cached answer. Otherwise the
    # question still sitting in the box is asked again further down in this
    # same run and the history is repopulated straight away. The widget value
    # may be set here because the text input is created after this point.
    st.session_state["chat_query"] = ""
    if "chat_last" in st.session_state:
        del st.session_state["chat_last"]
    st.success("Chat history cleared!")
# Chat input
query = st.text_input("Ask a follow-up question:", key="chat_query")
if query:
    # Every widget interaction re-executes the script while the text box keeps
    # its value; only call the chain when the question actually changed so
    # unrelated reruns neither repeat the LLM call nor add duplicate turns.
    last = st.session_state.get("chat_last")
    if last is None or last[0] != query:
        last = (query, qa_chain.invoke({"question": query}))
        st.session_state["chat_last"] = last
    res = last[1]
    st.markdown("### 🤖 Answer")
    st.write(res["answer"])
    sources = res.get("source_documents")
    if sources is not None:
        st.markdown("#### 📂 Sources")
        for i, doc in enumerate(sources):
            st.markdown(f"{i+1}. {doc.metadata.get('candidate_name','—')}")
            st.write(doc.page_content[:200] + "...")
# Show chat history summary and details
st.markdown("### 📝 Conversation Summary")
st.write(summary_memory.buffer)
st.markdown("### 🗂️ Full Chat History")
for msg in summary_memory.chat_memory.messages:
    role = "🧑‍💻 You" if msg.type == "human" else "🤖 Bot"
    st.markdown(f"**{role}:** {msg.content}")