In [None]:
!pip -q install groq
from groq import Groq

# Smoke test: send one tiny chat completion to confirm the Groq key and model work.
# The key is read from the GROQ_API_KEY environment variable, or prompted for without
# echo, so it is never saved inside the notebook file.
import os
from getpass import getpass

client = Groq(api_key=os.environ.get("GROQ_API_KEY") or getpass("GROQ_API_KEY: "))
resp = client.chat.completions.create(
    model="llama3-70b-8192",
    messages=[{"role":"system","content":"You are concise."},
              {"role":"user","content":"Say OK"}],
)
print(resp.choices[0].message.content)


OK


In [None]:
!pip -q install autogen-agentchat gradio PyMuPDF sentence-transformers faiss-cpu


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.1/119.1 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m101.4/101.4 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.1/24.1 MB[0m [31m72.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m45.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m67.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m66.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m60.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
!pip -q install "autogen-agentchat>=0.2.30" "groq>=0.11.0" gradio PyMuPDF sentence-transformers faiss-cpu


In [None]:
from autogen import AssistantAgent, UserProxyAgent

def autogen_llm_config(groq_key: str, model: str, temperature: float = 0.4):
    """Build an AutoGen ``llm_config`` dict that targets Groq's OpenAI-compatible API.

    Args:
        groq_key: Groq API key placed in the single ``config_list`` entry.
        model: Groq model identifier.
        temperature: Sampling temperature stored at the top level of the config.

    Returns:
        A dict with ``config_list``, ``temperature`` and ``cache_seed`` (``None``).
    """
    return {
        "config_list": [{
            "model": model,
            "api_key": groq_key,
            # The OpenAI-compatible client appends paths such as /chat/completions to
            # base_url, so the version segment (/v1) must be part of it.
            "base_url": "https://api.groq.com/openai/v1",
        }],
        "temperature": temperature,
        "cache_seed": None,
    }


OK


In [None]:
import os, fitz, numpy as np
import gradio as gr
from typing import List

# ---- Mini RAG (resume) ----
from sentence_transformers import SentenceTransformer
import faiss

# ---- AutoGen agents ----
try:
    from autogen import AssistantAgent, UserProxyAgent
except ImportError:
    from autogen.agentchat import AssistantAgent, UserProxyAgent  # fallback for older layouts

# ========== RAG ==========

def extract_pdf_text(uploaded_file) -> str:
    """Return the plain text of every page of an uploaded PDF, joined by newlines.

    Args:
        uploaded_file: Either a filesystem path or an object exposing the path as
            ``.name``. Falsy values (``None``, ``""``) mean "no upload".

    Returns:
        The stripped concatenated page text, or ``""`` when nothing was uploaded.
    """
    if not uploaded_file:
        return ""
    # Accept both a path string and a file-like wrapper carrying the path in .name.
    pdf_path = getattr(uploaded_file, "name", None) or uploaded_file
    # The context manager closes the document even if text extraction raises.
    with fitz.open(pdf_path) as doc:
        pages = [page.get_text("text") or "" for page in doc]
    return "\n".join(pages).strip()

class MiniRAG:
    """Tiny in-memory retriever: character-window chunks, sentence-transformer
    embeddings, and a FAISS inner-product index over the normalized vectors."""

    def __init__(self, model_name="sentence-transformers/all-MiniLM-L6-v2"):
        """Load the embedding model; the index stays empty until ``build`` is called."""
        self.embedder = SentenceTransformer(model_name)
        self.index = None
        self.chunks: List[str] = []

    def _chunk(self, text: str, chunk_size=900, overlap=150) -> List[str]:
        """Cut ``text`` into overlapping windows and drop whitespace-only ones.

        Consecutive windows start ``chunk_size - overlap`` characters apart
        (at least 1, so the scan always advances).
        """
        source = text or ""
        stride = max(1, chunk_size - overlap)
        windows = (
            source[start:start + chunk_size].strip()
            for start in range(0, len(source), stride)
        )
        return [window for window in windows if window]

    def build(self, text: str):
        """Chunk ``text`` and (re)create the index; clears the index if no chunks result."""
        self.chunks = self._chunk(text)
        if self.chunks:
            embeddings = self.embedder.encode(self.chunks, normalize_embeddings=True).astype("float32")
            self.index = faiss.IndexFlatIP(embeddings.shape[1])
            self.index.add(embeddings)
        else:
            self.index = None

    def retrieve(self, query: str, k=6) -> str:
        """Return up to ``k`` best-matching chunks joined by blank lines ("" if nothing is indexed)."""
        if not (self.index and self.chunks):
            return ""
        query_vec = self.embedder.encode([query], normalize_embeddings=True).astype("float32")
        top_k = min(k, len(self.chunks))
        _, hits = self.index.search(query_vec, top_k)
        return "\n\n".join([self.chunks[pos] for pos in hits[0]])

# ========== AutoGen on Groq (NO OpenAI) ==========

def autogen_llm_config(groq_key: str, model: str = "llama3-70b-8192", temperature: float = 0.4):
    """Build an AutoGen ``llm_config`` dict that targets Groq's OpenAI-compatible API.

    Args:
        groq_key: Groq API key placed in the single ``config_list`` entry.
        model: Groq model identifier.
        temperature: Sampling temperature stored at the top level of the config.

    Returns:
        A dict with ``config_list``, ``temperature`` and ``cache_seed`` (``None``).
    """
    return {
        "config_list": [{
            "model": model,
            "api_key": groq_key,
            # The OpenAI-compatible client appends paths such as /chat/completions to
            # base_url, so the version segment (/v1) must be part of it.
            "base_url": "https://api.groq.com/openai/v1",
        }],
        "temperature": temperature,
        "cache_seed": None,
    }

# ---------- Agent prompts (compact helpers) ----------

def step_parse_resume(resume_ctx: str) -> str:
    """Compose the parser prompt: the resume excerpts followed by the output template
    (skills/tools, three quantified achievements, two recent roles)."""
    prompt_lines = [
        "Extract a compact, structured summary from RESUME EXCERPTS:",
        "",
        "RESUME EXCERPTS:",
        "---",
        f"{resume_ctx}",
        "---",
        "",
        "Return:",
        "- Skills/Tools: <comma-separated>",
        "- Quantified Achievements:",
        "  • <one line with numbers>",
        "  • <one line with numbers>",
        "  • <one line with numbers>",
        "- Recent Roles:",
        "  • <title, company, years>",
        "  • <title, company, years>",
        "",  # the prompt ends with a trailing newline
    ]
    return "\n".join(prompt_lines)

def step_analyze_jd(job_role: str, company: str, context: str) -> str:
    """Compose the JD-analyzer prompt: role and company headers, the supplied context,
    and the template of items to return (must-haves, responsibilities, tone)."""
    prompt_lines = [
        f"ROLE: {job_role}",
        f"COMPANY: {company}",
        "",
        "CONTEXT / JD SNIPPETS:",
        "---",
        f"{context}",
        "---",
        "",
        "Return:",
        "- Must-have skills (comma-separated)",
        "- Top 3 responsibilities/outcomes",
        "- Preferred tone/values to mirror (one line)",
        "",  # the prompt ends with a trailing newline
    ]
    return "\n".join(prompt_lines)

def step_matchmake(parsed_resume: str, jd_insights: str) -> str:
    """Compose the matchmaker prompt: resume signals, JD insights, and the three-part
    outline skeleton (hook, requirement-to-evidence mappings, close)."""
    prompt_lines = [
        "Using the parsed resume and JD insights, build an outline that maps requirements to evidence.",
        "",
        "RESUME SIGNALS:",
        "---",
        f"{parsed_resume}",
        "---",
        "",
        "JD INSIGHTS:",
        "---",
        f"{jd_insights}",
        "---",
        "",
        "Outline:",
        "1) Hook (1–2 sentences)",
        "2) Mappings (3 bullets): <JD requirement> -> <resume evidence with metrics>",
        "3) Close (1–2 sentences)",
        "",  # the prompt ends with a trailing newline
    ]
    return "\n".join(prompt_lines)

def step_write(outline: str, job_role: str, company: str, tone: str, max_words: int = 280) -> str:
    """Compose the writer prompt for a three-paragraph cover letter.

    Args:
        outline: Outline text embedded between the ``---`` fences.
        job_role: Role title quoted in the first line.
        company: Company name quoted in the first line.
        tone: Requested tone of voice.
        max_words: Approximate word limit stated in the prompt. Defaults to 280,
            which reproduces the previously hard-coded limit.

    Returns:
        The prompt string (no trailing newline).
    """
    return f"""Write a 3-paragraph cover letter for "{job_role}" at "{company}".
Tone: {tone}. Max ~{max_words} words.

Use this outline:
---
{outline}
---
Avoid clichés; mirror essential keywords naturally."""

def step_review(draft: str, jd_insights: str, max_words: int = 280) -> str:
    """Compose the reviewer prompt: the revision instruction (with the word limit),
    the draft, the JD insights, and a request to return only the final letter."""
    instruction = (
        f"Revise the draft to improve JD keyword coverage, clarity, and keep under {max_words} words."
    )
    sections = [
        instruction,
        "",
        "DRAFT:",
        "---",
        f"{draft}",
        "---",
        "",
        "JD INSIGHTS:",
        "---",
        f"{jd_insights}",
        "---",
        "",
        "Return only the final letter.",
    ]
    return "\n".join(sections)

# ---------- Multi-agent pipeline ----------

def run_agents(
    groq_key: str, model_name: str, temperature: float,
    resume_pdf, resume_text, job_role: str, company: str, company_context: str, tone: str
) -> str:
    """Run the parser -> JD analyzer -> matchmaker -> writer -> reviewer pipeline.

    Args:
        groq_key: Groq API key; an empty value short-circuits with a warning.
        model_name: Groq model identifier passed to ``autogen_llm_config``.
        temperature: Sampling temperature passed to ``autogen_llm_config``.
        resume_pdf: Uploaded PDF (optional); takes precedence over ``resume_text``.
        resume_text: Pasted resume text used when no PDF text is available.
        job_role: Target role (required).
        company: Target company (required).
        company_context: Optional JD / company blurb; ``None`` is treated as "".
        tone: Requested tone for the letter.

    Returns:
        The final letter, or a string starting with a warning sign describing a
        missing input or the exception that occurred (errors are reported to the
        UI rather than raised).
    """

    def _ask(agent, prompt: str) -> str:
        """Send one user prompt to ``agent`` and return its reply as stripped text."""
        reply = agent.generate_reply(messages=[{"role": "user", "content": prompt}])
        # The reply is not guaranteed to be a plain string: unwrap a message dict
        # and treat None as empty instead of crashing on .strip().
        if isinstance(reply, dict):
            reply = reply.get("content")
        answer = str(reply or "").strip()
        if not answer:
            raise RuntimeError(f"agent '{agent.name}' returned an empty reply")
        return answer

    try:
        if not groq_key:
            return "⚠️ Please paste your GROQ_API_KEY."

        # 1) Resume text (PDF or pasted)
        text = extract_pdf_text(resume_pdf) if resume_pdf else ""
        if not text:
            text = (resume_text or "").strip()

        if not (text and job_role and company):
            return "⚠️ Need resume text (upload or paste), job role, and company."

        # 2) RAG to focus on the most relevant resume snippets
        rag = MiniRAG()
        rag.build(text)
        resume_ctx = rag.retrieve(f"{job_role} required skills responsibilities achievements")

        # 3) AutoGen config + agents (Groq backend). No UserProxyAgent is created:
        #    the pipeline drives every agent directly, and the unused proxy was being
        #    built with default code-execution settings it never needed.
        cfg = autogen_llm_config(groq_key, model=model_name, temperature=temperature)

        def _agent(name: str, system_message: str):
            """Create one assistant agent sharing the Groq ``cfg``."""
            return AssistantAgent(name=name, system_message=system_message, llm_config=cfg)

        parser = _agent("parser", "Resume Parser Agent: produce compact, structured signals.")
        jd_agent = _agent("jd_analyzer", "JD Analyzer Agent: extract must-haves, outcomes, tone.")
        matchmaker = _agent("matchmaker", "Matchmaker Agent: map JD requirements to resume evidence; produce outline.")
        writer = _agent("writer", "Writer Agent: write a 3-paragraph cover letter with active voice and metrics.")
        reviewer = _agent("reviewer", "Reviewer Agent: tighten keyword coverage, remove redundancy, keep under limit.")

        # 4) Sequential agent calls with one critique–revise loop
        parsed_resume = _ask(parser, step_parse_resume(resume_ctx))
        jd_insights = _ask(jd_agent, step_analyze_jd(job_role, company, company_context or ""))
        outline = _ask(matchmaker, step_matchmake(parsed_resume, jd_insights))
        draft = _ask(writer, step_write(outline, job_role, company, tone))
        final_letter = _ask(reviewer, step_review(draft, jd_insights, 280))

        # Optional second pass if reviewer complains (simple heuristic)
        if "ADD MORE KEYWORDS" in final_letter.upper():
            draft2 = _ask(writer, step_write(outline, job_role, company, tone))
            final_letter = _ask(reviewer, step_review(draft2, jd_insights, 280))

        return final_letter

    except Exception as e:
        # Deliberate catch-all: the Gradio output box shows the failure as text.
        return f"⚠️ Error: {e}"

# ========== Gradio UI ==========

# Gradio front end for run_agents. Components are laid out in the order they are
# created, so the statement order below is the on-screen order.
with gr.Blocks(title="AutoGen Multi-Agent Cover Letter (Groq + RAG)") as demo:
    gr.Markdown("## 🤖 AutoGen Multi-Agent Cover Letter (Groq + RAG)")
    gr.Markdown("Parser → JD Analyzer → Matchmaker → Writer → Reviewer (with a critique–revise loop). **No OpenAI.**")

    # Row 1: credentials and generation settings.
    with gr.Row():
        groq_key = gr.Textbox(label="GROQ_API_KEY", type="password")
        model = gr.Dropdown(choices=["llama3-70b-8192","llama3-8b-8192","mixtral-8x7b-32768"], value="llama3-70b-8192", label="Groq Model")
        temperature = gr.Slider(0.0, 1.0, value=0.4, step=0.05, label="Temperature")
        tone = gr.Dropdown(choices=["Professional","Confident","Friendly","Concise"], value="Professional", label="Tone")

    # Row 2: resume input — run_agents uses the PDF text first and falls back to the pasted text.
    with gr.Row():
        resume_pdf = gr.File(label="Upload Resume (PDF optional)")
        resume_text = gr.Textbox(lines=10, label="Or paste Resume text")

    # Row 3: target role and company; the context box sits below the row.
    with gr.Row():
        job_role = gr.Textbox(label="Job Role / Title")
        company = gr.Textbox(label="Company")
    company_context = gr.Textbox(lines=8, label="Company Context / JD snippet (optional)")

    go = gr.Button("Generate with Agents ✨")
    out = gr.Textbox(lines=22, label="Final Cover Letter", show_copy_button=True)

    # The inputs list must stay in the same order as run_agents' positional parameters.
    go.click(run_agents, inputs=[groq_key, model, temperature, resume_pdf, resume_text, job_role, company, company_context, tone], outputs=[out])

# share=True publishes a temporary public URL; debug=True keeps the cell running
# until interrupted so errors and logs stay visible (see the captured cell output).
demo.launch(share=True, debug=True)


Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://c421748bca398cd37a.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://c421748bca398cd37a.gradio.live




In [None]:
import os
import fitz  # PyMuPDF
import gradio as gr
from typing import List

# --- RAG pieces (LangChain-compatible embeddings + FAISS) ---
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter

# --- AutoGen (agents) ---
try:
    from autogen import AssistantAgent, UserProxyAgent
except ImportError:
    # some versions export from autogen.agentchat
    from autogen.agentchat import AssistantAgent, UserProxyAgent  # type: ignore

# =========================================
# Helpers: PDF → text, chunking, vectorstore
# =========================================
def extract_text_from_pdf(uploaded_file) -> str:
    """Return the plain text of every page of an uploaded PDF, joined by newlines.

    Args:
        uploaded_file: Either a filesystem path or an object exposing the path as
            ``.name``. Falsy values (``None``, ``""``) mean "no upload".

    Returns:
        The stripped concatenated page text, or ``""`` when nothing was uploaded.
    """
    if not uploaded_file:
        return ""
    path = getattr(uploaded_file, "name", None) or uploaded_file
    # The context manager closes the document even if text extraction raises;
    # previously the handle was never closed.
    with fitz.open(path) as doc:
        text_pages = [page.get_text("text") or "" for page in doc]
    return "\n".join(text_pages).strip()

def split_text_into_chunks(text: str, chunk_size=1000, chunk_overlap=200) -> List[str]:
    """Split ``text`` (``None`` treated as "") with a RecursiveCharacterTextSplitter
    configured with the given chunk size and overlap, measured by ``len``."""
    splitter_options = {
        "chunk_size": chunk_size,
        "chunk_overlap": chunk_overlap,
        "length_function": len,
    }
    text_splitter = RecursiveCharacterTextSplitter(**splitter_options)
    source = text if text else ""
    return text_splitter.split_text(source)

def create_vector_store(chunks: List[str]):
    """Embed ``chunks`` with all-MiniLM-L6-v2 and return a FAISS vector store built from them."""
    return FAISS.from_texts(
        chunks,
        HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"),
    )

def perform_rag(vector_store, query: str, k: int = 6) -> str:
    """Retrieve the ``k`` most relevant chunks for ``query`` and join them with blank lines.

    Args:
        vector_store: Object exposing ``as_retriever(search_kwargs=...)``.
        query: Search text.
        k: Number of documents requested from the retriever.

    Returns:
        The non-empty ``page_content`` values of the retrieved documents,
        separated by blank lines ("" when nothing is retrieved).
    """
    retriever = vector_store.as_retriever(search_kwargs={"k": k})
    # get_relevant_documents is deprecated (this notebook's own output shows the
    # warning); prefer invoke() and fall back only for retrievers that lack it.
    fetch = getattr(retriever, "invoke", None) or retriever.get_relevant_documents
    docs = fetch(query)
    return "\n\n".join(d.page_content for d in docs if getattr(d, "page_content", ""))

# =========================================
# AutoGen on Groq (NO OpenAI)
# =========================================
def autogen_llm_config(groq_key: str, model: str = "llama-3.3-70b-versatile", temperature: float = 0.4):
    """Return an AutoGen ``llm_config`` pointing at Groq's OpenAI-compatible endpoint.

    Raises:
        ValueError: if ``groq_key`` is empty.
    """
    if groq_key:
        groq_endpoint = {
            "model": model,
            "api_key": groq_key,
            "base_url": "https://api.groq.com/openai/v1",  # the /v1 segment is required
        }
        return {"config_list": [groq_endpoint], "temperature": temperature, "cache_seed": None}
    raise ValueError("Missing GROQ_API_KEY. Set it in the env or paste in the UI and set os.environ['GROQ_API_KEY'].")

# ====== Agent step prompts (compact & focused) ======
def step_parse_resume(resume_ctx: str) -> str:
    """Compose the parser prompt: the resume excerpts followed by the output template
    (skills/tools, three quantified achievements, two recent roles)."""
    prompt_lines = [
        "Extract a compact, structured summary from RESUME EXCERPTS:",
        "",
        "RESUME EXCERPTS:",
        "---",
        f"{resume_ctx}",
        "---",
        "",
        "Return:",
        "- Skills/Tools: <comma-separated>",
        "- Quantified Achievements:",
        "  • <one line with numbers>",
        "  • <one line with numbers>",
        "  • <one line with numbers>",
        "- Recent Roles:",
        "  • <title, company, years>",
        "  • <title, company, years>",
        "",  # the prompt ends with a trailing newline
    ]
    return "\n".join(prompt_lines)

def step_analyze_jd(job_role: str, company: str, context: str) -> str:
    """Compose the JD-analyzer prompt: role and company headers, the supplied context,
    and the template of items to return (must-haves, responsibilities, tone & values)."""
    prompt_lines = [
        f"ROLE: {job_role}",
        f"COMPANY: {company}",
        "",
        "COMPANY CONTEXT / JD SNIPPETS:",
        "---",
        f"{context}",
        "---",
        "",
        "Return:",
        "- Must-have skills (comma-separated)",
        "- Top 3 responsibilities/outcomes",
        "- Preferred tone & values to mirror (one line)",
        "",  # the prompt ends with a trailing newline
    ]
    return "\n".join(prompt_lines)

def step_matchmake(parsed_resume: str, jd_insights: str) -> str:
    """Compose the matchmaker prompt: resume signals, JD insights, and the three-part
    outline skeleton (hook, requirement-to-evidence mappings, close)."""
    prompt_lines = [
        "Map resume evidence to JD requirements and build an outline.",
        "",
        "RESUME SIGNALS:",
        "---",
        f"{parsed_resume}",
        "---",
        "",
        "JD INSIGHTS:",
        "---",
        f"{jd_insights}",
        "---",
        "",
        "Outline:",
        "1) Hook (1–2 sentences)",
        "2) Mappings (3 bullets): <JD requirement> -> <resume evidence with metrics>",
        "3) Close (1–2 sentences)",
        "",  # the prompt ends with a trailing newline
    ]
    return "\n".join(prompt_lines)

def step_write(outline: str, job_role: str, company: str, tone: str, max_words: int = 280) -> str:
    """Compose the writer prompt for a three-paragraph cover letter.

    Args:
        outline: Outline text embedded between the ``---`` fences.
        job_role: Role title quoted in the first line.
        company: Company name quoted in the first line.
        tone: Requested tone of voice.
        max_words: Approximate word limit stated in the prompt. Defaults to 280,
            which reproduces the previously hard-coded limit.

    Returns:
        The prompt string (no trailing newline).
    """
    return f"""Write a 3-paragraph cover letter for "{job_role}" at "{company}".
Tone: {tone}. Max ~{max_words} words.

Use this outline:
---
{outline}
---
Avoid clichés; mirror essential keywords naturally."""

def step_review(draft: str, jd_insights: str, max_words: int = 280) -> str:
    """Compose the reviewer prompt: the revision instruction (with the word limit),
    the draft, the JD insights, and a request to return only the final letter."""
    instruction = (
        f"Revise to improve JD keyword coverage, clarity, and keep under {max_words} words."
    )
    sections = [
        instruction,
        "",
        "DRAFT:",
        "---",
        f"{draft}",
        "---",
        "",
        "JD INSIGHTS:",
        "---",
        f"{jd_insights}",
        "---",
        "",
        "Return only the final letter.",
    ]
    return "\n".join(sections)

# =========================================
# Main function (called by Gradio)
# =========================================
def generate_cover_letter(pdf_file, job_role, company_name, company_context, api_key_input="", model_choice="llama-3.3-70b-versatile", temperature=0.4, tone="Professional"):
    """Generate a cover letter from a resume PDF with the five-agent Groq pipeline.

    Args:
        pdf_file: Uploaded resume PDF (path or object exposing ``.name``).
        job_role: Target role (required).
        company_name: Target company (required).
        company_context: Optional JD / company blurb; ``None`` is treated as "".
        api_key_input: Groq key from the UI; falls back to the ``GROQ_API_KEY``
            environment variable when empty.
        model_choice: Groq model identifier.
        temperature: Sampling temperature.
        tone: Requested tone for the letter.

    Returns:
        The final letter, or a string starting with a warning sign describing a
        missing input or the exception that occurred (errors are reported to the
        UI rather than raised).
    """

    def _ask(agent, prompt: str) -> str:
        """Send one user prompt to ``agent`` and return its reply as stripped text."""
        reply = agent.generate_reply(messages=[{"role": "user", "content": prompt}])
        # The reply is not guaranteed to be a plain string: unwrap a message dict
        # and treat None as empty instead of crashing on .strip().
        if isinstance(reply, dict):
            reply = reply.get("content")
        answer = str(reply or "").strip()
        if not answer:
            raise RuntimeError(f"agent '{agent.name}' returned an empty reply")
        return answer

    try:
        # 0) Groq key (prefer UI input; fallback to env)
        groq_key = (api_key_input or os.getenv("GROQ_API_KEY") or "").strip()
        if not groq_key:
            return "⚠️ Please provide a GROQ_API_KEY (UI field or environment)."

        # 1) PDF → text
        resume_text = extract_text_from_pdf(pdf_file)
        if not resume_text:
            return "⚠️ Could not read the PDF or it was empty."

        # Validate the cheap text inputs BEFORE loading the embedding model and
        # building the FAISS index; a missing role or company always ends in this
        # same warning, so there is no point paying for the index first.
        if not (job_role and company_name):
            return "⚠️ Please provide: resume PDF, job role, and company name."

        # 1b) text → chunks → vector store → RAG
        chunks = split_text_into_chunks(resume_text)
        if not chunks:
            return "⚠️ Could not split resume text into chunks."

        vector_store = create_vector_store(chunks)
        candidate_profile = perform_rag(vector_store, job_role)
        if not candidate_profile:
            return "⚠️ Please provide: resume PDF, job role, and company name."

        # 2) Set up AutoGen agents on Groq. No UserProxyAgent is created: this
        #    sequential pipeline drives every agent directly and never used it.
        cfg = autogen_llm_config(groq_key=groq_key, model=model_choice, temperature=temperature)

        def _agent(name: str, system_message: str):
            """Create one assistant agent sharing the Groq ``cfg``."""
            return AssistantAgent(name=name, system_message=system_message, llm_config=cfg)

        parser = _agent("parser", "Resume Parser Agent: produce compact, structured signals.")
        jd_agent = _agent("jd_analyzer", "JD Analyzer Agent: extract must-haves, outcomes, and tone.")
        matchmaker = _agent("matchmaker", "Matchmaker Agent: map JD requirements to resume evidence; produce outline.")
        writer = _agent("writer", "Writer Agent: write a 3-paragraph cover letter with active voice and metrics.")
        reviewer = _agent("reviewer", "Reviewer Agent: tighten keyword coverage, remove redundancy, keep under limit.")

        # 3) Pipeline: parse → analyze → match → write → review (+ optional revise)
        parsed_resume = _ask(parser, step_parse_resume(candidate_profile))
        jd_insights = _ask(jd_agent, step_analyze_jd(job_role, company_name, company_context or ""))
        outline = _ask(matchmaker, step_matchmake(parsed_resume, jd_insights))
        draft = _ask(writer, step_write(outline, job_role, company_name, tone))
        final_letter = _ask(reviewer, step_review(draft, jd_insights, 280))

        # simple heuristic for a second pass if reviewer asks for more keywords
        if "ADD MORE KEYWORDS" in final_letter.upper():
            draft2 = _ask(writer, step_write(outline, job_role, company_name, tone))
            final_letter = _ask(reviewer, step_review(draft2, jd_insights, 280))

        return final_letter

    except Exception as e:
        # Deliberate catch-all: the Gradio output box shows the failure as text.
        return f"⚠️ Error: {e}"

# =========================================
# Gradio UI
# =========================================
# Gradio front end for generate_cover_letter. Components are laid out in the order
# they are created, so the statement order below is the on-screen order.
with gr.Blocks(title="AutoGen Multi-Agent Cover Letter (Groq + RAG)") as demo:
    gr.Markdown("## 🤖 AutoGen Multi-Agent Cover Letter (Groq + RAG)")
    gr.Markdown("Agents: Parser → JD Analyzer → Matchmaker → Writer → Reviewer. Retrieval with FAISS + MiniLM. **Groq only.**")

    # Row 1: credentials and generation settings.
    with gr.Row():
        api_key_box = gr.Textbox(label="GROQ_API_KEY (paste here or set env)", type="password")
        model_dd = gr.Dropdown(
            choices=["llama-3.3-70b-versatile","llama3-70b-8192","llama3-8b-8192","mixtral-8x7b-32768"],
            value="llama-3.3-70b-versatile",
            label="Groq Model",
        )
        temp_slider = gr.Slider(0.0, 1.0, value=0.4, step=0.05, label="Temperature")
        tone_dd = gr.Dropdown(choices=["Professional","Confident","Friendly","Concise"], value="Professional", label="Tone")

    # Row 2: resume upload (this version has no pasted-text fallback).
    with gr.Row():
        resume_pdf = gr.File(label="Upload ATS Resume (PDF)")
    # Row 3: target role and company; the context box sits below the row.
    with gr.Row():
        job_role = gr.Textbox(label="Job Role", placeholder="Ex: Data Scientist, Fullstack Developer, etc.")
        company_name = gr.Textbox(label="Company Name", placeholder="Enter the company")
    company_context = gr.Textbox(label="Company Context (optional)", lines=6, placeholder="Paste a brief JD/company blurb")

    go_btn = gr.Button("Generate with Agents ✨")
    out_box = gr.Textbox(label="Generated Cover Letter", lines=22, show_copy_button=True)

    # The inputs list must stay in the same order as generate_cover_letter's positional parameters.
    go_btn.click(
        generate_cover_letter,
        inputs=[resume_pdf, job_role, company_name, company_context, api_key_box, model_dd, temp_slider, tone_dd],
        outputs=[out_box],
    )

# share=True publishes a temporary public URL; debug=True keeps the cell running
# until interrupted so errors and logs stay visible (see the captured cell output).
demo.launch(share=True, debug=True)


Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://64a8f0a8911d1787d1.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
  docs = retriever.get_relevant_documents(query)






















Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://64a8f0a8911d1787d1.gradio.live




In [None]:
import os, re, fitz, gradio as gr
from typing import List, Tuple

# --- RAG pieces (LangChain-compatible embeddings + FAISS) ---
# CHANGED: use new LC community imports if you're on latest LC; otherwise keep yours.
try:
    from langchain_community.embeddings import HuggingFaceEmbeddings
    from langchain_community.vectorstores import FAISS
    from langchain_text_splitters import RecursiveCharacterTextSplitter
except Exception:
    from langchain.embeddings import HuggingFaceEmbeddings
    from langchain.vectorstores import FAISS
    from langchain.text_splitter import RecursiveCharacterTextSplitter

# --- AutoGen (agents) ---
try:
    from autogen import AssistantAgent, UserProxyAgent
except ImportError:
    from autogen.agentchat import AssistantAgent, UserProxyAgent  # type: ignore

# NEW: semantic model for metrics
from sentence_transformers import SentenceTransformer, util
import numpy as np

# =========================================
# Helpers: PDF → text, chunking, vectorstore
# =========================================
def extract_text_from_pdf(uploaded_file) -> str:
    """Return the plain text of every page of an uploaded PDF, joined by newlines.

    Args:
        uploaded_file: Either a filesystem path or an object exposing the path as
            ``.name``. Falsy values (``None``, ``""``) mean "no upload".

    Returns:
        The stripped concatenated page text, or ``""`` when nothing was uploaded.
    """
    if not uploaded_file:
        return ""
    path = getattr(uploaded_file, "name", None) or uploaded_file
    # The context manager closes the document even if text extraction raises;
    # previously the handle was never closed.
    with fitz.open(path) as doc:
        text_pages = [page.get_text("text") or "" for page in doc]
    return "\n".join(text_pages).strip()

def split_text_into_chunks(text: str, chunk_size=1000, chunk_overlap=200) -> List[str]:
    """Split ``text`` (``None`` treated as "") with a RecursiveCharacterTextSplitter
    configured with the given chunk size and overlap, measured by ``len``."""
    splitter_options = {
        "chunk_size": chunk_size,
        "chunk_overlap": chunk_overlap,
        "length_function": len,
    }
    text_splitter = RecursiveCharacterTextSplitter(**splitter_options)
    source = text if text else ""
    return text_splitter.split_text(source)

def create_vector_store(chunks: List[str]):
    """Embed ``chunks`` with all-MiniLM-L6-v2 and return a FAISS vector store built from them."""
    return FAISS.from_texts(
        chunks,
        HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"),
    )

def perform_rag(vector_store, query: str, k: int = 6) -> str:
    """Retrieve the ``k`` most relevant chunks for ``query`` and join them with blank lines.

    Args:
        vector_store: Object exposing ``as_retriever(search_kwargs=...)``.
        query: Search text.
        k: Number of documents requested from the retriever.

    Returns:
        The non-empty ``page_content`` values of the retrieved documents,
        separated by blank lines ("" when nothing is retrieved).
    """
    retriever = vector_store.as_retriever(search_kwargs={"k": k})
    # get_relevant_documents is deprecated in current LangChain; prefer invoke()
    # and fall back only for retrievers that lack it.
    fetch = getattr(retriever, "invoke", None) or retriever.get_relevant_documents
    docs = fetch(query)
    return "\n\n".join(d.page_content for d in docs if getattr(d, "page_content", ""))

# =========================================
# NEW: Metrics helpers (coverage & accuracy uplift)
# =========================================
# Shared sentence-embedding model, constructed once when this cell runs;
# compute_mapping_metrics uses it to encode JD lines and resume achievements.
_MINILM = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

def _lines(text: str) -> List[str]:
    # split by newlines/bullets, keep meaningful lines
    raw = re.split(r"\n|[\u2022\-•]+", text or "")
    return [re.sub(r"\s+", " ", s).strip() for s in raw if re.sub(r"\s+", " ", s).strip()]

def _achievements_from_resume(resume_text: str) -> List[str]:
    """Pick resume lines that look like achievements.

    A line qualifies when it has at least four words and contains a digit or one
    of the action-verb stems. If fewer than five lines qualify, the ten longest
    lines are used instead. Duplicates are removed, keeping first occurrences.
    """
    signal = re.compile(r"\d|improv|reduc|increas|achiev|built|led|deploy|optimi|design|laun", re.I)
    candidates = _lines(resume_text)
    selected = [
        line for line in candidates
        if len(line.split()) >= 4 and signal.search(line)
    ]
    if len(selected) < 5:
        # Too few hits: fall back to the longest lines (stable for equal lengths).
        selected = sorted(candidates, key=len, reverse=True)[:10]
    return list(dict.fromkeys(selected))

def _token_set(s: str) -> set:
    toks = re.findall(r"[A-Za-z][A-Za-z\-]{2,}", s.lower())
    stop = {"the","and","for","with","from","that","this","into","over","under","using","use","to","in","of","on","a","an"}
    return {t for t in toks if t not in stop}

def _baseline_overlap_acc(jd_reqs: List[str], achv: List[str], thresh: float = 0.20) -> Tuple[float, List[Tuple[str,str,float]]]:
    """Keyword-overlap baseline between JD requirements and resume achievements.

    For each requirement, the achievement with the highest Jaccard similarity of
    token sets is recorded (an empty string with score 0.0 when nothing overlaps).

    Returns:
        A pair ``(percentage, matches)``: the percentage of requirements whose
        best score is at least ``thresh``, and one ``(requirement, best
        achievement, score)`` tuple per requirement.
    """
    matches = []
    hits = 0
    for requirement in jd_reqs:
        req_tokens = _token_set(requirement)
        top_text, top_score = "", 0.0
        for candidate in achv:
            cand_tokens = _token_set(candidate)
            union_size = len(req_tokens | cand_tokens)
            score = len(req_tokens & cand_tokens) / max(1, union_size)
            # Strictly greater: the earliest achievement wins ties.
            if score > top_score:
                top_text, top_score = candidate, score
        matches.append((requirement, top_text, top_score))
        hits += int(top_score >= thresh)
    return 100.0 * hits / max(1, len(jd_reqs)), matches

def compute_mapping_metrics(jd_text: str, resume_text: str, cos_thresh: float = 0.55, overlap_thresh: float = 0.20):
    """Score how well resume achievements cover the JD lines.

    JD lines and resume achievements are embedded with the shared MiniLM model
    and compared by cosine similarity (the embeddings are normalized, so the
    matrix product is the cosine matrix).

    Returns:
        A dict with:
        - ``coverage_pct``: percentage of JD lines whose best cosine is >= ``cos_thresh``;
        - ``acc_semantic_pct``: the same value as ``coverage_pct``;
        - ``acc_baseline_pct``: keyword-overlap baseline percentage, rounded to 1 decimal;
        - ``improvement_pct``: relative change of semantic vs. baseline (100.0 or 0.0
          when the baseline is zero);
        - ``sample_mappings``: best match and cosine for the first five JD lines.
        All numbers are 0.0 and the sample is empty when either side has no lines.
    """
    requirements = _lines(jd_text)
    achievements = _achievements_from_resume(resume_text)
    if not (requirements and achievements):
        return dict(
            coverage_pct=0.0,
            acc_semantic_pct=0.0,
            acc_baseline_pct=0.0,
            improvement_pct=0.0,
            sample_mappings=[],
        )

    req_vecs = _MINILM.encode(requirements, normalize_embeddings=True)
    ach_vecs = _MINILM.encode(achievements, normalize_embeddings=True)
    similarity = req_vecs @ ach_vecs.T  # rows: JD lines, columns: achievements

    # Share of JD lines with at least one achievement at or above the threshold.
    n_covered = int((similarity.max(axis=1) >= cos_thresh).sum())
    semantic_pct = round(100.0 * n_covered / len(requirements), 1)

    baseline_pct, _ = _baseline_overlap_acc(requirements, achievements, thresh=overlap_thresh)

    if baseline_pct == 0:
        uplift = 100.0 if semantic_pct > 0 else 0.0
    else:
        uplift = round(((semantic_pct - baseline_pct) / baseline_pct) * 100.0, 1)

    def _best_match(row: int, requirement: str) -> dict:
        """Describe the top-scoring achievement for one JD line."""
        best = int(np.argmax(similarity[row]))
        return {
            "jd_requirement": requirement,
            "matched_achievement": achievements[best],
            "cosine": round(float(similarity[row, best]), 3),
        }

    preview = [_best_match(row, requirement) for row, requirement in enumerate(requirements[:5])]

    return {
        "coverage_pct": semantic_pct,
        "acc_semantic_pct": semantic_pct,
        "acc_baseline_pct": round(baseline_pct, 1),
        "improvement_pct": uplift,
        "sample_mappings": preview,
    }

# =========================================
# AutoGen on Groq (NO OpenAI)
# =========================================
def autogen_llm_config(groq_key: str, model: str = "llama-3.3-70b-versatile", temperature: float = 0.4):
    """Return an AutoGen ``llm_config`` pointing at Groq's OpenAI-compatible endpoint.

    Raises:
        ValueError: if ``groq_key`` is empty.
    """
    if groq_key:
        groq_endpoint = {
            "model": model,
            "api_key": groq_key,
            "base_url": "https://api.groq.com/openai/v1",
        }
        return {"config_list": [groq_endpoint], "temperature": temperature, "cache_seed": None}
    raise ValueError("Missing GROQ_API_KEY. Set it in the env or paste in the UI and set os.environ['GROQ_API_KEY'].")

# ====== Agent step prompts (compact & focused) ======
def step_parse_resume(resume_ctx: str) -> str:
    """Compose the parser prompt: the resume excerpts followed by the output template
    (skills/tools, three quantified achievements, two recent roles)."""
    prompt_lines = [
        "Extract a compact, structured summary from RESUME EXCERPTS:",
        "",
        "RESUME EXCERPTS:",
        "---",
        f"{resume_ctx}",
        "---",
        "",
        "Return:",
        "- Skills/Tools: <comma-separated>",
        "- Quantified Achievements:",
        "  • <one line with numbers>",
        "  • <one line with numbers>",
        "  • <one line with numbers>",
        "- Recent Roles:",
        "  • <title, company, years>",
        "  • <title, company, years>",
        "",  # the prompt ends with a trailing newline
    ]
    return "\n".join(prompt_lines)

def step_analyze_jd(job_role: str, company: str, context: str) -> str:
    """Compose the JD-analyzer prompt: role and company headers, the supplied context,
    and the template of items to return (must-haves, responsibilities, tone & values)."""
    prompt_lines = [
        f"ROLE: {job_role}",
        f"COMPANY: {company}",
        "",
        "COMPANY CONTEXT / JD SNIPPETS:",
        "---",
        f"{context}",
        "---",
        "",
        "Return:",
        "- Must-have skills (comma-separated)",
        "- Top 3 responsibilities/outcomes",
        "- Preferred tone & values to mirror (one line)",
        "",  # the prompt ends with a trailing newline
    ]
    return "\n".join(prompt_lines)

def step_matchmake(parsed_resume: str, jd_insights: str) -> str:
    """Compose the matchmaker prompt: resume signals, JD insights, and the three-part
    outline skeleton (hook, requirement-to-evidence mappings, close)."""
    prompt_lines = [
        "Map resume evidence to JD requirements and build an outline.",
        "",
        "RESUME SIGNALS:",
        "---",
        f"{parsed_resume}",
        "---",
        "",
        "JD INSIGHTS:",
        "---",
        f"{jd_insights}",
        "---",
        "",
        "Outline:",
        "1) Hook (1–2 sentences)",
        "2) Mappings (3 bullets): <JD requirement> -> <resume evidence with metrics>",
        "3) Close (1–2 sentences)",
        "",  # the prompt ends with a trailing newline
    ]
    return "\n".join(prompt_lines)

def step_write(outline: str, job_role: str, company: str, tone: str, max_words: int = 280) -> str:
    """Compose the writer prompt for a three-paragraph cover letter.

    Args:
        outline: Outline text embedded between the ``---`` fences.
        job_role: Role title quoted in the first line.
        company: Company name quoted in the first line.
        tone: Requested tone of voice.
        max_words: Approximate word limit stated in the prompt. Defaults to 280,
            which reproduces the previously hard-coded limit.

    Returns:
        The prompt string (no trailing newline).
    """
    return f"""Write a 3-paragraph cover letter for "{job_role}" at "{company}".
Tone: {tone}. Max ~{max_words} words.

Use this outline:
---
{outline}
---
Avoid clichés; mirror essential keywords naturally."""

def step_review(draft: str, jd_insights: str, max_words: int = 280) -> str:
    """Compose the reviewer prompt: the revision instruction (with the word limit),
    the draft, the JD insights, and a request to return only the final letter."""
    instruction = (
        f"Revise to improve JD keyword coverage, clarity, and keep under {max_words} words."
    )
    sections = [
        instruction,
        "",
        "DRAFT:",
        "---",
        f"{draft}",
        "---",
        "",
        "JD INSIGHTS:",
        "---",
        f"{jd_insights}",
        "---",
        "",
        "Return only the final letter.",
    ]
    return "\n".join(sections)

# =========================================
# Main function (called by Gradio)
# =========================================
# (agent name, system message) for each pipeline stage, in execution order.
_AGENT_ROLES = (
    ("parser", "Resume Parser Agent: produce compact, structured signals."),
    ("jd_analyzer", "JD Analyzer Agent: extract must-haves, outcomes, and tone."),
    ("matchmaker", "Matchmaker Agent: map JD requirements to resume evidence; produce outline."),
    ("writer", "Writer Agent: write a 3-paragraph cover letter with active voice and metrics."),
    ("reviewer", "Reviewer Agent: tighten keyword coverage, remove redundancy, keep under limit."),
)

_MISSING_INPUTS_MSG = "⚠️ Please provide: resume PDF, job role, and company name."


def _reply_text(reply) -> str:
    """Normalize an agent reply to stripped text.

    AutoGen's ``generate_reply`` may return a plain string, a message dict
    (text under "content"), or ``None``; calling ``.strip()`` on it directly
    fails for the last two.
    """
    if isinstance(reply, dict):
        reply = reply.get("content")
    return "" if reply is None else str(reply).strip()


def _ask(agent, prompt: str) -> str:
    """Send one user message to ``agent`` and return its non-empty text reply."""
    text = _reply_text(agent.generate_reply(messages=[{"role": "user", "content": prompt}]))
    if not text:
        # Surface the failure instead of feeding an empty string to the next stage.
        raise RuntimeError(f"{getattr(agent, 'name', 'agent')} agent returned an empty reply.")
    return text


def _format_metrics(metrics) -> str:
    """Render the mapping-metrics mapping as the multi-line text shown in the UI."""
    mapping_lines = [
        f"- JD: {m['jd_requirement']}\n  ↳ Match: {m['matched_achievement']} (cos={m['cosine']})"
        for m in metrics['sample_mappings']
    ]
    return (
        f"JD coverage (semantic): {metrics['coverage_pct']}%\n"
        f"Alignment accuracy — baseline (overlap): {metrics['acc_baseline_pct']}%\n"
        f"Alignment accuracy — semantic (MiniLM): {metrics['acc_semantic_pct']}%\n"
        f"Improvement over baseline: {metrics['improvement_pct']}%\n\n"
        f"Sample mappings:\n" + "\n".join(mapping_lines)
    )


def _format_headline(metrics) -> str:
    """Render the one-line quantified headline, signing the improvement correctly."""
    improvement = metrics['improvement_pct']
    try:
        is_negative = bool(improvement < 0)
    except TypeError:
        # Non-numeric value (e.g. pre-formatted text): keep the "+" prefix.
        is_negative = False
    # A negative number already carries "-"; prefixing "+" would print "+-5%".
    sign = "" if is_negative else "+"
    return (
        f"FAISS+MiniLM mapped {metrics['coverage_pct']}% of JD requirements; "
        f"semantic alignment {metrics['acc_semantic_pct']}% ({sign}{improvement}% vs baseline)."
    )


def generate_cover_letter(pdf_file, job_role, company_name, company_context, api_key_input="", model_choice="llama-3.3-70b-versatile", temperature=0.4, tone="Professional"):
    """Generate a cover letter plus mapping metrics from a resume PDF (Gradio callback).

    Pipeline: resume PDF -> text -> chunks -> vector store -> retrieval, then five
    agents in sequence (parser, JD analyzer, matchmaker, writer, reviewer).

    Args:
        pdf_file: Uploaded resume, passed straight to ``extract_text_from_pdf``.
        job_role: Target job title (required); also used as the retrieval query.
        company_name: Target company (required).
        company_context: Job description / company text; may be empty. Also used
            as the JD text for the mapping metrics.
        api_key_input: Groq API key; falls back to the ``GROQ_API_KEY``
            environment variable when empty.
        model_choice: Model name forwarded to ``autogen_llm_config``.
        temperature: Sampling temperature forwarded to ``autogen_llm_config``.
        tone: Writing tone passed to the writer prompt.

    Returns:
        A 3-tuple of strings ``(letter, metrics_text, headline)``. On a
        validation problem or any exception the first element is a
        "⚠️ ..." message and the other two are empty; nothing is raised.
    """
    max_words = 280  # limit given to the reviewer prompt

    try:
        groq_key = (api_key_input or os.getenv("GROQ_API_KEY") or "").strip()
        if not groq_key:
            return "⚠️ Please provide a GROQ_API_KEY (UI field or environment).", "", ""

        # Fail fast on missing inputs, before any PDF parsing or embedding work.
        job_role = (job_role or "").strip()
        company_name = (company_name or "").strip()
        company_context = company_context or ""
        if not pdf_file or not (job_role and company_name):
            return _MISSING_INPUTS_MSG, "", ""

        # 1) PDF → text → chunks → vector store → RAG
        resume_text = extract_text_from_pdf(pdf_file)
        if not resume_text:
            return "⚠️ Could not read the PDF or it was empty.", "", ""

        chunks = split_text_into_chunks(resume_text)
        if not chunks:
            return "⚠️ Could not split resume text into chunks.", "", ""

        vector_store = create_vector_store(chunks)
        candidate_profile = perform_rag(vector_store, job_role)
        if not candidate_profile:
            return _MISSING_INPUTS_MSG, "", ""

        # 1b) Metrics, using company_context as the JD text
        metrics = compute_mapping_metrics(jd_text=company_context, resume_text=resume_text)
        metrics_str = _format_metrics(metrics)

        # 2) Set up AutoGen agents on Groq (all share one LLM config)
        cfg = autogen_llm_config(groq_key=groq_key, model=model_choice, temperature=temperature)
        agents = {
            name: AssistantAgent(name=name, system_message=system_message, llm_config=cfg)
            for name, system_message in _AGENT_ROLES
        }

        # 3) Pipeline: each stage consumes the previous stage's text
        parsed_resume = _ask(agents["parser"], step_parse_resume(candidate_profile))
        jd_insights = _ask(agents["jd_analyzer"], step_analyze_jd(job_role, company_name, company_context))
        outline = _ask(agents["matchmaker"], step_matchmake(parsed_resume, jd_insights))
        draft = _ask(agents["writer"], step_write(outline, job_role, company_name, tone))
        final_letter = _ask(agents["reviewer"], step_review(draft, jd_insights, max_words))

        # Second pass if the reviewer asked for more keywords instead of returning a letter.
        # NOTE(review): the retry re-sends the same writer prompt; it relies on
        # sampling variation rather than feeding the reviewer's feedback back in.
        if "ADD MORE KEYWORDS" in final_letter.upper():
            draft2 = _ask(agents["writer"], step_write(outline, job_role, company_name, tone))
            final_letter = _ask(agents["reviewer"], step_review(draft2, jd_insights, max_words))

        # Return: letter + metrics + short headline for resume
        return final_letter, metrics_str, _format_headline(metrics)

    except Exception as e:
        # Deliberate catch-all: the UI shows the message instead of a stack trace.
        return f"⚠️ Error: {e}", "", ""

# =========================================
# Gradio UI
# =========================================
# Declarative Gradio layout: components appear on the page in the order they are
# created inside the `with` blocks, so statement order here is the UI order.
with gr.Blocks(title="AutoGen Multi-Agent Cover Letter (Groq + RAG)") as demo:
    gr.Markdown("## 🤖 AutoGen Multi-Agent Cover Letter (Groq + RAG)")
    gr.Markdown("Agents: Parser → JD Analyzer → Matchmaker → Writer → Reviewer. Retrieval with FAISS + MiniLM. **Groq only.**\n\n"
                "**Now with metrics:** JD coverage, baseline vs semantic alignment, and improvement %.")

    # Row 1: run settings. The initial values (model, temperature 0.4, tone
    # "Professional") match the keyword defaults of generate_cover_letter.
    with gr.Row():
        # Rendered as a password field; when left empty the callback falls back
        # to the GROQ_API_KEY environment variable.
        api_key_box = gr.Textbox(label="GROQ_API_KEY (paste here or set env)", type="password")
        model_dd = gr.Dropdown(
            choices=["llama-3.3-70b-versatile","llama3-70b-8192","llama3-8b-8192","mixtral-8x7b-32768"],
            value="llama-3.3-70b-versatile",
            label="Groq Model",
        )
        temp_slider = gr.Slider(0.0, 1.0, value=0.4, step=0.05, label="Temperature")
        tone_dd = gr.Dropdown(choices=["Professional","Confident","Friendly","Concise"], value="Professional", label="Tone")

    # Rows 2-3: the resume upload and the required job role / company name.
    with gr.Row():
        resume_pdf = gr.File(label="Upload ATS Resume (PDF)")
    with gr.Row():
        job_role = gr.Textbox(label="Job Role", placeholder="Ex: Data Scientist, Fullstack Developer, etc.")
        company_name = gr.Textbox(label="Company Name", placeholder="Enter the company")
    # Free-text JD / company context; the callback also uses it as the JD text for metrics.
    company_context = gr.Textbox(label="Job Description / Company Context", lines=8, placeholder="Paste JD bullets or description here…")

    # Trigger button and the three outputs, one per element of the callback's return tuple.
    go_btn = gr.Button("Generate with Agents ✨")
    out_letter = gr.Textbox(label="Generated Cover Letter", lines=20, show_copy_button=True)
    out_metrics = gr.Textbox(label="Mapping Metrics (coverage & alignment)", lines=14, show_copy_button=True)
    out_headline = gr.Textbox(label="1-line Resume Headline (quantified)", lines=2, show_copy_button=True)

    # `inputs` are passed positionally, so their order must match the parameter
    # order of generate_cover_letter; `outputs` receive its 3-tuple in order.
    go_btn.click(
        generate_cover_letter,
        inputs=[resume_pdf, job_role, company_name, company_context, api_key_box, model_dd, temp_slider, tone_dd],
        outputs=[out_letter, out_metrics, out_headline],
    )

# share=True publishes the app on a temporary public *.gradio.live URL, so anyone
# with the link can reach it. debug=True keeps this cell running in Colab so
# errors and logs stay visible; interrupt the cell to stop the server.
demo.launch(share=True, debug=True)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://8546ee740440fcfe53.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://8546ee740440fcfe53.gradio.live




In [None]:
!pip show fitz


Name: fitz
Version: 0.0.1.dev2
Summary: Fitz: Workflow Mangement for neuroimaging data.
Home-page: http://github.com/kastman/fitz
Author: Erik Kastman
Author-email: erik.kastman@gmail.com
License: BSD (3-clause)
Location: /usr/local/lib/python3.11/dist-packages
Requires: configobj, configparser, httplib2, nibabel, nipype, numpy, pandas, pyxnat, scipy
Required-by: 


In [None]:
!pip uninstall -y fitz
!pip install --upgrade pymupdf


Found existing installation: fitz 0.0.1.dev2
Uninstalling fitz-0.0.1.dev2:
  Successfully uninstalled fitz-0.0.1.dev2
Collecting pymupdf
  Downloading pymupdf-1.26.3-cp39-abi3-manylinux_2_28_x86_64.whl.metadata (3.4 kB)
Downloading pymupdf-1.26.3-cp39-abi3-manylinux_2_28_x86_64.whl (24.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.1/24.1 MB[0m [31m79.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pymupdf
Successfully installed pymupdf-1.26.3


In [None]:
!pip install autogen

Collecting autogen
  Downloading autogen-0.9.7-py3-none-any.whl.metadata (24 kB)
Collecting ag2==0.9.7 (from autogen)
  Downloading ag2-0.9.7-py3-none-any.whl.metadata (35 kB)
Collecting asyncer==0.0.8 (from ag2==0.9.7->autogen)
  Downloading asyncer-0.0.8-py3-none-any.whl.metadata (6.7 kB)
Collecting diskcache (from ag2==0.9.7->autogen)
  Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)
Collecting docker (from ag2==0.9.7->autogen)
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Downloading autogen-0.9.7-py3-none-any.whl (13 kB)
Downloading ag2-0.9.7-py3-none-any.whl (860 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m860.4/860.4 kB[0m [31m14.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading asyncer-0.0.8-py3-none-any.whl (9.2 kB)
Downloading diskcache-5.6.3-py3-none-any.whl (45 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.5/45.5 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading docker-7.1.0-py3-none-

In [None]:
!pip install langchain_community

Collecting langchain_community
  Downloading langchain_community-0.3.27-py3-none-any.whl.metadata (2.9 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain_community)
  Downloading pydantic_settings-2.10.1-py3-none-any.whl.metadata (3.4 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain_community)
  Downloading httpx_sse-0.4.1-py3-none-any.whl.metadata (9.4 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting python-dotenv>=0.21.0 (from pydantic-settings<3.0.0,>=2.4.0->langchain_community)
  Downloading python_dotenv-1.1.1-py3-none-any.whl.metadata (24 k

In [None]:
!pip install tools

Collecting tools
  Downloading tools-1.0.2-py3-none-any.whl.metadata (1.4 kB)
Downloading tools-1.0.2-py3-none-any.whl (37 kB)
Installing collected packages: tools
Successfully installed tools-1.0.2


In [None]:
!pip install fitz

Collecting fitz
  Downloading fitz-0.0.1.dev2-py2.py3-none-any.whl.metadata (816 bytes)
Collecting configobj (from fitz)
  Downloading configobj-5.0.9-py2.py3-none-any.whl.metadata (3.2 kB)
Collecting configparser (from fitz)
  Downloading configparser-7.2.0-py3-none-any.whl.metadata (5.5 kB)
Collecting nipype (from fitz)
  Downloading nipype-1.10.0-py3-none-any.whl.metadata (7.1 kB)
Collecting pyxnat (from fitz)
  Downloading pyxnat-1.6.3-py3-none-any.whl.metadata (5.4 kB)
Collecting prov>=1.5.2 (from nipype->fitz)
  Downloading prov-2.1.1-py3-none-any.whl.metadata (3.7 kB)
Collecting rdflib>=5.0.0 (from nipype->fitz)
  Downloading rdflib-7.1.4-py3-none-any.whl.metadata (11 kB)
Collecting traits>=6.2 (from nipype->fitz)
  Downloading traits-7.0.2-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.8 kB)
Collecting acres (from nipype->fitz)
  Downloading acres-0.5.0-py3-none-any.whl.metadata (6.2 kB)
Collecting etelemetry>=0.3.1

In [None]:
!pip install langchain_community

Collecting langchain_community
  Downloading langchain_community-0.3.27-py3-none-any.whl.metadata (2.9 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain_community)
  Downloading pydantic_settings-2.10.1-py3-none-any.whl.metadata (3.4 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain_community)
  Downloading httpx_sse-0.4.1-py3-none-any.whl.metadata (9.4 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading mypy_extensions-1.1.0-py3-n

In [None]:
!pip install autogen

Collecting autogen
  Downloading autogen-0.9.7-py3-none-any.whl.metadata (24 kB)
Collecting ag2==0.9.7 (from autogen)
  Downloading ag2-0.9.7-py3-none-any.whl.metadata (35 kB)
Collecting asyncer==0.0.8 (from ag2==0.9.7->autogen)
  Downloading asyncer-0.0.8-py3-none-any.whl.metadata (6.7 kB)
Collecting diskcache (from ag2==0.9.7->autogen)
  Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)
Collecting docker (from ag2==0.9.7->autogen)
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting python-dotenv (from ag2==0.9.7->autogen)
  Downloading python_dotenv-1.1.1-py3-none-any.whl.metadata (24 kB)
Downloading autogen-0.9.7-py3-none-any.whl (13 kB)
Downloading ag2-0.9.7-py3-none-any.whl (860 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m860.4/860.4 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading asyncer-0.0.8-py3-none-any.whl (9.2 kB)
Downloading diskcache-5.6.3-py3-none-any.whl (45 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━