In [5]:
!pip install langchain langgraph langchain-openai pydantic httpx tenacity psutil


Collecting langgraph
  Downloading langgraph-0.6.8-py3-none-any.whl.metadata (6.8 kB)
Collecting langchain-openai
  Downloading langchain_openai-0.3.34-py3-none-any.whl.metadata (2.4 kB)
Collecting langgraph-checkpoint<3.0.0,>=2.1.0 (from langgraph)
  Downloading langgraph_checkpoint-2.1.1-py3-none-any.whl.metadata (4.2 kB)
Collecting langgraph-prebuilt<0.7.0,>=0.6.0 (from langgraph)
  Downloading langgraph_prebuilt-0.6.4-py3-none-any.whl.metadata (4.5 kB)
Collecting langgraph-sdk<0.3.0,>=0.2.2 (from langgraph)
  Downloading langgraph_sdk-0.2.9-py3-none-any.whl.metadata (1.5 kB)
Collecting langchain-core<1.0.0,>=0.3.72 (from langchain)
  Downloading langchain_core-0.3.77-py3-none-any.whl.metadata (3.2 kB)
Collecting ormsgpack>=1.10.0 (from langgraph-checkpoint<3.0.0,>=2.1.0->langgraph)
  Downloading ormsgpack-1.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.7/43.7 kB[0m [31m2.5 MB/s[

In [1]:
!pip install transformers accelerate torch




In [10]:
"""
Complete CV Evaluation System using LangGraph - FIXED VERSION
"""

# ============================================================================
# IMPORTS
# ============================================================================
from typing import TypedDict, Optional
from pydantic import BaseModel, Field
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langgraph.graph import StateGraph, END
import json
import time
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
import httpx
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import re


model_name = "deepseek-ai/deepseek-coder-1.3b-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto"
)

def llm_call(prompt, max_tokens=1024, temperature=0.3):
    """Call LLM with better parameters for JSON generation"""
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=0.95,
        pad_token_id=tokenizer.eos_token_id
    )
    result = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return result


# ============================================================================
# MODELS & TYPE DEFINITIONS
# ============================================================================

class Screening(BaseModel):
    """Model for CV screening results."""
    score: bool = Field(
        ...,
        description="Return True if the candidate's CV matches the job requirements, otherwise return False."
    )
    reasoning: str = Field(
        default="",
        description="Brief explanation of the screening decision."
    )
    matched_skills: list[str] = Field(
        default_factory=list,
        description="List of skills from CV that match job requirements."
    )
    missing_requirements: list[str] = Field(
        default_factory=list,
        description="List of requirements that are missing from the CV."
    )


class QuestionSet(BaseModel):
    """Model for set of interview questions."""
    questions: list[str] = Field(
        ...,
        description="List of 20 interview questions based on CV and job requirements"
    )


class IdeaEvaluationState(TypedDict, total=False):
    """State dictionary for CV evaluation workflow."""
    cv: str
    required_cv: str
    screening: Optional[dict]
    candidate_email: Optional[str]
    candidate_name: Optional[str]
    company_name: Optional[str]
    email_sent: Optional[bool]
    questions: Optional[dict]


# ============================================================================
# IMPROVED JSON EXTRACTION
# ============================================================================

def extract_json(text: str) -> str:
    """Extract JSON object from LLM response with better error handling"""
    # Remove the prompt echo if present
    if "Human:" in text:
        text = text.split("Human:")[-1]

    # Try to find JSON object between curly braces
    json_pattern = r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}'
    matches = re.findall(json_pattern, text, re.DOTALL)

    for match in matches:
        try:
            # Try to parse it
            obj = json.loads(match)
            # Check if it has the required 'score' field
            if 'score' in obj:
                return json.dumps(obj)
        except json.JSONDecodeError:
            continue

    # If no valid JSON found, try to extract key-value pairs manually
    try:
        # Look for score
        score_match = re.search(r'"score"\s*:\s*(true|false)', text, re.IGNORECASE)
        score = score_match.group(1).lower() == 'true' if score_match else False

        # Look for reasoning
        reasoning_match = re.search(r'"reasoning"\s*:\s*"([^"]*)"', text, re.DOTALL)
        reasoning = reasoning_match.group(1) if reasoning_match else ""

        # Look for matched_skills array
        skills_match = re.search(r'"matched_skills"\s*:\s*\[(.*?)\]', text, re.DOTALL)
        matched_skills = []
        if skills_match:
            skills_text = skills_match.group(1)
            matched_skills = [s.strip().strip('"') for s in skills_text.split(',') if s.strip()]

        # Look for missing_requirements array
        missing_match = re.search(r'"missing_requirements"\s*:\s*\[(.*?)\]', text, re.DOTALL)
        missing_requirements = []
        if missing_match:
            missing_text = missing_match.group(1)
            missing_requirements = [s.strip().strip('"') for s in missing_text.split(',') if s.strip()]

        # Construct valid JSON
        result = {
            "score": score,
            "reasoning": reasoning,
            "matched_skills": matched_skills,
            "missing_requirements": missing_requirements
        }
        return json.dumps(result)
    except Exception as e:
        print(f"⚠️ JSON extraction failed: {e}")
        # Return minimal valid JSON as last resort
        return json.dumps({
            "score": False,
            "reasoning": "Failed to parse LLM response",
            "matched_skills": [],
            "missing_requirements": ["Unable to evaluate"]
        })


def prompt_from(text: str):
    """Helper function to create a ChatPromptTemplate from text."""
    return ChatPromptTemplate.from_template(text)


# ============================================================================
# SIMPLIFIED PROMPTS
# ============================================================================

prompt_screening_text = """You are an HR recruiter. Analyze this CV against job requirements.

Job Requirements:
{required_cv}

Candidate CV:
{cv}

Respond with ONLY a JSON object (no markdown, no explanation):
{{
  "score": true or false,
  "reasoning": "brief explanation",
  "matched_skills": ["skill1", "skill2"],
  "missing_requirements": ["requirement1", "requirement2"]
}}

Set score=true if candidate meets core requirements, false otherwise."""

prompt_screening = prompt_from(prompt_screening_text)

json_screening = JsonOutputParser(pydantic_object=Screening)

prompt_questions_text = """Generate 20 interview questions for this candidate.

Job Requirements:
{required_cv}

Candidate CV:
{cv}

Respond with ONLY a JSON object:
{{
  "questions": ["question1", "question2", ...]
}}

Include 10 technical, 5 experience-based, and 5 problem-solving questions."""

prompt_questions = prompt_from(prompt_questions_text)
json_questions = JsonOutputParser(pydantic_object=QuestionSet)


# ============================================================================
# GRAPH NODES
# ============================================================================

@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=2, max=10),
    retry=retry_if_exception_type((httpx.RemoteProtocolError, httpx.ReadTimeout, ConnectionError))
)
def run_chain(prompt, parser, state: IdeaEvaluationState, key: str) -> IdeaEvaluationState:
    try:
        # Format prompt
        prompt_text = prompt.format(
            cv=state["cv"],
            required_cv=state["required_cv"]
        )

        print(f"\n📤 Sending prompt to LLM...")
        response = llm_call(prompt_text, max_tokens=1024, temperature=0.3)
        print(f"\n🔎 Raw LLM response:\n{response[:500]}...")

        # Extract JSON
        clean_response = extract_json(response)
        print(f"\n✨ Extracted JSON:\n{clean_response}")

        # Parse JSON
        try:
            result = json.loads(clean_response)
            # Validate it has required fields
            if key == "screening" and "score" not in result:
                raise ValueError("Missing 'score' field in response")

            new_state = dict(state)
            new_state[key] = result
            return new_state

        except (json.JSONDecodeError, ValueError) as e:
            print(f"⚠️ JSON parsing error: {e}")
            raise

    except Exception as e:
        print(f"⚠️ Attempt failed: {str(e)[:100]}...")
        raise


def screening_agent(state: IdeaEvaluationState) -> IdeaEvaluationState:
    """Agent node for CV screening with error handling."""
    print("\n🔍 Starting CV Screening...")
    try:
        return run_chain(prompt_screening, json_screening, state, "screening")
    except Exception as e:
        print(f"❌ Screening agent failed after retries: {e}")
        new_state = dict(state)
        new_state["screening"] = {
            "score": False,
            "reasoning": f"Error during evaluation: {str(e)[:200]}",
            "matched_skills": [],
            "missing_requirements": ["Unable to complete evaluation due to technical error"]
        }
        return new_state


def email_agent(state: IdeaEvaluationState) -> IdeaEvaluationState:
    """Email notification agent (placeholder)."""
    print("\n📧 Email notification step...")
    screening = state.get("screening", {})
    score = screening.get("score", False)

    if score:
        print("✅ Candidate ACCEPTED - Would send acceptance email")
    else:
        print("❌ Candidate REJECTED - Would send rejection email")

    new_state = dict(state)
    new_state["email_sent"] = True
    return new_state


def should_continue_to_interview(state: IdeaEvaluationState) -> str:
    """Route after email - continue to interview if accepted."""
    screening = state.get("screening", {})
    if screening.get("score", False):
        return "question_generator_agent"
    else:
        return END


def question_generator_agent(state: IdeaEvaluationState) -> IdeaEvaluationState:
    """Question Generation Agent."""
    print("\n📝 Generating interview questions...")

    if not state.get("screening", {}).get("score", False):
        print("⏭️ Skipping question generation (candidate rejected)")
        return state

    try:
        result = run_chain(prompt_questions, json_questions, state, "questions")
        questions = result.get("questions", {}).get("questions", [])
        print(f"✅ Generated {len(questions)} questions")
        return result
    except Exception as e:
        print(f"❌ Question generation failed: {e}")
        return state


# ============================================================================
# GRAPH BUILDING
# ============================================================================

def build_graph():
    """Factory function to create a new graph instance."""
    workflow = StateGraph(IdeaEvaluationState)

    workflow.add_node("screening_agent", screening_agent)
    workflow.add_node("email_agent", email_agent)
    workflow.add_node("question_generator_agent", question_generator_agent)

    workflow.set_entry_point("screening_agent")
    workflow.add_edge("screening_agent", "email_agent")
    workflow.add_conditional_edges(
        "email_agent",
        should_continue_to_interview,
        {
            "question_generator_agent": "question_generator_agent",
            END: END
        }
    )
    workflow.add_edge("question_generator_agent", END)

    return workflow.compile()


# ============================================================================
# MAIN EVALUATION FUNCTION
# ============================================================================

def evaluate_cv(cv_text: str, required_cv_text: str = "", max_retries: int = 3,
                candidate_email: str = "", candidate_name: str = "", company_name: str = ""):
    """Evaluate a CV against job requirements."""
    if not cv_text or not cv_text.strip():
        raise ValueError("CV text cannot be empty")

    graph = build_graph()

    initial_state: IdeaEvaluationState = {
        "cv": cv_text,
        "required_cv": required_cv_text,
        "candidate_email": candidate_email,
        "candidate_name": candidate_name,
        "company_name": company_name,
    }

    for attempt in range(max_retries):
        try:
            result = graph.invoke(initial_state)
            return result
        except Exception as e:
            if attempt < max_retries - 1:
                wait_time = 2 ** attempt
                print(f"\n⚠️ Error (attempt {attempt + 1}/{max_retries}): {str(e)[:100]}")
                print(f"⏳ Waiting {wait_time} seconds before retry...\n")
                time.sleep(wait_time)
            else:
                print(f"\n❌ Failed after {max_retries} attempts")
                raise

    return result


# ============================================================================
# UTILITY FUNCTIONS
# ============================================================================

def format_screening_result(result) -> str:
    """Format the screening result for display."""
    if not result or "screening" not in result:
        return "No screening results available"

    screening = result["screening"]
    score = screening.get("score", False)
    reasoning = screening.get("reasoning", "")
    matched_skills = screening.get("matched_skills", [])
    missing_requirements = screening.get("missing_requirements", [])
    questions = result.get("questions", {}).get("questions", [])

    output = [
        "\n" + "="*60,
        "CV SCREENING RESULTS",
        "="*60,
        f"\n✓ MATCH: {'YES ✅' if score else 'NO ❌'}",
        f"\n📝 Reasoning:\n{reasoning}",
    ]

    if matched_skills:
        output.append(f"\n✅ Matched Skills ({len(matched_skills)}):")
        for skill in matched_skills:
            output.append(f"   • {skill}")

    if missing_requirements:
        output.append(f"\n❌ Missing Requirements ({len(missing_requirements)}):")
        for req in missing_requirements:
            output.append(f"   • {req}")

    if questions:
        output.append(f"\n🎯 Generated Interview Questions ({len(questions)}):")
        for i, q in enumerate(questions[:5], 1):  # Show first 5
            output.append(f"   {i}. {q}")
        if len(questions) > 5:
            output.append(f"   ... and {len(questions) - 5} more questions")

    output.append("\n" + "="*60)
    return "\n".join(output)


# ============================================================================
# MAIN EXECUTION
# ============================================================================

if __name__ == "__main__":
    cv_text = """
# **Mahmoud Mohamed Omran**
# **Machine Learning Engineer**

Future City, Cairo | (+20) 1113719367 | Mahmoudomran36@gmail.com
LinkedIn: linkedin.com/in/mahmoud-omran
Github: github.com/Omran28

## **Summary:**
Computer Science graduate with experience developing AI solutions in computer vision, NLP, and predictive modelling.
Proficient in PyTorch, TensorFlow, SQL, and Azure.

## **Experience:**
### **Computer Vision Intern**
**National Telecommunication Institute (NTI)** | Jul 2025 – Present
- ML pipelines, CNNs, NLP techniques

### **Data Analytics Intern**
**EYouth**, Giza | Apr 2025 – May 2025
- Azure-based e-commerce, Random forest (98% accuracy)

## **Education:**
**Bachelor's Degree in Computer Science**
**Ain Shams University** (2019 – 2023)

## **Technical Skills:**
- Python, SQL, C++, Java
- PyTorch, TensorFlow, Pandas, NumPy
- Git, Flask, Django, Streamlit, Azure

## **Projects:**
- Customer Repurchase Classification (98% accuracy)
- Thief Video Classifier (99.74% accuracy)
"""

    required_cv_text = """
**Job Requirements:**
- 2+ years experience in Machine Learning
- Strong Python skills
- Experience with PyTorch and TensorFlow
- Computer Vision expertise
- Bachelor's degree in Computer Science
- Experience with deployment tools (Docker, Streamlit)
- Cloud platforms (AWS, Azure, GCP)
"""

    try:
        print("\n" + "="*60)
        print("🚀 CV EVALUATION SYSTEM")
        print("="*60)

        result = evaluate_cv(
            cv_text,
            required_cv_text,
            candidate_email="sondosahmed72@gmail.com",
            candidate_name="Sondos Ahmed",
            company_name="TechCorp AI"
        )

        print(format_screening_result(result))

    except Exception as e:
        print(f"\n❌ Error: {e}")
        import traceback
        traceback.print_exc()


🚀 CV EVALUATION SYSTEM

🔍 Starting CV Screening...

📤 Sending prompt to LLM...

🔎 Raw LLM response:
Human: You are an HR recruiter. Analyze this CV against job requirements.

Job Requirements:

**Job Requirements:**
- 2+ years experience in Machine Learning
- Strong Python skills
- Experience with PyTorch and TensorFlow
- Computer Vision expertise
- Bachelor's degree in Computer Science
- Experience with deployment tools (Docker, Streamlit)
- Cloud platforms (AWS, Azure, GCP)


Candidate CV:

# **Mahmoud Mohamed Omran**
# **Machine Learning Engineer**

Future City, Cairo | (+20) 1113719367 | M...

✨ Extracted JSON:
{"score": true, "reasoning": "Mahmoud has strong Python skills, experience with PyTorch and TensorFlow, and experience with Azure. He also has a strong understanding of computer vision and has worked on several projects.", "matched_skills": ["Python", "PyTorch", "TensorFlow", "Azure"], "missing_requirements": ["Computer Vision"]}

📧 Email notification step...
✅ Candidate AC