In [None]:
!pip install anthropic

In [2]:
import os
from datetime import datetime
from typing import List, Dict, Optional
from dataclasses import dataclass
import json
from anthropic import Anthropic
import pandas as pd
from concurrent.futures import ThreadPoolExecutor

In [3]:
@dataclass
class Candidate:
    name: str
    position: str
    experience_years: float
    responses: Dict[str, str]
    scores: Optional[Dict[str, float]] = None
    total_score: float = 0.0
    rank: int = 0

In [6]:

class CandidateRankingSystem:
    def __init__(self, api_key: str):
        self.client = Anthropic(api_key=api_key)

        # Evaluation criteria with weights
        self.criteria = {
            "technical_knowledge": {
                "weight": 0.35,
                "description": "Understanding of AI/ML concepts and techniques"
            },
            "problem_solving": {
                "weight": 0.30,
                "description": "Analytical thinking and solution design"
            },
            "practical_experience": {
                "weight": 0.25,
                "description": "Hands-on project experience"
            },
            "innovation": {
                "weight": 0.10,
                "description": "Creative thinking and research awareness"
            }
        }

        # Core interview questions
        self.questions = {
            "technical": "Explain the differences between supervised, unsupervised, and reinforcement learning. Include specific use cases for each.",
            "problem_solving": "How would you design a real-time recommendation system that handles both new users and new content?",
            "experience": "Describe the most challenging ML project you've implemented and its impact.",
            "innovation": "What recent AI developments do you find most promising and why?"
        }

    def generate_evaluation_prompt(self, candidate: Candidate) -> str:
        """
        Generates a focused prompt for numerical scoring
        """
        prompt = f"""You are an expert AI technical evaluator. Score this candidate's interview responses for an AI/ML role.

Candidate Profile:
- Name: {candidate.name}
- Position: {candidate.position}
- Experience: {candidate.experience_years} years

Score each response on a scale of 1-10 based on:
- Technical Knowledge (35%): Depth of understanding
- Problem Solving (30%): Analytical approach
- Practical Experience (25%): Real-world implementation
- Innovation (10%): Creative thinking

Responses to evaluate:
"""

        for q_type, question in self.questions.items():
            prompt += f"\n{q_type.upper()}:\n{question}\n"
            prompt += f"Response:\n{candidate.responses.get(q_type, 'No response')}\n"

        prompt += """
Return only a JSON object with scores:
{
    "scores": {
        "technical_knowledge": float,
        "problem_solving": float,
        "practical_experience": float,
        "innovation": float
    },
    "total_weighted_score": float
}"""
        return prompt

    def evaluate_candidate(self, candidate: Candidate) -> None:
        """
        Evaluates a single candidate and assigns scores
        """
        prompt = self.generate_evaluation_prompt(candidate)

        response = self.client.messages.create(
            model="claude-3-sonnet-20240229",
            max_tokens=1000,
            temperature=0.1,
            messages=[
                {"role": "user", "content": prompt}
            ]
        )

        try:
            evaluation = json.loads(response.content)
            candidate.scores = evaluation["scores"]
            candidate.total_score = evaluation["total_weighted_score"]
        except json.JSONDecodeError:
            print(f"Error evaluating candidate {candidate.name}")
            candidate.total_score = 0

    def rank_candidates(self, candidates: List[Candidate]) -> List[Candidate]:
        """
        Evaluates and ranks multiple candidates concurrently
        """
        # Evaluate all candidates concurrently
        with ThreadPoolExecutor(max_workers=5) as executor:
            executor.map(self.evaluate_candidate, candidates)

        # Sort candidates by total score
        ranked_candidates = sorted(candidates,
                                 key=lambda x: x.total_score,
                                 reverse=True)

        # Assign ranks
        for i, candidate in enumerate(ranked_candidates, 1):
            candidate.rank = i

        return ranked_candidates

    def generate_ranking_report(self, ranked_candidates: List[Candidate]) -> str:
        """
        Generates a concise ranking report
        """
        report = "Candidate Ranking Report\n"
        report += "=====================\n\n"
        report += f"Total Candidates Evaluated: {len(ranked_candidates)}\n"
        #report += f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M')}\n\n"

        report += "Rankings:\n"
        report += "---------\n"

        for candidate in ranked_candidates:
            report += f"\nRank {candidate.rank}: {candidate.name}\n"
            report += f"Position: {candidate.position}\n"
            report += f"Experience: {candidate.experience_years} years\n"
            report += f"Total Score: {candidate.total_score:.2f}/10\n"

            if candidate.scores:
                report += "Category Scores:\n"
                for category, score in candidate.scores.items():
                    report += f"- {category.replace('_', ' ').title()}: {score:.2f}/10\n"

            report += "-" * 40 + "\n"

        return report



In [None]:
def run_candidate_ranking(api_key: str) -> str:
    """
    Runs a mock ranking session with multiple candidates
    """
    system = CandidateRankingSystem(api_key)

    # Create mock candidates with varying profiles
    candidates = [
        Candidate(
            name="Alex Kumar",
            position="Senior ML Engineer",
            experience_years=5.5,
            responses={
                "technical": """Supervised learning uses labeled data for training models on specific tasks. Examples include image classification using CNNs and sentiment analysis using BERT. Unsupervised learning finds patterns in unlabeled data through clustering and dimensionality reduction. Reinforcement learning involves agents learning optimal policies through environment interaction, like in game AI and robotics.""",

                "problem_solving": """I'd implement a hybrid system combining collaborative filtering with content-based approaches. For new users, we'd use content preferences and demographic features. For new content, we'd use embeddings from content features and start with similar item recommendations. A multi-armed bandit would handle exploration vs exploitation.""",

                "experience": """Led development of a real-time fraud detection system processing millions of transactions daily. Implemented an ensemble of gradient boosting and deep learning models, achieving 99.9% accuracy and reducing false positives by 40%. Built streaming pipeline using Kafka and deployed on Kubernetes.""",

                "innovation": """Most excited about advances in multimodal models and few-shot learning. These developments enable more natural human-AI interaction and reduce data requirements for new applications. Currently experimenting with self-supervised learning techniques for improved model efficiency."""
            }
        ),

        Candidate(
            name="Sarah Chen",
            position="ML Engineer",
            experience_years=3.0,
            responses={
                "technical": """Supervised learning works with labeled data, unsupervised finds patterns without labels, and reinforcement learning uses rewards to train agents. Common applications include classification, clustering, and game AI.""",

                "problem_solving": """Would use collaborative filtering to find similar users and content-based filtering for new items. Could ask users for preferences during signup and show popular items in their preferred categories.""",

                "experience": """Built a customer churn prediction model using random forests. Handled imbalanced data using SMOTE and achieved 85% accuracy. Deployed model using Flask API and monitored performance monthly.""",

                "innovation": """Interested in large language models and their applications. Following developments through blogs and online courses. Experimenting with fine-tuning pre-trained models."""
            }
        ),

        Candidate(
            name="Michael Zhang",
            position="Senior Data Scientist",
            experience_years=4.5,
            responses={
                "technical": """Supervised learning maps inputs to outputs using labeled training data, essential for predictive modeling. Unsupervised learning discovers data patterns through techniques like clustering and dimensionality reduction. Reinforcement learning enables autonomous decision-making through reward-based training.""",

                "problem_solving": """Would design a multi-stage system: 1) Content analysis using deep learning for feature extraction, 2) User preference modeling with matrix factorization, 3) Real-time serving using approximate nearest neighbors. Handle cold start with demographic features and popular items.""",

                "experience": """Developed computer vision system for quality control in manufacturing. Used transfer learning with EfficientNet, implemented data pipeline for real-time processing. Reduced defect escape rate by 60% and saved $2M annually.""",

                "innovation": """Fascinated by recent advances in self-supervised learning and its potential for reducing labeled data requirements. Also following developments in neural architecture search for automated model optimization."""
            }
        ),

        Candidate(
            name="Emily Rodriguez",
            position="ML Research Engineer",
            experience_years=2.5,
            responses={
                "technical": """Three main types of machine learning: supervised uses labeled examples, unsupervised looks for patterns, and reinforcement learning trains through trial and error. Each has different applications in real-world problems.""",

                "problem_solving": """For recommendations, we need both collaborative and content-based filtering. Could use matrix factorization for user-item interactions and neural networks for processing content features. Would A/B test different approaches.""",

                "experience": """Worked on NLP project for text classification. Used BERT with custom preprocessing pipeline, improved accuracy by 15%. Implemented using PyTorch and deployed on AWS.""",

                "innovation": """Interested in efficient training methods and model compression. Following research on knowledge distillation and quantization. Think these are crucial for practical AI applications."""
            }
        )
    ]

    # Rank candidates
    ranked_candidates = system.rank_candidates(candidates)

    # Generate report
    return system.generate_ranking_report(ranked_candidates)


In [None]:
if __name__ == "__main__":
    api_key = "your-anthropic-api-key"  # Replace with actual API key
    report = run_candidate_ranking(api_key)


In [32]:
print(report)

Candidate Ranking Report

Total Candidates Evaluated: 4
Rankings:
---------

Rank 1: Michael Zhang
Position: Senior Data Scientist
Experience: 4.5 years
Total Score: 8.10/10
Category Scores:
- Technical Knowledge: 8.00/10
- Problem Solving: 8.50/10
- Practical Experience: 8.00/10
- Innovation: 7.50/10
----------------------------------------

Rank 2: Alex Kumar
Position: Senior ML Engineer
Experience: 5.5 years
Total Score: 7.83/10
Category Scores:
- Technical Knowledge: 7.50/10
- Problem Solving: 8.00/10
- Practical Experience: 8.50/10
- Innovation: 7.00/10
----------------------------------------

Rank 3: Sarah Chen
Position: ML Engineer
Experience: 3.0 years
Total Score: 5.85/10
Category Scores:
- Technical Knowledge: 5.50/10
- Problem Solving: 6.00/10
- Practical Experience: 6.50/10
- Innovation: 5.00/10
----------------------------------------

Rank 4: Emily Rodriguez
Position: ML Research Engineer
Experience: 2.5 years
Total Score: 5.83/10
Category Scores:
- Technical Knowledge: 