# üìä Answer Scoring & Evaluation

This notebook implements the answer matching and scoring system:

1. **Semantic Similarity** - Sentence embeddings
2. **Keyword Matching** - Must-have terms
3. **Step Matching** - Multi-step solutions
4. **Partial Credit Rules** - Subject-specific logic

---

## 1. Setup & Imports

In [2]:
# Install dependencies if needed
# !pip install sentence-transformers numpy scikit-learn

In [3]:
import numpy as np
import re
from typing import List, Dict, Tuple, Optional
from dataclasses import dataclass, field
from enum import Enum
import warnings
warnings.filterwarnings('ignore')

# Sentence Transformers for semantic similarity
try:
    from sentence_transformers import SentenceTransformer
    EMBEDDINGS_AVAILABLE = True
except ImportError:
    EMBEDDINGS_AVAILABLE = False
    print("‚ö†Ô∏è sentence-transformers not installed. Using fallback similarity.")

print(f"Embeddings available: {EMBEDDINGS_AVAILABLE}")

Embeddings available: True


---
## 2. Data Structures

In [4]:
class SubjectType(Enum):
    """Subject categories for scoring rules."""
    MATH = "math"
    PHYSICS = "physics"
    CHEMISTRY = "chemistry"
    BIOLOGY = "biology"
    ENGLISH = "english"
    GENERAL = "general"


@dataclass
class AnswerKey:
    """
    Reference answer with scoring criteria.
    
    Attributes:
        full_answer: Complete correct answer
        keywords: Must-have terms (with weights)
        steps: Ordered steps for multi-part answers
        max_marks: Maximum marks for this question
        subject: Subject for rule-specific scoring
    """
    full_answer: str
    keywords: Dict[str, float] = field(default_factory=dict)
    steps: List[str] = field(default_factory=list)
    max_marks: float = 10.0
    subject: SubjectType = SubjectType.GENERAL
    partial_credit_rules: Dict[str, float] = field(default_factory=dict)


@dataclass
class ScoringResult:
    """
    Result of answer evaluation.
    
    Attributes:
        score: Computed score (0 to max_marks)
        max_marks: Maximum possible marks
        confidence: Model confidence (0-1)
        breakdown: Score breakdown by component
        feedback: Suggestions for the student
        matched_keywords: Keywords found in answer
        missing_keywords: Keywords not found
    """
    score: float
    max_marks: float
    confidence: float
    breakdown: Dict[str, float] = field(default_factory=dict)
    feedback: List[str] = field(default_factory=list)
    matched_keywords: List[str] = field(default_factory=list)
    missing_keywords: List[str] = field(default_factory=list)
    matched_steps: List[int] = field(default_factory=list)


# Example
example_key = AnswerKey(
    full_answer="Newton's first law states that an object at rest stays at rest, and an object in motion stays in motion, unless acted upon by an external force.",
    keywords={
        "rest": 0.15,
        "motion": 0.15,
        "external force": 0.2,
        "inertia": 0.1,
    },
    steps=[
        "object at rest stays at rest",
        "object in motion stays in motion",
        "unless acted upon by external force"
    ],
    max_marks=5.0,
    subject=SubjectType.PHYSICS
)
print(f"Example answer key: {example_key.full_answer[:50]}...")

Example answer key: Newton's first law states that an object at rest s...


---
## 3. Semantic Similarity Scoring

Using Sentence-Transformers to compare answer embeddings.

In [11]:
class SemanticScorer:
    """
    Score answers using semantic similarity (embeddings).
    
    Uses all-MiniLM-L6-v2 for efficient embeddings.
    """
    
    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
        if EMBEDDINGS_AVAILABLE:
            print(f"Loading embedding model: {model_name}...")
            self.model = SentenceTransformer(model_name)
        else:
            self.model = None
    
    def compute_similarity(self, text1: str, text2: str) -> float:
        """
        Compute semantic similarity between two texts.
        
        Returns:
            Cosine similarity (0-1)
        """
        if self.model is None:
            return self._fallback_similarity(text1, text2)
        
        embeddings = self.model.encode([text1, text2])
        similarity = np.dot(embeddings[0], embeddings[1]) / (
            np.linalg.norm(embeddings[0]) * np.linalg.norm(embeddings[1])
        )
        return float(max(0, similarity))  # Clamp to 0-1
    
    def _fallback_similarity(self, text1: str, text2: str) -> float:
        """
        Fallback similarity using Jaccard index.
        """
        words1 = set(text1.lower().split())
        words2 = set(text2.lower().split())
        
        if not words1 or not words2:
            return 0.0
        
        intersection = len(words1 & words2)
        union = len(words1 | words2)
        
        return intersection / union if union > 0 else 0.0
    
    def score_answer(self, student_answer: str, reference_answer: str, max_marks: float = 10.0) -> Tuple[float, float]:
        """
        Score answer based on semantic similarity.
        
        Returns:
            (score, confidence)
        """
        similarity = self.compute_similarity(student_answer, reference_answer)
        
        # Convert similarity to score
        # Using a curve that gives partial credit
        if similarity >= 0.9:
            score_ratio = 1.0
        elif similarity >= 0.7:
            score_ratio = 0.7 + (similarity - 0.7) * 1.5  # 0.7-1.0
        elif similarity >= 0.5:
            score_ratio = 0.4 + (similarity - 0.5) * 1.5  # 0.4-0.7
        else:
            score_ratio = similarity * 0.8  # 0-0.4
        
        score = score_ratio * max_marks
        confidence = min(0.95, similarity + 0.2)  # Cap confidence at 95%
        
        return score, confidence


# Test
semantic_scorer = SemanticScorer()

reference = "Newton's first law states that an object at rest stays at rest."
student_good = "Newton's first law says an object at rest remains at rest."
student_partial = "Objects don't move unless pushed."
student_wrong = "Gravity pulls things down."

for student, label in [(student_good, "Good"), (student_partial, "Partial"), (student_wrong, "Wrong")]:
    sim = semantic_scorer.compute_similarity(reference, student)
    score, conf = semantic_scorer.score_answer(student, reference, max_marks=10)
    print(f"{label}: similarity={sim:.2f}, score={score:.1f}/10, confidence={conf:.2f}")

Loading embedding model: all-MiniLM-L6-v2...
Good: similarity=0.97, score=10.0/10, confidence=0.95
Partial: similarity=0.41, score=3.3/10, confidence=0.61
Wrong: similarity=0.36, score=2.9/10, confidence=0.56


---
## 4. Keyword Matching

Check for must-have terms and assign partial credit.

In [12]:
class KeywordMatcher:
    """
    Match keywords in student answers.
    
    Features:
    - Case-insensitive matching
    - Synonym support
    - Weighted keywords
    """
    
    # Common synonyms/variants
    SYNONYMS = {
        "force": ["forces", "push", "pull"],
        "motion": ["moving", "movement", "velocity"],
        "rest": ["stationary", "still", "not moving"],
        "energy": ["power", "work"],
        "mass": ["weight", "kg"],
        "acceleration": ["speeding up", "slowing down", "a"],
    }
    
    def __init__(self, use_synonyms: bool = True):
        self.use_synonyms = use_synonyms
    
    def normalize_text(self, text: str) -> str:
        """Normalize text for matching."""
        text = text.lower()
        text = re.sub(r'[^a-z0-9\s]', ' ', text)
        text = re.sub(r'\s+', ' ', text).strip()
        return text
    
    def find_keyword(self, keyword: str, text: str) -> bool:
        """
        Check if keyword (or synonym) is in text.
        """
        text_norm = self.normalize_text(text)
        keyword_norm = self.normalize_text(keyword)
        
        # Direct match
        if keyword_norm in text_norm:
            return True
        
        # Synonym match
        if self.use_synonyms:
            synonyms = self.SYNONYMS.get(keyword_norm, [])
            for syn in synonyms:
                if syn in text_norm:
                    return True
        
        return False
    
    def match_keywords(
        self, 
        student_answer: str, 
        keywords: Dict[str, float]
    ) -> Tuple[List[str], List[str], float]:
        """
        Match keywords and calculate weighted score.
        
        Args:
            student_answer: Student's response
            keywords: Dict of keyword -> weight
            
        Returns:
            (matched, missing, score_ratio)
        """
        matched = []
        missing = []
        total_weight = sum(keywords.values())
        matched_weight = 0.0
        
        for keyword, weight in keywords.items():
            if self.find_keyword(keyword, student_answer):
                matched.append(keyword)
                matched_weight += weight
            else:
                missing.append(keyword)
        
        score_ratio = matched_weight / total_weight if total_weight > 0 else 0.0
        return matched, missing, score_ratio


# Test
keyword_matcher = KeywordMatcher()

keywords = {
    "rest": 0.2,
    "motion": 0.2,
    "external force": 0.3,
    "inertia": 0.15,
    "newton": 0.15
}

test_answers = [
    "Newton's first law of inertia states objects at rest stay at rest unless an external force acts.",
    "Things that are still stay still, and moving things keep moving.",
    "Force equals mass times acceleration."
]

for answer in test_answers:
    matched, missing, score = keyword_matcher.match_keywords(answer, keywords)
    print(f"Answer: '{answer[:50]}...'")
    print(f"  Matched: {matched}")
    print(f"  Missing: {missing}")
    print(f"  Score: {score:.0%}")
    print()

Answer: 'Newton's first law of inertia states objects at re...'
  Matched: ['rest', 'external force', 'inertia', 'newton']
  Missing: ['motion']
  Score: 80%

Answer: 'Things that are still stay still, and moving thing...'
  Matched: ['rest', 'motion']
  Missing: ['external force', 'inertia', 'newton']
  Score: 40%

Answer: 'Force equals mass times acceleration....'
  Matched: []
  Missing: ['rest', 'motion', 'external force', 'inertia', 'newton']
  Score: 0%



---
## 5. Step Matching (Multi-Part Answers)

For answers that require multiple steps (math problems, derivations).

In [7]:
class StepMatcher:
    """
    Match steps in multi-part answers.
    
    Useful for:
    - Math derivations
    - Physics problems
    - Chemistry equations
    """
    
    def __init__(self, semantic_scorer: SemanticScorer = None):
        self.semantic_scorer = semantic_scorer or SemanticScorer()
        self.similarity_threshold = 0.6  # Min similarity to consider step matched
    
    def extract_steps(self, answer: str) -> List[str]:
        """
        Extract steps from an answer.
        
        Splits on:
        - Numbered lists (1., 2., etc.)
        - Step markers (Step 1, Step 2)
        - Newlines
        - Semicolons
        """
        # Try numbered list
        steps = re.split(r'\d+[.)]\s*', answer)
        steps = [s.strip() for s in steps if s.strip()]
        
        if len(steps) <= 1:
            # Try step markers
            steps = re.split(r'step\s*\d*[:.)]?\s*', answer, flags=re.IGNORECASE)
            steps = [s.strip() for s in steps if s.strip()]
        
        if len(steps) <= 1:
            # Try newlines or semicolons
            steps = re.split(r'[\n;]', answer)
            steps = [s.strip() for s in steps if s.strip()]
        
        return steps
    
    def match_steps(
        self, 
        student_answer: str, 
        reference_steps: List[str]
    ) -> Tuple[List[int], float]:
        """
        Match student answer steps to reference steps.
        
        Returns:
            (matched_step_indices, score_ratio)
        """
        if not reference_steps:
            return [], 0.0
        
        student_steps = self.extract_steps(student_answer)
        if not student_steps:
            # Treat whole answer as one step
            student_steps = [student_answer]
        
        matched_indices = []
        
        for i, ref_step in enumerate(reference_steps):
            best_similarity = 0.0
            
            for student_step in student_steps:
                similarity = self.semantic_scorer.compute_similarity(ref_step, student_step)
                best_similarity = max(best_similarity, similarity)
            
            if best_similarity >= self.similarity_threshold:
                matched_indices.append(i)
        
        score_ratio = len(matched_indices) / len(reference_steps)
        return matched_indices, score_ratio


# Test
step_matcher = StepMatcher(semantic_scorer)

reference_steps = [
    "Identify the given values: m = 5kg, a = 10m/s¬≤",
    "Apply Newton's second law: F = ma",
    "Calculate: F = 5 √ó 10 = 50N"
]

student_answers = [
    "1. Given: mass is 5kg, acceleration is 10m/s¬≤. 2. Using F=ma. 3. F = 5*10 = 50N",
    "F = ma = 5 * 10 = 50 N",
    "The force is mass times acceleration so 50 newtons"
]

for answer in student_answers:
    matched, score = step_matcher.match_steps(answer, reference_steps)
    print(f"Answer: '{answer[:50]}...'")
    print(f"  Matched steps: {matched} / {len(reference_steps)}")
    print(f"  Score: {score:.0%}")
    print()

Answer: '1. Given: mass is 5kg, acceleration is 10m/s¬≤. 2. ...'
  Matched steps: [0, 1, 2] / 3
  Score: 100%

Answer: 'F = ma = 5 * 10 = 50 N...'
  Matched steps: [1, 2] / 3
  Score: 67%

Answer: 'The force is mass times acceleration so 50 newtons...'
  Matched steps: [1] / 3
  Score: 33%



---
## 6. Combined Scoring Engine

Combine all scoring methods with configurable weights.

In [8]:
class AnswerScoringEngine:
    """
    Complete answer scoring engine.
    
    Combines:
    - Semantic similarity
    - Keyword matching
    - Step matching
    - Subject-specific rules
    """
    
    # Scoring weights by component
    DEFAULT_WEIGHTS = {
        "semantic": 0.4,
        "keyword": 0.35,
        "steps": 0.25
    }
    
    def __init__(
        self,
        semantic_weight: float = 0.4,
        keyword_weight: float = 0.35,
        step_weight: float = 0.25
    ):
        self.weights = {
            "semantic": semantic_weight,
            "keyword": keyword_weight,
            "steps": step_weight
        }
        
        # Initialize scorers
        self.semantic_scorer = SemanticScorer()
        self.keyword_matcher = KeywordMatcher()
        self.step_matcher = StepMatcher(self.semantic_scorer)
    
    def score_answer(self, student_answer: str, answer_key: AnswerKey) -> ScoringResult:
        """
        Score a student answer against the answer key.
        
        Returns:
            ScoringResult with score, confidence, and breakdown
        """
        breakdown = {}
        feedback = []
        
        # 1. Semantic similarity
        semantic_score, semantic_conf = self.semantic_scorer.score_answer(
            student_answer, 
            answer_key.full_answer,
            max_marks=1.0  # Normalize to 0-1
        )
        breakdown["semantic"] = semantic_score
        
        # 2. Keyword matching
        if answer_key.keywords:
            matched_kw, missing_kw, keyword_score = self.keyword_matcher.match_keywords(
                student_answer,
                answer_key.keywords
            )
        else:
            matched_kw, missing_kw, keyword_score = [], [], semantic_score  # Use semantic if no keywords
        breakdown["keyword"] = keyword_score
        
        # 3. Step matching
        if answer_key.steps:
            matched_steps, step_score = self.step_matcher.match_steps(
                student_answer,
                answer_key.steps
            )
        else:
            matched_steps, step_score = [], semantic_score  # Use semantic if no steps
        breakdown["steps"] = step_score
        
        # 4. Weighted final score
        final_ratio = (
            breakdown["semantic"] * self.weights["semantic"] +
            breakdown["keyword"] * self.weights["keyword"] +
            breakdown["steps"] * self.weights["steps"]
        )
        
        final_score = final_ratio * answer_key.max_marks
        
        # 5. Confidence
        confidence = min(0.95, (
            semantic_conf * 0.5 +
            (1.0 if len(matched_kw) >= len(answer_key.keywords) * 0.5 else 0.5) * 0.3 +
            (1.0 if len(matched_steps) >= len(answer_key.steps) * 0.5 else 0.5) * 0.2
        ))
        
        # 6. Generate feedback
        if missing_kw:
            feedback.append(f"Consider mentioning: {', '.join(missing_kw[:3])}")
        
        if answer_key.steps and len(matched_steps) < len(answer_key.steps):
            missing_step_count = len(answer_key.steps) - len(matched_steps)
            feedback.append(f"Missing {missing_step_count} step(s) in your solution")
        
        if final_ratio >= 0.9:
            feedback.append("‚úÖ Excellent answer!")
        elif final_ratio >= 0.7:
            feedback.append("üëç Good answer with room for improvement")
        elif final_ratio >= 0.5:
            feedback.append("‚ö†Ô∏è Partial understanding shown")
        else:
            feedback.append("‚ùå Review this concept")
        
        return ScoringResult(
            score=round(final_score, 2),
            max_marks=answer_key.max_marks,
            confidence=round(confidence, 2),
            breakdown={k: round(v, 2) for k, v in breakdown.items()},
            feedback=feedback,
            matched_keywords=matched_kw,
            missing_keywords=missing_kw,
            matched_steps=matched_steps
        )


# Test
engine = AnswerScoringEngine()

# Create answer key
answer_key = AnswerKey(
    full_answer="Newton's second law states that Force equals mass times acceleration (F = ma). The acceleration of an object is directly proportional to the net force and inversely proportional to its mass.",
    keywords={
        "force": 0.2,
        "mass": 0.2,
        "acceleration": 0.2,
        "F = ma": 0.2,
        "proportional": 0.1,
        "newton": 0.1
    },
    steps=[
        "Force equals mass times acceleration",
        "Acceleration is proportional to force",
        "Acceleration is inversely proportional to mass"
    ],
    max_marks=10.0,
    subject=SubjectType.PHYSICS
)

# Test answers
test_cases = [
    ("Newton's second law: F = ma. Force equals mass times acceleration. Greater force means more acceleration, and greater mass means less acceleration.", "Excellent"),
    ("F = ma means force is mass multiplied by acceleration.", "Good"),
    ("Force makes things move faster.", "Partial"),
    ("The mitochondria is the powerhouse of the cell.", "Wrong")
]

print("=" * 60)
print("ANSWER SCORING DEMO")
print("=" * 60)

for answer, label in test_cases:
    result = engine.score_answer(answer, answer_key)
    print(f"\n[{label}] '{answer[:50]}...'")
    print(f"  Score: {result.score}/{result.max_marks}")
    print(f"  Confidence: {result.confidence:.0%}")
    print(f"  Breakdown: {result.breakdown}")
    print(f"  Matched keywords: {result.matched_keywords}")
    print(f"  Feedback: {result.feedback[0]}")

Loading embedding model: all-MiniLM-L6-v2...
ANSWER SCORING DEMO

[Excellent] 'Newton's second law: F = ma. Force equals mass tim...'
  Score: 6.93/10.0
  Confidence: 86%
  Breakdown: {'semantic': 0.74, 'keyword': 0.9, 'steps': 0.33}
  Matched keywords: ['force', 'mass', 'acceleration', 'F = ma', 'newton']
  Feedback: Consider mentioning: proportional

[Good] 'F = ma means force is mass multiplied by accelerat...'
  Score: 7.32/10.0
  Confidence: 95%
  Breakdown: {'semantic': 0.71, 'keyword': 0.8, 'steps': 0.67}
  Matched keywords: ['force', 'mass', 'acceleration', 'F = ma']
  Feedback: Consider mentioning: proportional, newton

[Partial] 'Force makes things move faster....'
  Score: 3.88/10.0
  Confidence: 60%
  Breakdown: {'semantic': 0.41, 'keyword': 0.4, 'steps': 0.33}
  Matched keywords: ['force', 'acceleration']
  Feedback: Consider mentioning: mass, F = ma, proportional

[Wrong] 'The mitochondria is the powerhouse of the cell....'
  Score: 1.06/10.0
  Confidence: 41%
  Breakdown

---
## 7. Subject-Specific Rules

Custom scoring rules for different subjects.

In [9]:
class SubjectSpecificScorer:
    """
    Apply subject-specific scoring rules.
    
    Examples:
    - Math: Must have correct final answer
    - Physics: Unit checking
    - Chemistry: Balanced equations
    """
    
    @staticmethod
    def check_math_answer(student: str, reference: str) -> Tuple[bool, float]:
        """
        Check if final numerical answer matches.
        
        Returns:
            (is_correct, bonus_modifier)
        """
        # Extract numbers from both
        student_nums = re.findall(r'-?\d+\.?\d*', student)
        ref_nums = re.findall(r'-?\d+\.?\d*', reference)
        
        if not ref_nums:
            return True, 1.0
        
        # Check if final answer matches
        ref_final = float(ref_nums[-1])
        
        for num in student_nums:
            try:
                if abs(float(num) - ref_final) < 0.01:
                    return True, 1.2  # 20% bonus for correct answer
            except:
                pass
        
        return False, 0.8  # 20% penalty for wrong answer
    
    @staticmethod
    def check_physics_units(student: str) -> Tuple[bool, List[str]]:
        """
        Check if answer includes correct units.
        """
        common_units = [
            (r'\bN\b|newton', 'Force (N)'),
            (r'm/s|m\/s', 'Velocity (m/s)'),
            (r'm/s[¬≤2]|m\/s[¬≤2]', 'Acceleration (m/s¬≤)'),
            (r'\bkg\b|kilogram', 'Mass (kg)'),
            (r'\bJ\b|joule', 'Energy (J)'),
            (r'\bW\b|watt', 'Power (W)'),
        ]
        
        found_units = []
        for pattern, unit_name in common_units:
            if re.search(pattern, student, re.IGNORECASE):
                found_units.append(unit_name)
        
        has_units = len(found_units) > 0
        return has_units, found_units
    
    @staticmethod
    def check_chemistry_balanced(student: str) -> Tuple[bool, str]:
        """
        Basic check for balanced equation indicators.
        """
        # Look for arrow and coefficients
        has_arrow = '‚Üí' in student or '->' in student or '=' in student
        has_coefficients = bool(re.search(r'\d+[A-Z]', student))
        
        if has_arrow and has_coefficients:
            return True, "Equation appears balanced"
        elif has_arrow:
            return False, "Consider adding coefficients"
        else:
            return False, "Missing reaction arrow"


# Demo
print("Subject-Specific Checks:\n")

# Math check
is_correct, modifier = SubjectSpecificScorer.check_math_answer(
    "F = 5 √ó 10 = 50",
    "F = ma = 5 √ó 10 = 50N"
)
print(f"Math: Correct={is_correct}, Modifier={modifier}")

# Physics units
has_units, units = SubjectSpecificScorer.check_physics_units("The force is 50N and velocity is 10 m/s")
print(f"Physics: Has units={has_units}, Found={units}")

# Chemistry
balanced, msg = SubjectSpecificScorer.check_chemistry_balanced("2H2 + O2 ‚Üí 2H2O")
print(f"Chemistry: Balanced={balanced}, {msg}")

Subject-Specific Checks:

Math: Correct=True, Modifier=1.2
Physics: Has units=True, Found=['Velocity (m/s)']
Chemistry: Balanced=True, Equation appears balanced


---
## 8. Export Scoring Module

In [10]:
from pathlib import Path

SCORING_MODULE = '''
"""
Answer Scoring Engine for Human-in-the-Loop Evaluation

Combines:
- Semantic similarity (sentence embeddings)
- Keyword matching (weighted terms)
- Step matching (multi-part answers)
- Subject-specific rules
"""
import re
import numpy as np
from typing import List, Dict, Tuple, Optional
from dataclasses import dataclass, field
from enum import Enum

try:
    from sentence_transformers import SentenceTransformer
    EMBEDDINGS_AVAILABLE = True
except ImportError:
    EMBEDDINGS_AVAILABLE = False


class SubjectType(Enum):
    MATH = "math"
    PHYSICS = "physics"
    CHEMISTRY = "chemistry"
    BIOLOGY = "biology"
    ENGLISH = "english"
    GENERAL = "general"


@dataclass
class AnswerKey:
    full_answer: str
    keywords: Dict[str, float] = field(default_factory=dict)
    steps: List[str] = field(default_factory=list)
    max_marks: float = 10.0
    subject: SubjectType = SubjectType.GENERAL


@dataclass
class ScoringResult:
    score: float
    max_marks: float
    confidence: float
    breakdown: Dict[str, float] = field(default_factory=dict)
    feedback: List[str] = field(default_factory=list)
    matched_keywords: List[str] = field(default_factory=list)
    missing_keywords: List[str] = field(default_factory=list)


class SemanticScorer:
    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
        self.model = SentenceTransformer(model_name) if EMBEDDINGS_AVAILABLE else None
    
    def compute_similarity(self, text1: str, text2: str) -> float:
        if self.model is None:
            words1, words2 = set(text1.lower().split()), set(text2.lower().split())
            return len(words1 & words2) / len(words1 | words2) if words1 | words2 else 0
        embeddings = self.model.encode([text1, text2])
        return float(np.dot(embeddings[0], embeddings[1]) / 
                    (np.linalg.norm(embeddings[0]) * np.linalg.norm(embeddings[1])))


class KeywordMatcher:
    def match_keywords(self, answer: str, keywords: Dict[str, float]) -> Tuple[List[str], List[str], float]:
        answer_lower = answer.lower()
        matched, missing = [], []
        matched_weight = 0.0
        for kw, weight in keywords.items():
            if kw.lower() in answer_lower:
                matched.append(kw)
                matched_weight += weight
            else:
                missing.append(kw)
        return matched, missing, matched_weight / sum(keywords.values()) if keywords else 0


class AnswerScoringEngine:
    def __init__(self):
        self.semantic_scorer = SemanticScorer()
        self.keyword_matcher = KeywordMatcher()
    
    def score_answer(self, student_answer: str, answer_key: AnswerKey) -> ScoringResult:
        sem_score = self.semantic_scorer.compute_similarity(student_answer, answer_key.full_answer)
        matched_kw, missing_kw, kw_score = self.keyword_matcher.match_keywords(
            student_answer, answer_key.keywords) if answer_key.keywords else ([], [], sem_score)
        
        final_ratio = sem_score * 0.5 + kw_score * 0.5
        final_score = round(final_ratio * answer_key.max_marks, 2)
        confidence = min(0.95, sem_score + 0.2)
        
        feedback = []
        if missing_kw:
            feedback.append(f"Consider mentioning: {\', \'.join(missing_kw[:3])}")
        if final_ratio >= 0.8:
            feedback.append("‚úÖ Great answer!")
        elif final_ratio >= 0.5:
            feedback.append("‚ö†Ô∏è Partial credit")
        else:
            feedback.append("‚ùå Review needed")
        
        return ScoringResult(
            score=final_score, max_marks=answer_key.max_marks, confidence=round(confidence, 2),
            breakdown={"semantic": round(sem_score, 2), "keyword": round(kw_score, 2)},
            feedback=feedback, matched_keywords=matched_kw, missing_keywords=missing_kw
        )
'''

output_path = Path("../utils/scoring.py")
output_path.parent.mkdir(parents=True, exist_ok=True)
output_path.write_text(SCORING_MODULE)
print(f"‚úÖ Exported scoring module to: {output_path.resolve()}")

‚úÖ Exported scoring module to: /Users/proxim/projects/ensureStudy/ml/utils/scoring.py


---
## Summary

This notebook implemented:

| Component | Purpose |
|-----------|--------|
| `SemanticScorer` | Sentence embeddings similarity |
| `KeywordMatcher` | Weighted keyword matching |
| `StepMatcher` | Multi-step answer matching |
| `AnswerScoringEngine` | Combined scoring pipeline |
| `SubjectSpecificScorer` | Math/Physics/Chem rules |

**Key features:**
- Partial credit for incomplete answers
- Synonym support
- Confidence scores for teacher review
- Feedback generation

**Next:** Backend API integration