# Chapter 1: Evaluations and Alignments for AI

Hands-on examples to accompany Chapter 1.

In [None]:
import re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Verifiable Task Example: Code Evaluation

A HumanEval-style evaluation: a solution counts as correct only if it passes its unit tests.

In [None]:
def has_close_elements(numbers: list[float], threshold: float) -> bool:
    """Report whether any two entries of ``numbers`` differ by less than ``threshold``.

    Args:
        numbers: Values to compare against one another.
        threshold: Exclusive upper bound on the absolute difference that
            counts as "close".

    Returns:
        True if at least one pair of entries at different positions satisfies
        ``abs(a - b) < threshold``; False otherwise, including when fewer
        than two values are given.
    """
    # Positions (not values) are compared so that equal values at different
    # indices still count as a pair, while an entry is never paired with itself.
    return any(
        abs(left - right) < threshold
        for pos_left, left in enumerate(numbers)
        for pos_right, right in enumerate(numbers)
        if pos_left != pos_right
    )

# Verifiable task: every case either passes or fails, with no judgement call.
# Each case is (input numbers, threshold, expected result).
tests = [
    ([1.0, 2.0, 3.0], 0.5, False),
    ([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3, True),
]

# Run each case and print a pass/fail mark next to the actual result.
for nums, thresh, expected in tests:
    result = has_close_elements(nums, thresh)
    if result == expected:
        status = "✓"
    else:
        status = "✗"
    print(f"{status} has_close_elements({nums}, {thresh}) = {result}")

## Challenge: Answer Extraction

Extracting answers from chain-of-thought responses.

In [None]:
# Sample model outputs that state the same answer in different surface forms:
# embedded in chain-of-thought text, as a bare number, and written with a
# thousands separator.
responses = [
    "Let me work through this. 42 x 38 = 1680 - 84 = 1596. The answer is 1,596.",
    "1596",
    "The answer is 1,596",
]

def extract_number(text: str) -> int | None:
    """Extract the final number from a response."""
    text = text.replace(",", "")
    numbers = re.findall(r'\b\d+\b', text)
    return int(numbers[-1]) if numbers else None

# Show the extracted number next to a preview of each response. The ellipsis
# is added only when the response was actually cut at 40 characters, so short
# responses are not shown as if they were truncated.
for r in responses:
    preview = r if len(r) <= 40 else f"{r[:40]}..."
    print(f"{extract_number(r)} <- '{preview}'")

## Faithfulness Metric

Measuring hallucination by checking claim support.

In [None]:
def faithfulness(claims: list[dict]) -> float:
    """Return the fraction of claims that are marked as supported.

    Faithfulness = Supported Claims / Total Claims.

    Args:
        claims: Claim records; each is read through its ``'supported'`` key.

    Returns:
        The supported share of ``claims``, or 1.0 when ``claims`` is empty
        (which also avoids dividing by zero).
    """
    if not claims:
        return 1.0
    support_flags = [entry['supported'] for entry in claims]
    return sum(support_flags) / len(claims)

# Example: claims taken from an AI response about an arXiv paper.
# Each record pairs the claim text with whether it is marked as supported.
claims = [
    {"claim": "Year: 2020", "supported": True},
    {"claim": "Source: arXiv", "supported": True},
    {"claim": "Title: 'GLU Variants Improve Transformer'", "supported": True},
    {"claim": "arXiv: 2002.05202v1", "supported": True},
    {"claim": "Authors: Narang, Chung", "supported": False},  # the hallucinated claim
]

score = faithfulness(claims)
# The hallucination rate is the complement of faithfulness.
hallucination_rate = 1 - score
print(f"Faithfulness: {score:.0%}")
print(f"Hallucination rate: {hallucination_rate:.0%}")

## Exercises

1. Classify these tasks as verifiable or open-ended:
   - Sentiment classification
   - Poetry generation
   - SQL query generation
   - Customer support response

2. An AI response contains 12 claims, 3 of which are unsupported. Calculate its faithfulness.

3. Design a scenario where policy alignment conflicts with principled alignment.

In [None]:
# Exercise 2: 12 claims in total, 3 of them unsupported.
total = 12
unsupported = 3
# Faithfulness is the supported share of all claims.
supported = total - unsupported
print(f"Faithfulness: {supported / total:.0%}")