# Milestone 7: Responsible AI and Testing

Demonstrates:
1. Hallucination detection
2. Robustness testing
3. Adversarial testing
4. LangSmith tracing
5. Test report generation

In [None]:
import sys
sys.path.insert(0, '..')

from src.rag_baseline import BaselineRAG
from src.responsible_ai import (
    calculate_hallucination_score,
    run_robustness_test,
    run_adversarial_tests,
    generate_test_report
)
from src.langsmith_integration import init_langsmith, LangSmithTracer
from dotenv import load_dotenv

# Load environment variables from a .env file (python-dotenv) before any
# later cell runs. NOTE(review): presumably this supplies the API keys the
# RAG and LangSmith cells need -- confirm against the project's .env template.
load_dotenv()
# Simple progress marker so it is obvious the import cell finished.
print('✓ Modules imported')

## Test 1: Hallucination Detection

In [None]:
# Build the baseline RAG pipeline from the '../faiss_index' path and ask it
# a single question. `rag` is reused by the cells that follow.
rag = BaselineRAG('../faiss_index')
result = rag.query('What is GDPR?')

# Score the answer against the returned sources; a result without a
# 'sources' entry is scored against an empty list.
answer_text = result['answer']
source_docs = result.get('sources', [])
score = calculate_hallucination_score(answer_text, source_docs)

# Report each metric; each lookup happens right before its own print.
risk_level = score['hallucination_risk']
print(f"Hallucination risk: {risk_level}")
overlap = score['overlap_score']
print(f"Overlap score: {overlap:.2f}")
supported = score['supported_ratio']
print(f"Supported ratio: {supported:.2f}")

## Test 2: Robustness Testing

In [None]:
# Run the robustness suite against the baseline pipeline. The result dict is
# kept in `robustness_results` because the report cell reads it later.
robustness_results = run_robustness_test(rag)

# Summary line 1: pass rate rendered as a percentage with one decimal.
pass_rate = robustness_results['pass_rate']
print(f"Pass rate: {pass_rate:.1%}")

# Summary line 2: passed / total counts.
passed_count = robustness_results['num_passed']
total_count = robustness_results['num_total']
print(f"Passed: {passed_count}/{total_count}")

# Only the first three cases are shown to keep the cell output short.
for r in robustness_results['results'][:3]:
    case_name = r['name']
    mark = '✓' if r['passed'] else '✗'
    print(f"\n{case_name}: {mark}")
    # 'issues' is optional; print it only when present and non-empty.
    issues = r.get('issues')
    if issues:
        print(f"  Issues: {issues}")

## Test 3: Adversarial Testing

In [None]:
from src.guardrails import SafetyGuard

# Wrap the baseline pipeline in SafetyGuard and run the adversarial suite
# against the wrapped object rather than the raw pipeline.
safe_rag = SafetyGuard(rag)
adv_results = run_adversarial_tests(safe_rag)

# Summary: passed / total counts for the adversarial prompts.
adv_passed = adv_results['num_passed']
adv_total = adv_results['num_tests']
print(f"Adversarial tests passed: {adv_passed}/{adv_total}")

# Show the first three prompts; a result without a 'passed' key counts as ✗.
for r in adv_results['results'][:3]:
    prompt_text = r['prompt']
    mark = '✓' if r.get('passed') else '✗'
    print(f"\n{prompt_text}: {mark}")

## Test 4: LangSmith Tracing

In [None]:
# Set up LangSmith for this project and echo the settings it returned.
config = init_langsmith(project_name='gdpr-rag-testing')
langsmith_enabled = config['enabled']
print(f"LangSmith enabled: {langsmith_enabled}")
langsmith_project = config['project_name']
print(f"Project: {langsmith_project}")

# Run one query inside a tracer session and log it. The question is named
# once so the text sent to the pipeline and the text logged cannot drift.
question = 'What are data subject rights?'
with LangSmithTracer('test-session') as tracer:
    result = rag.query(question)
    tracer.log_query(question, result)
    # NOTE(review): this message is printed regardless of config['enabled'];
    # confirm the tracer really sends data when LangSmith is disabled.
    print('\n✓ Query traced to LangSmith')

## Generate Test Report

In [None]:
# Combine the robustness and adversarial results from the earlier cells into
# one report. Both variables must already exist, so run those cells first.
report = generate_test_report(robustness_results, adv_results)
print(report)

# Final progress marker for the notebook.
print('\n✓ All responsible AI tests complete')