# Tutorial: Validation and Analysis

This tutorial demonstrates how to evaluate classifier performance and generate publication-quality metrics and visualizations.

In [None]:
from openness_classifier.validation import (
    validate_classifications,
    cross_validate,
    compute_metrics,
    performance_comparison,
)
from openness_classifier.visualization import (
    plot_confusion_matrix,
    plot_validation_results,
    plot_class_distribution,
)
from openness_classifier.data import load_training_data, train_test_split, validate_training_data
from openness_classifier.classifier import OpennessClassifier
from openness_classifier.config import load_config
from pathlib import Path

## 1. Load and Analyze Training Data

In [None]:
data_path = Path('../../resources/abpoll-open-b71bd12/data/processed/articles_reviewed.csv')

# Handle the missing-file case first so the rest of the cell reads as the
# normal path. Later cells check data_path.exists() again before using the
# variables defined here.
if not data_path.exists():
    print(f"Data not found at {data_path}")
else:
    # load_training_data returns a pair: data examples and code examples.
    data_examples, code_examples = load_training_data(data_path)

    print(f"Data examples: {len(data_examples)}")
    print(f"Code examples: {len(code_examples)}")

    # Quality check on the data examples only (code_examples are not validated here).
    validation = validate_training_data(data_examples)
    print("\nData Quality Check:")
    print(f"  Valid: {validation['valid']}")
    print(f"  Class distribution: {validation['class_distribution']}")
    # Only show the warnings line when there is something to report.
    if validation['warnings']:
        print(f"  Warnings: {validation['warnings']}")

## 2. Split Data and Train Classifier

In [None]:
if data_path.exists():
    # Split the data examples into training and test subsets (test_size=0.2).
    train_data, test_data = train_test_split(data_examples, test_size=0.2)

    # Report the size of each subset.
    for split_label, split_examples in (("Training set", train_data), ("Test set", test_data)):
        print(f"{split_label}: {len(split_examples)} examples")

    # Build the classifier from the loaded project configuration.
    config = load_config()
    classifier = OpennessClassifier.from_config(config)

## 3. Run Validation

In [None]:
# Run only when the split cell above has executed and produced a non-empty test set.
if 'test_data' in dir() and test_data:
    print("Running validation on test set...")

    def _print_progress(p, t):
        """Overwrite the current output line with 'p/t'.

        Presumably p is the number of examples processed and t the total;
        confirm against validate_classifications.
        """
        print(f"\r{p}/{t}", end="")

    result = validate_classifications(
        test_examples=test_data,
        classifier=classifier,
        progress_callback=_print_progress,
    )

    # Leading newlines move past the in-place progress line.
    print("\n\nValidation Complete!")
    print(f"Overall Accuracy: {result.overall_accuracy:.1%}")

    # Data-classification metrics are printed only when present.
    data_metrics = result.data_metrics
    if data_metrics:
        print("\nData Classification Metrics:")
        print(f"  Accuracy: {data_metrics.accuracy:.3f}")
        print(f"  Cohen's Kappa: {data_metrics.cohens_kappa:.3f}")
        print(f"  Macro F1: {data_metrics.macro_f1:.3f}")

## 4. Generate Visualizations

In [None]:
# Run only when validation has produced at least one confusion matrix.
if 'result' in dir() and result.confusion_matrices:
    # plt is used below but was never imported in this notebook, which raised
    # NameError at plt.show(). Import it here so this cell works on its own.
    import matplotlib.pyplot as plt

    # Plot confusion matrix
    if 'data' in result.confusion_matrices:
        fig = plot_confusion_matrix(
            result.confusion_matrices['data'],
            title='Data Availability Classification',
            normalize=True
        )
        plt.show()

## 5. Export Results for Manuscript

In [None]:
# Run only when the validation cell has produced a result.
if 'result' in dir():
    # Generate markdown for manuscript
    markdown = result.to_markdown()
    print(markdown)

    # Save to file. Create the output directory first: write_text raises
    # FileNotFoundError when the parent directory is missing. Write UTF-8
    # explicitly so the output does not depend on the platform's default encoding.
    markdown_path = Path('../../data/validation_results.md')
    markdown_path.parent.mkdir(parents=True, exist_ok=True)
    markdown_path.write_text(markdown, encoding='utf-8')

    # Export JSON for archiving (same directory, which now exists)
    result.to_json('../../data/validation_results.json')

## Summary

Key metrics:
- **Accuracy**: Overall classification accuracy
- **Cohen's Kappa**: Inter-rater agreement between the classifier and the reference labels, corrected for chance (target: >0.6)
- **Macro F1**: Average F1 across classes
- **Per-class precision/recall**: Performance by category

Export formats:
- `to_markdown()`: Tables for manuscripts
- `to_json()`: Full results for archiving
- `plot_confusion_matrix()`: Publication-quality figures