# Model Training

This notebook is designed for training the models used in the Legal NLP + Explainability Toolkit project. It includes the necessary steps to fine-tune the BERT model for clause extraction and the T5 or GPT-4 model for summarization tasks.

#### Import necessary libraries


In [None]:
import pandas as pd
import numpy as np
import json
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from transformers import (
    AutoTokenizer, AutoModel, AutoModelForSequenceClassification,
    BertTokenizer, BertForSequenceClassification,
    Trainer, TrainingArguments
)
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, classification_report
from sklearn.preprocessing import MultiLabelBinarizer
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")


Using device: cuda


In [None]:
%cd /content/notebooks/

/content/notebooks


#### Load Preprocessed Data and Metadata


In [None]:
def load_processed_data(data_dir='../data/processed'):
    """Load the preprocessed multi-label splits and their metadata.

    Args:
        data_dir: Directory containing ``metadata.json`` and the
            ``train``/``val``/``test`` ``*_multi_label.csv`` files.
            Defaults to the location this notebook has always read from.

    Returns:
        A dict with keys ``'metadata'`` (the parsed JSON) and ``'train'``,
        ``'val'``, ``'test'`` (DataFrames whose ``labels`` column holds
        Python lists).
    """
    import ast
    from pathlib import Path

    data_dir = Path(data_dir)

    # Load metadata
    with open(data_dir / 'metadata.json', 'r', encoding='utf-8') as f:
        metadata = json.load(f)

    print("=== Dataset Overview ===")
    print(f"Total clause types: {len(metadata['clause_types'])}")
    print(f"Clean clause names available: {'clean_clause_names' in metadata}")
    print(f"Train size: {metadata['train_size']}")
    print(f"Val size: {metadata['val_size']}")
    print(f"Test size: {metadata['test_size']}")
    print(f"Available datasets: {metadata['dataset_types']}")

    def _parse_labels(raw):
        # The CSV stores each label list as its string repr; a missing cell
        # becomes an empty list. A value that is already a list is kept.
        if isinstance(raw, list):
            return raw
        if pd.isna(raw):
            return []
        return ast.literal_eval(raw)

    # Load multi-label datasets (start with these)
    splits = {}
    for split_name in ('train', 'val', 'test'):
        frame = pd.read_csv(data_dir / f'{split_name}_multi_label.csv')
        frame['labels'] = frame['labels'].apply(_parse_labels)
        splits[split_name] = frame

    # Show sample with clean names
    train_ml = splits['train']
    print("\nSample training data with clean clause names:")
    sample_labels = train_ml['labels'].iloc[0] if len(train_ml) > 0 else []
    print(f"Sample labels: {sample_labels[:5]}{'...' if len(sample_labels) > 5 else ''}")

    return {
        'metadata': metadata,
        'train': splits['train'],
        'val': splits['val'],
        'test': splits['test']
    }

# Load everything once; later cells read the splits and metadata from `data`
data = load_processed_data()
print("\nDataset loaded with clean clause names!")

=== Dataset Overview ===
Total clause types: 41
Clean clause names available: True
Train size: 6092
Val size: 871
Test size: 1741
Available datasets: ['multi_label', 'binary_affiliate_license_licensee', 'binary_affiliate_license_licensor', 'binary_agreement_date', 'binary_anti_assignment', 'binary_audit_rights', 'binary_cap_on_liability', 'binary_change_of_control', 'binary_competitive_restriction_except', 'binary_covenant_not_to_sue', 'binary_document_name', 'binary_effective_date', 'binary_exclusivity', 'binary_expiration_date', 'binary_governing_law', 'binary_insurance', 'binary_ip_ownership_assignment', 'binary_irrevocable_or_perpetual_licen', 'binary_joint_ip_ownership', 'binary_license_grant', 'binary_liquidated_damages', 'binary_minimum_commitment', 'binary_most_favored_nation', 'binary_no_solicit_of_customers', 'binary_no_solicit_of_employees', 'binary_non_compete', 'binary_non_disparagement', 'binary_non_transferable_license', 'binary_notice_period_to_terminate_ren', 'binary_p

#### Multi-Label Dataset Class


In [None]:
class LegalClauseDataset(Dataset):
    """Dataset for multi-label legal clause classification with clean names.

    Each item is a dict with ``input_ids`` and ``attention_mask`` (1-D
    tensors produced by the tokenizer, padded/truncated to ``max_length``)
    and ``labels`` (a float multi-hot vector with one entry per class).

    Column ``i`` of the label vector corresponds to ``self.mlb.classes_[i]``.
    When ``clean_clause_names`` is given, that is the caller's order (first
    occurrence wins if a name is repeated), so ``clean_clause_names[i]`` and
    column ``i`` refer to the same clause.
    """

    def __init__(self, texts, labels, tokenizer, max_length=512, clean_clause_names=None):
        """
        Args:
            texts: pandas Series (or plain sequence) of documents.
            labels: Iterable of label lists, one per document.
            tokenizer: Callable used in ``__getitem__`` to encode a text.
            max_length: Padding/truncation length passed to the tokenizer.
            clean_clause_names: Full label vocabulary in the desired column
                order. If omitted, the vocabulary is learned from ``labels``.
        """
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.clean_clause_names = clean_clause_names or []

        if clean_clause_names:
            # A plain fit() sorts the classes, which would silently decouple
            # column order from the caller's list; `classes=` pins the order.
            ordered_names = list(dict.fromkeys(clean_clause_names))
            self.mlb = MultiLabelBinarizer(classes=ordered_names)
            self.mlb.fit([ordered_names])
        else:
            # No vocabulary supplied: learn it from the labels themselves so
            # transform() below does not fail on an unfitted binarizer.
            self.mlb = MultiLabelBinarizer()
            self.mlb.fit(labels)

        # Convert label lists to binary matrix
        self.label_matrix = self.mlb.transform(labels)

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        # Positional lookup: use .iloc for pandas objects (their index may not
        # be 0..n-1), plain indexing for lists/tuples.
        if hasattr(self.texts, 'iloc'):
            text = str(self.texts.iloc[idx])
        else:
            text = str(self.texts[idx])
        labels = self.label_matrix[idx]

        # Tokenize
        encoding = self.tokenizer(
            text,
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt'
        )

        return {
            # The tokenizer returns a batch of one; flatten to 1-D per item
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            # Float targets, as used by BCEWithLogitsLoss in this notebook
            'labels': torch.as_tensor(labels, dtype=torch.float32)
        }

# Initialize tokenizer
# MODEL_NAME = 'bert-base-uncased'  # Start with base BERT
MODEL_NAME = 'nlpaueb/legal-bert-base-uncased'  # Use legal-domain BERT
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Extract clean clause names from metadata.
# The list is sorted so that (a) its order is the same in every session
# (iterating a set of strings is not stable across interpreter runs) and
# (b) index i here matches column i of the label matrix, since
# MultiLabelBinarizer orders its classes by sorting them.
metadata = data['metadata']
if 'clean_clause_names' in metadata:
    # Use clean clause names mapping
    clean_clause_names_dict = metadata['clean_clause_names']
    clean_clause_names = sorted(set(clean_clause_names_dict.values()))  # Unique clean names

    # Map original clause types to clean names for reference
    original_to_clean = clean_clause_names_dict
    print("Using clean clause names:")
    for original, clean in list(original_to_clean.items())[:5]:
        print(f"  '{clean}' (was: {original[:50]}...)")
    remaining = len(original_to_clean) - 5
    if remaining > 0:  # avoid printing a negative count for small mappings
        print(f"  ... and {remaining} more")
else:
    # Fallback to original clause types
    clean_clause_names = sorted(set(metadata['clause_types']))
    original_to_clean = {ct: ct for ct in clean_clause_names}
    print("Using original clause types (clean names not found)")

# Create datasets with clean clause names
train_dataset = LegalClauseDataset(
    data['train']['text'],
    data['train']['labels'],
    tokenizer,
    max_length=512,
    clean_clause_names=clean_clause_names
)

val_dataset = LegalClauseDataset(
    data['val']['text'],
    data['val']['labels'],
    tokenizer,
    max_length=512,
    clean_clause_names=clean_clause_names
)

test_dataset = LegalClauseDataset(
    data['test']['text'],
    data['test']['labels'],
    tokenizer,
    max_length=512,
    clean_clause_names=clean_clause_names
)

print("\nDataset sizes:")
print(f"Train: {len(train_dataset)}")
print(f"Val: {len(val_dataset)}")
print(f"Test: {len(test_dataset)}")
print(f"Number of clean clause types: {len(clean_clause_names)}")

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Using clean clause names:
  'Affiliate License-Licensee' (was: Highlight the parts (if any) of this contract rela...)
  'Affiliate License-Licensor' (was: Highlight the parts (if any) of this contract rela...)
  'Agreement Date' (was: Highlight the parts (if any) of this contract rela...)
  'Anti-Assignment' (was: Highlight the parts (if any) of this contract rela...)
  'Audit Rights' (was: Highlight the parts (if any) of this contract rela...)
  ... and 36 more

Dataset sizes:
Train: 6092
Val: 871
Test: 1741
Number of clean clause types: 41


#### Multi-Label BERT Model

In [None]:
class MultiLabelBERT(nn.Module):
    """Pretrained encoder followed by dropout and one linear layer that
    emits a logit per label (multi-label classification)."""

    def __init__(self, model_name, num_labels, dropout=0.3):
        super().__init__()
        self.num_labels = num_labels
        self.bert = AutoModel.from_pretrained(model_name)
        self.dropout = nn.Dropout(dropout)
        self.classifier = nn.Linear(self.bert.config.hidden_size, num_labels)

    def forward(self, input_ids, attention_mask, labels=None):
        """Return ``{'loss', 'logits'}``; ``loss`` is ``None`` without labels."""
        encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        logits = self.classifier(self.dropout(encoded.pooler_output))

        if labels is None:
            return {'loss': None, 'logits': logits}

        # Unweighted binary cross-entropy on raw logits, one term per label
        bce = nn.BCEWithLogitsLoss()
        return {'loss': bce(logits, labels), 'logits': logits}

# One output logit per clean clause name; move the model to the chosen device
num_labels = len(clean_clause_names)
model = MultiLabelBERT(MODEL_NAME, num_labels)
model.to(device)

for summary_line in (
    "Model initialized for clean clause names:",
    f"  Clean clause types: {num_labels}",
    f"  Model parameters: {sum(p.numel() for p in model.parameters()):,}",
    f"  Example clean names: {clean_clause_names[:5]}",
):
    print(summary_line)

pytorch_model.bin:   0%|          | 0.00/440M [00:00<?, ?B/s]

Model initialized for clean clause names:
  Clean clause types: 41
  Model parameters: 109,513,769
  Example clean names: ['Third Party Beneficiary', 'Renewal Term', 'Cap On Liability', 'Warranty Duration', 'Audit Rights']


#### Training Configuration with Class Imbalance Handling


In [None]:
# Cell: Training Configuration with Class Imbalance Handling (Updated)
from torch.utils.data import DataLoader
from torch.optim import AdamW
from transformers import get_linear_schedule_with_warmup

def create_weighted_loss():
    """Build the per-label weight tensor passed as ``pos_weight`` to the loss.

    Reads the module-level ``data``, ``train_dataset`` and ``device``.

    Each label's weight is ``total_samples / (num_labels * count)``, where
    ``count`` is how often the label occurs in the training label lists
    (labels that never occur are treated as occurring once).

    Returns:
        A float tensor on ``device`` with one weight per label-matrix column,
        in the same order as ``train_dataset.mlb.classes_``.
    """
    from collections import Counter

    # Count how often each clean clause name appears in the training split
    label_counts = Counter(
        label for labels in data['train']['labels'] for label in labels
    )
    total_samples = len(data['train'])

    # The weights are applied column-wise to the logits/targets, so they must
    # follow the binarizer's column order rather than any other name list.
    label_order = list(train_dataset.mlb.classes_)

    # NOTE(review): the PyTorch docs suggest negatives/positives per class for
    # `pos_weight`; this inverse-frequency formula is kept as-is — confirm
    # which weighting is intended.
    weights = [
        total_samples / (len(label_order) * label_counts.get(name, 1))
        for name in label_order
    ]

    print("Class imbalance handling:")
    if label_counts:  # max()/min() raise on an empty sequence
        print(f"  Most frequent clause: {max(label_counts.items(), key=lambda x: x[1])}")
        print(f"  Least frequent clause: {min(label_counts.items(), key=lambda x: x[1])}")
    if weights:
        print(f"  Weight range: {min(weights):.2f} - {max(weights):.2f}")

    return torch.FloatTensor(weights).to(device)

# Hyperparameters
BATCH_SIZE = 8  # Small batch size due to long sequences
LEARNING_RATE = 2e-5
EPOCHS = 35
MAX_GRAD_NORM = 1.0

# Shuffle only the training split; validation order stays fixed
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

# AdamW with a linear schedule: warm up over the first 10% of steps
optimizer = AdamW(model.parameters(), lr=LEARNING_RATE)
total_steps = EPOCHS * len(train_loader)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=total_steps // 10,
    num_training_steps=total_steps,
)

# Loss with per-label positive weights to counter class imbalance
class_weights = create_weighted_loss()
criterion = nn.BCEWithLogitsLoss(pos_weight=class_weights)

for config_line in (
    "\nTraining configuration:",
    f"Batch size: {BATCH_SIZE}",
    f"Learning rate: {LEARNING_RATE}",
    f"Epochs: {EPOCHS}",
    f"Total training steps: {total_steps}",
    "Using clean clause names for training",
):
    print(config_line)

Class imbalance handling:
  Most frequent clause: ('Governing Law', 657)
  Least frequent clause: ('Exclusivity', 1)
  Weight range: 0.23 - 148.59

Training configuration:
Batch size: 8
Learning rate: 2e-05
Epochs: 35
Total training steps: 26670
Using clean clause names for training


#### Training Loop with Metrics


In [None]:
def train_epoch(model, train_loader, optimizer, scheduler, criterion):
    """Run one pass over ``train_loader`` and return the mean batch loss.

    Every batch: forward pass, loss from ``criterion`` on the logits,
    backward pass, gradient-norm clipping at ``MAX_GRAD_NORM``, then one
    optimizer step and one scheduler step. Progress is printed every 50
    batches.
    """
    model.train()
    running_loss = 0

    for batch_idx, batch in enumerate(train_loader):
        # Move the three tensors of the batch onto the training device
        input_ids, attention_mask, labels = (
            batch[key].to(device)
            for key in ('input_ids', 'attention_mask', 'labels')
        )

        optimizer.zero_grad()

        logits = model(input_ids, attention_mask, labels)['logits']
        loss = criterion(logits, labels)
        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), MAX_GRAD_NORM)
        optimizer.step()
        scheduler.step()

        running_loss += loss.item()

        if batch_idx % 50 == 0:
            print(f'Batch {batch_idx}/{len(train_loader)}, Loss: {loss.item():.4f}')

    return running_loss / len(train_loader)

def evaluate(model, val_loader, criterion):
    """Score ``model`` on ``val_loader`` without updating it.

    A label is predicted positive when its sigmoid probability exceeds 0.5.

    Returns:
        dict with the mean batch ``loss``, micro- and macro-averaged
        precision/recall/F1, and ``hamming_loss``.
    """
    from sklearn.metrics import precision_recall_fscore_support, hamming_loss

    model.eval()
    loss_sum = 0
    predicted_rows = []
    target_rows = []

    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            logits = model(input_ids, attention_mask, labels)['logits']
            loss_sum += criterion(logits, labels).item()

            # Threshold the per-label probabilities at 0.5
            hard_predictions = torch.sigmoid(logits) > 0.5
            predicted_rows.extend(hard_predictions.cpu().numpy())
            target_rows.extend(labels.cpu().numpy())

    y_pred = np.array(predicted_rows)
    y_true = np.array(target_rows)

    # Same three scores for both averaging modes, micro first
    averaged_scores = {}
    for mode in ('micro', 'macro'):
        precision, recall, f1, _ = precision_recall_fscore_support(
            y_true, y_pred, average=mode
        )
        averaged_scores[f'precision_{mode}'] = precision
        averaged_scores[f'recall_{mode}'] = recall
        averaged_scores[f'f1_{mode}'] = f1

    hamming = hamming_loss(y_true, y_pred)

    return {
        'loss': loss_sum / len(val_loader),
        **averaged_scores,
        'hamming_loss': hamming,
    }

# Training loop
import os

# A checkpoint is written at the end of every epoch, so the target directory
# has to exist before the first save; otherwise torch.save fails after a full
# epoch of training has already been spent.
os.makedirs('../models/bert/', exist_ok=True)

training_history = {'train_loss': [], 'val_metrics': []}

print("Starting training...")
for epoch in range(EPOCHS):
    print(f"\n=== Epoch {epoch + 1}/{EPOCHS} ===")

    # Train
    train_loss = train_epoch(model, train_loader, optimizer, scheduler, criterion)
    print(f"Training Loss: {train_loss:.4f}")

    # Evaluate
    val_metrics = evaluate(model, val_loader, criterion)
    print("Validation Metrics:")
    for key, value in val_metrics.items():
        print(f"  {key}: {value:.4f}")

    # Save metrics
    training_history['train_loss'].append(train_loss)
    training_history['val_metrics'].append(val_metrics)

    # Save checkpoint. The scheduler state is stored as well so that a
    # resumed run continues the learning-rate schedule where it stopped.
    torch.save({
        'epoch': epoch,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict(),
        'train_loss': train_loss,
        'val_metrics': val_metrics
    }, f'../models/bert/checkpoint_epoch_{epoch+1}.pt')

print("\nTraining completed!")

Starting training...

=== Epoch 1/35 ===


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Batch 0/762, Loss: 0.7787
Batch 50/762, Loss: 1.6247
Batch 100/762, Loss: 1.6076
Batch 150/762, Loss: 1.0086
Batch 200/762, Loss: 0.8259
Batch 250/762, Loss: 0.5945
Batch 300/762, Loss: 0.7651
Batch 350/762, Loss: 1.1467
Batch 400/762, Loss: 0.5341
Batch 450/762, Loss: 0.4242
Batch 500/762, Loss: 0.4332
Batch 550/762, Loss: 0.4097
Batch 600/762, Loss: 0.4745
Batch 650/762, Loss: 0.8911
Batch 700/762, Loss: 1.4061
Batch 750/762, Loss: 0.8013
Training Loss: 0.8128
Validation Metrics:
  loss: 0.6206
  precision_micro: 0.1147
  recall_micro: 0.2309
  f1_micro: 0.1533
  precision_macro: 0.0028
  recall_macro: 0.0244
  f1_macro: 0.0050
  hamming_loss: 0.0309

=== Epoch 2/35 ===
Batch 0/762, Loss: 0.3386
Batch 50/762, Loss: 0.6927
Batch 100/762, Loss: 0.2815
Batch 150/762, Loss: 0.7199
Batch 200/762, Loss: 0.2513
Batch 250/762, Loss: 0.2028
Batch 300/762, Loss: 0.5609
Batch 350/762, Loss: 0.6581
Batch 400/762, Loss: 0.6273
Batch 450/762, Loss: 0.8001
Batch 500/762, Loss: 0.2357
Batch 550/762,

#### Evaluation and Analysis


In [None]:
def detailed_evaluation(model, test_loader, clean_clause_names):
    """Detailed evaluation with per-class metrics using clean clause names.

    Args:
        model: Model returning a dict with a ``'logits'`` entry.
        test_loader: Loader yielding ``input_ids``, ``attention_mask`` and
            ``labels`` batches.
        clean_clause_names: Label names; entry ``i`` is reported for column
            ``i`` of the label matrix, so the order must match the columns.

    Returns:
        ``(per_class_df, predictions, labels, probabilities)``. The DataFrame
        has one row per label that has at least one positive example; labels
        with no positives are skipped. The three arrays hold one row per
        evaluated sample.
    """
    columns = [
        'clean_clause_name', 'precision', 'recall', 'f1',
        'support', 'predictions_made', 'avg_probability',
    ]

    model.eval()
    all_predictions = []
    all_labels = []
    all_probs = []

    with torch.no_grad():
        for batch in test_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids, attention_mask, labels)
            probs = torch.sigmoid(outputs['logits'])
            predictions = probs > 0.5  # same threshold as evaluate()

            all_predictions.extend(predictions.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            all_probs.extend(probs.cpu().numpy())

    all_predictions = np.array(all_predictions)
    all_labels = np.array(all_labels)
    all_probs = np.array(all_probs)

    # An empty loader yields 1-D empty arrays that cannot be sliced by column
    if all_labels.ndim != 2:
        return pd.DataFrame(columns=columns), all_predictions, all_labels, all_probs

    # Per-class metrics using clean clause names
    per_class_metrics = []
    for i, clean_clause_name in enumerate(clean_clause_names):
        y_true = all_labels[:, i]
        y_pred = all_predictions[:, i]
        y_prob = all_probs[:, i]

        positives = int(y_true.sum())
        if positives > 0:  # Only calculate if positive samples exist
            # With average='binary' the returned support is None, so the
            # number of positive examples is taken from y_true instead.
            precision, recall, f1, _ = precision_recall_fscore_support(
                y_true, y_pred, average='binary', zero_division=0
            )

            per_class_metrics.append({
                'clean_clause_name': clean_clause_name,
                'precision': float(precision),
                'recall': float(recall),
                'f1': float(f1),
                'support': positives,
                # Native Python numbers keep the records JSON-serializable
                'predictions_made': int(y_pred.sum()),
                'avg_probability': float(y_prob.mean())
            })

    # Passing `columns` keeps the expected columns even when no row qualified
    per_class_df = pd.DataFrame(per_class_metrics, columns=columns)
    return per_class_df, all_predictions, all_labels, all_probs

# Test the model with clean clause names
from collections import Counter

test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Column i of the label matrix belongs to test_dataset.mlb.classes_[i]; take
# the names from the binarizer so every reported metric is attached to the
# clause it was actually computed for.
label_columns = list(test_dataset.mlb.classes_)
per_class_df, test_predictions, test_labels, test_probs = detailed_evaluation(
    model, test_loader, label_columns
)

print("=== Test Results (Clean Clause Names) ===")
print("\nOverall Metrics:")
test_metrics = evaluate(model, test_loader, criterion)
for key, value in test_metrics.items():
    print(f"  {key}: {value:.4f}")

print("\nTop 10 Best Performing Clause Types (Clean Names):")
best_clauses = per_class_df.nlargest(10, 'f1')[['clean_clause_name', 'precision', 'recall', 'f1', 'support']]
print(best_clauses)

print("\nBottom 10 Clause Types - Most Challenging (Clean Names):")
worst_clauses = per_class_df.nsmallest(10, 'f1')[['clean_clause_name', 'precision', 'recall', 'f1', 'support']]
print(worst_clauses)

# Show clause distribution (ten most frequent labels in the training split)
print("\nClause Frequency Distribution:")
clause_freq = Counter(
    label for labels in data['train']['labels'] for label in labels
)

sorted_freq = clause_freq.most_common(10)
for clause, freq in sorted_freq:
    print(f"  {clause}: {freq} samples")

#### Save Model and Results


In [None]:
import os
os.makedirs('../models/bert/', exist_ok=True)


def _json_default(value):
    """Serialize numpy scalars as native numbers; anything else as str."""
    if isinstance(value, np.generic):
        return value.item()
    return str(value)


# Order of the model's output columns: logit i scores label_order[i].
# Stored explicitly so the weights can be interpreted in a later session.
label_order = [str(name) for name in train_dataset.mlb.classes_]

# Save the final model
torch.save(model.state_dict(), '../models/bert/final_model.pt')

# Save tokenizer
tokenizer.save_pretrained('../models/bert/')

# Save results with clean clause names
results = {
    'training_history': training_history,
    'test_metrics': test_metrics,
    'per_class_metrics': per_class_df.to_dict('records'),
    'clean_clause_names': clean_clause_names,
    'label_order': label_order,
    'original_to_clean_mapping': original_to_clean,
    'model_config': {
        'model_name': MODEL_NAME,
        'max_length': 512,
        'batch_size': BATCH_SIZE,
        'learning_rate': LEARNING_RATE,
        'epochs': EPOCHS,
        'num_labels': num_labels,
        'uses_clean_clause_names': True
    }
}

# A bare `default=str` would write numpy numbers as quoted strings
with open('../models/bert/training_results.json', 'w', encoding='utf-8') as f:
    json.dump(results, f, indent=2, default=_json_default)

# Save clean clause names mapping for future use
with open('../models/bert/clean_clause_names.json', 'w', encoding='utf-8') as f:
    json.dump({
        'clean_clause_names': clean_clause_names,
        'label_order': label_order,
        'original_to_clean_mapping': original_to_clean
    }, f, indent=2)

print("Model and results saved with clean clause names!")
print(f"Model saved to: ../models/bert/")
print(f"Results saved to: ../models/bert/training_results.json")
print(f"Clean clause mapping saved to: ../models/bert/clean_clause_names.json")
print(f"\nClean clause names integration completed!")

Model and results saved with clean clause names!
Model saved to: ../models/bert/
Results saved to: ../models/bert/training_results.json
Clean clause mapping saved to: ../models/bert/clean_clause_names.json

Clean clause names integration completed!


#### Key Benefits Summary of Legal NLP + Explainability Toolkit Training Notebook


In [None]:

print("KEY BENEFITS OF THIS TRAINING NOTEBOOK (WITH CLEAN CLAUSE NAMES)")
print("=" * 70)

# Section title -> bullet lines, printed in insertion order below
benefits = {
    "1. CLEAN CLAUSE NAME INTEGRATION": [
        "Uses human-readable clause names (e.g., 'Agreement Date')",
        "Eliminates verbose CUAD question format for clarity",
        "Maintains mapping to original clause types for reference",
        f"Training on {len(clean_clause_names)} clean clause types",
    ],
    "2. CLASS IMBALANCE HANDLING": [
        "Weighted loss function based on clean clause frequencies",
        "Addresses highly imbalanced legal clause distribution",
        "Prevents model bias toward common clause types",
        "Custom class weights for clean clause names",
    ],
    "3. MULTI-LABEL CLASSIFICATION": [
        "Handles documents with multiple simultaneous clause types",
        "BCEWithLogitsLoss for independent label predictions",
        "Sigmoid activation for probability-based thresholding",
        "Real-world legal document complexity support",
    ],
    "4. COMPREHENSIVE EVALUATION": [
        "Per-class metrics using clean clause names",
        "Micro/macro precision, recall, F1 metrics",
        "Hamming loss for multi-label performance",
        "Interpretable results for legal practitioners",
    ],
    "5. ROBUST TRAINING INFRASTRUCTURE": [
        "Model checkpointing for recovery and analysis",
        "Learning rate scheduling with warmup",
        "Gradient clipping for training stability",
        "Progress tracking with clean name reporting",
    ],
    "6. PRODUCTION-READY OUTPUTS": [
        "Trained model weights and tokenizer saved",
        "Clean clause name mappings preserved",
        "Comprehensive training results with readable names",
        "Foundation for explainability analysis",
    ],
}

for category, items in benefits.items():
    print(f"\n{category}")
    print("-" * 50)
    for item in items:
        print(f"  {item}")

# Run configuration recap
print("\n TRAINING SETUP SUMMARY (CLEAN NAMES)")
print("-" * 50)
for setup_line in (
    f"  Model: {MODEL_NAME}",
    f"  Training samples: {len(train_dataset):,}",
    f"  Validation samples: {len(val_dataset):,}",
    f"  Test samples: {len(test_dataset):,}",
    f"  Clean clause types: {num_labels}",
    f"  Training epochs: {EPOCHS}",
    f"  Learning rate: {LEARNING_RATE}",
):
    print(setup_line)

# First eight clause names, plus a count of the rest when there are more
print("\n CLEAN CLAUSE NAME EXAMPLES")
print("-" * 50)
sample_names = clean_clause_names[:8]
for name in sample_names:
    print(f"  • {name}")
extra_names = len(clean_clause_names) - 8
if extra_names > 0:
    print(f"  ... and {extra_names} more clause types")

print("\n WHAT THIS ENABLES")
print("-" * 50)
for enabled in (
    "Multi-label BERT model with readable clause names",
    "Legal practitioner-friendly model outputs",
    "Proper handling of class imbalance from EDA findings",
    "Comprehensive evaluation metrics for legal domain",
    "Foundation for explainability analysis (LIME, SHAP)",
    "Production-ready legal document processing pipeline",
):
    print(enabled)

print("\n NEXT STEPS")
print("-" * 50)
for step in (
    "  → Run training loop to train the model",
    "  → Analyze per-class performance with clean names",
    "  → Implement explainability techniques",
    "  → Deploy for legal document clause extraction",
    "  → Generate human-readable model explanations",
):
    print(step)