# 01. Privacy Guardrails Deep Dive

In this notebook, we explore advanced techniques for detecting and handling Personally Identifiable Information (PII).

In [None]:
import sys
import os
import json

# Add project root to path
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

from intermediate.pii_detection import PIIDetector, PIIResult

## 1. Load Sample Data
Let's load the PII samples we generated.

In [None]:
# Load the PII samples from the project's data directory (the path is relative
# to the notebook's working directory). JSON text is UTF-8 by specification,
# so the encoding is pinned rather than left to the platform default.
try:
    with open('../data/samples/pii_samples.json', 'r', encoding='utf-8') as f:
        pii_samples = json.load(f)
except FileNotFoundError:
    # Keep the notebook runnable without the generated data set by falling
    # back to a single hard-coded sample.
    print("Sample data not found. Running fallback generation...")
    pii_samples = ["My email is fallback@example.com"]

print(f"Loaded {len(pii_samples)} samples.")

## 2. Detection and Redaction
We use our `PIIDetector` to find and redact sensitive info.

In [None]:
detector = PIIDetector()

# Table header: two left-aligned 50-character columns joined by " | ",
# followed by a rule wide enough to span both.
print(f"{'Original':<50} | {'Redacted':<50}")
print("-" * 105)

for sample in pii_samples:
    # Samples the detector does not flag are listed with a placeholder
    # instead of a redacted version.
    if detector.check(sample) != PIIResult.FOUND:
        print(f"{sample:<50} | [No PII Detected]")
        continue

    redacted = detector.redact(sample)
    print(f"{sample:<50} | {redacted:<50}")

## 3. Custom Redaction Strategy
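Sometimes you want to mask individual characters instead of replacing the whole value with a tag. The example below keeps the first character of an email address's local part and replaces the remaining characters with `*`.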

In [None]:
import re

def mask_email(text):
    """Return ``text`` with the local part of every email address masked.

    The first character of the local part (the text before ``@``) is kept and
    each remaining character is replaced with ``*``, so the masked address has
    the same length as the original, e.g. ``john.doe@example.com`` becomes
    ``j*******@example.com``. A one-character local part is replaced by a
    single ``*``. The domain is left untouched.
    """
    def _mask(found):
        # The pattern below cannot match more than one '@', so partitioning
        # on the first one cleanly separates local part and domain.
        local, _, host = found.group(0).partition('@')
        hidden = '*' if len(local) <= 1 else local[0] + '*' * (len(local) - 1)
        return f"{hidden}@{host}"

    return re.sub(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", _mask, text)

# Show the masking helper on a single sentence, before and after.
sample = "Contact john.doe@example.com for details."
print(
    f"Original: {sample}",
    f"Masked:   {mask_email(sample)}",
    sep="\n",
)