In [None]:
"""
================================================================================
COLOR DIAGNOSTIC TOOL
================================================================================
Run this script first to discover what color values your PDF reader uses.
This will help you configure the COLOR_MAP in the main script correctly.

HOW TO USE:
1. Highlight some text in your PDF with different colors
2. Add any comment to each highlight (doesn't matter what)
3. Run this script
4. Copy the RGB values it shows into your COLOR_MAP
================================================================================
"""

import fitz

# Set your PDF path here
pdf_path = r"C:\Users\YourName\Documents\research\test.pdf"

# Color reported for annotations that carry no stroke color at all.
_NO_COLOR = (0.0, 0.0, 0.0)

# Longest comment excerpt shown per annotation before it is cut off.
_COMMENT_PREVIEW_LEN = 60

_RULE = "=" * 70


def stroke_color(colors):
    """Return the stroke color from an annotation's ``colors`` dict as a tuple.

    The "stroke" entry may be missing, ``None`` or an empty sequence when the
    annotation has no color; all three cases fall back to black so callers
    never have to deal with an empty or ``None`` color.
    """
    stroke = colors.get("stroke")
    return tuple(stroke) if stroke else _NO_COLOR


def format_rgb(color):
    """Format color components as the comma-separated key used in COLOR_MAP."""
    return ",".join(f"{v:.1f}" for v in color)


def guess_color_name(color):
    """Return a rough color name for an RGB triple, or ``None`` if unsure.

    Only 3-component (RGB) colors are classified; gray (1 component) and
    CMYK (4 components) values are reported without a guess instead of
    being indexed as if they were RGB.
    """
    if len(color) != 3:
        return None
    red, green, blue = color
    # Yellow needs a high *red* channel; checking for "any channel == 1.0"
    # would also match pure green (0, 1, 0).
    if red > 0.9 and green >= 0.9 and blue < 0.5:
        return "YELLOW"
    if red > 0.9 and 0.5 <= green <= 0.7:
        return "PINK"
    if green > 0.9 and red < 0.2 and blue < 0.2:
        return "GREEN"
    if green > 0.9 and blue > 0.9 and red < 0.2:
        return "CYAN"
    if red > 0.9 and green < 0.2 and blue < 0.2:
        return "RED"
    return None


def preview_comment(content, limit=_COMMENT_PREVIEW_LEN):
    """Return ``content`` cut to ``limit`` characters, with "..." only if cut."""
    if len(content) <= limit:
        return content
    return content[:limit] + "..."


def main():
    """Print every annotation's color, then a COLOR_MAP template to copy."""
    print(_RULE)
    print("PDF COLOR DIAGNOSTIC TOOL")
    print(_RULE)
    print(f"\nAnalyzing: {pdf_path}\n")

    # Distinct formatted colors in order of first appearance (dicts keep
    # insertion order), collected during the single pass over the document.
    unique_rgb = {}
    black = format_rgb(_NO_COLOR)

    # The context manager closes the file handle even if a page fails to load.
    with fitz.open(pdf_path) as doc:
        print("ANNOTATIONS FOUND:\n")
        for page_num, page in enumerate(doc, 1):
            annots = list(page.annots())
            if not annots:
                continue
            print(f"Page {page_num}: {len(annots)} annotation(s)")
            for i, annot in enumerate(annots, 1):
                content = annot.info.get("content", "") or ""
                color = stroke_color(annot.colors)
                rgb_str = format_rgb(color)

                print(f"\n  Annotation #{i}:")
                print(f"    RGB Value: {rgb_str}")
                print(f"    Comment: {preview_comment(content)}")

                # Suggest which color this might be
                guess = guess_color_name(color)
                if guess is not None:
                    print(f"    → Looks like: {guess}")

                # Black doubles as "no color set", so it is not worth mapping.
                if rgb_str != black:
                    unique_rgb.setdefault(rgb_str, None)

    print("\n" + _RULE)
    print("COPY THESE VALUES TO YOUR COLOR_MAP:")
    print(_RULE)
    print("\nCOLOR_MAP = {")
    for rgb_str in unique_rgb:
        print(f'    "{rgb_str}": "YourCategoryName",')
    print("}")
    print(_RULE)


if __name__ == "__main__":
    main()