In [25]:
import re
from pathlib import Path

def hyperlink_citations_to_sources(text):
    """Turn numeric citations in a Markdown report into links to its sources.

    The text is divided at its "Sources" section. In the body, every bracketed
    citation such as ``[7]`` or ``[37, 7]`` has each number that is listed in
    the Sources section rewritten as ``[n](#n)``. In the Sources section, the
    first bracketed occurrence of each number is prefixed with
    ``<a id="n"></a>`` so the links have a target.

    Args:
        text: Full Markdown document as a single string.

    Returns:
        The document with citation links and source anchors added. ``text`` is
        returned unchanged when it contains no "Sources" marker.

    Notes:
        * The split point is the first line consisting only of "Sources"
          (optionally decorated as a Markdown heading / bold text / trailing
          colon). If there is no such line, the first occurrence of the word
          is used.
        * Only numbers that appear inside brackets in the Sources section are
          linkable; stray digits (years, URLs, ...) are ignored so no link
          points at a missing anchor.
        * Bracket groups in which nothing is linked (``[ ]``, ``[99]`` with no
          source 99) are left byte-for-byte untouched.
        * Citations that are already Markdown links (``[7](...)``) and sources
          that already carry an anchor are skipped, so running the function on
          its own output returns that output unchanged.
    """
    marker = "Sources"

    # Prefer a standalone heading line so that an ordinary sentence mentioning
    # "Sources" earlier in the body does not become the split point.
    heading = re.search(
        r"^[ \t]*(?:#{1,6}[ \t]*)?[*_]{0,3}(Sources)[*_]{0,3}:?[*_]{0,3}[ \t]*$",
        text,
        flags=re.MULTILINE,
    )
    split_at = heading.start(1) if heading else text.find(marker)
    if split_at == -1:
        return text  # Fallback: no Sources section, nothing to link against

    body = text[:split_at]
    sources = text[split_at + len(marker):]

    # A bracket group holding only digits, commas and whitespace.
    citation_re = re.compile(r"\[([0-9,\s]+)\]")
    # Same group, but not when it is already the label of a Markdown link.
    unlinked_citation_re = re.compile(r"\[([0-9,\s]+)\](?!\()")

    def split_numbers(group):
        # "37, 7" -> ["37", "7"]; empty or odd pieces are kept for the caller.
        return [piece.strip() for piece in group.split(",")]

    # Numbers that are actually listed (in brackets) in the Sources section.
    source_ids = {
        number
        for group in citation_re.findall(sources)
        for number in split_numbers(group)
        if number.isdigit()
    }

    def link_citation(match):
        numbers = split_numbers(match.group(1))
        if not any(number in source_ids for number in numbers):
            # Nothing to link: keep the original text exactly as written.
            return match.group(0)
        linked = [
            f"[{number}](#{number})" if number in source_ids else number
            for number in numbers
        ]
        return f"[{', '.join(linked)}]"

    # Ids that already have an anchor (e.g. from a previous run).
    anchored = set(re.findall(r'<a id="(\d+)"></a>', sources))

    def add_anchor(match):
        fresh = []
        for number in split_numbers(match.group(1)):
            if number.isdigit() and number not in anchored:
                anchored.add(number)
                fresh.append(number)
        anchors = "".join(f'<a id="{number}"></a>' for number in fresh)
        return anchors + match.group(0)

    updated_body = unlinked_citation_re.sub(link_citation, body)
    updated_sources = citation_re.sub(add_anchor, sources)
    return updated_body + marker + updated_sources

# Example usage: link the citations of one corrected report and save the result.
# The input path is assembled with pathlib so the separators are correct on
# every operating system, not only on Windows.
report_path = (
    Path("reports")
    / "Sudan"
    / "corrected_reports"
    / "corrected_security_report_Sudan_HybridCypher_20250707_1202.md"
)
with open(report_path, "r", encoding="utf-8") as f:
    content = f.read()

linked_content = hyperlink_citations_to_sources(content)

# The linked report is written to the current working directory.
with open("report_with_citations.md", "w", encoding="utf-8") as f:
    f.write(linked_content)


In [24]:
# Representative citation shapes: a pair, a duplicated number, a triple and a
# single reference.
test_cases = [
    "This is a test [37, 7] with multiple citations.",
    "Another test [3, 3] with duplicate numbers.",
    "Mixed case [14, 2, 37] with multiple different numbers.",
    "Single citation [7] should also work.",
]

# Minimal Sources section appended to every sample so each cited number
# has a matching entry.
sources_footer = "\n\nSources\n[2] Source 2\n[3] Source 3\n[7] Source 7\n[14] Source 14\n[37] Source 37"

print("Testing citation hyperlinking:")
for i, test in enumerate(test_cases, 1):
    mini_doc = test + sources_footer
    result = hyperlink_citations_to_sources(mini_doc)
    # Only the part before the Sources section is of interest here.
    linked_body = result.split('Sources')[0].strip()
    print(f"\nTest {i}:")
    print(f"Input:  {test}")
    print(f"Output: {linked_body}")

# Scan the real linked report for the two citation shapes that were
# problematic. Only lines that still contain the literal bracket text are
# collected.
print("\n" + "=" * 60)
print("Checking actual output for [37, 7] and [3, 3] cases:")

lines_with_citations = []
for line in linked_content.split('\n'):
    if any(marker in line for marker in ('[37, 7]', '[3, 3]')):
        lines_with_citations.append(line)

# Show at most the first five matching lines.
for line in lines_with_citations[:5]:
    print(f"Line: {line}")

Testing citation hyperlinking:

Test 1:
Input:  This is a test [37, 7] with multiple citations.
Output: This is a test [[37](#37), [7](#7)] with multiple citations.

Test 2:
Input:  Another test [3, 3] with duplicate numbers.
Output: Another test [[3](#3), [3](#3)] with duplicate numbers.

Test 3:
Input:  Mixed case [14, 2, 37] with multiple different numbers.
Output: Mixed case [[14](#14), [2](#2), [37](#37)] with multiple different numbers.

Test 4:
Input:  Single citation [7] should also work.
Output: Single citation [[7](#7)] should also work.

Checking actual output for [37, 7] and [3, 3] cases:
