# Demo

## Setup

In [6]:
import sys
from pathlib import Path

# Try finding project root by searching for marker (like pyproject.toml)
def find_project_root(start: Path, marker: str = "pyproject.toml") -> Path:
    for parent in [start] + list(start.parents):
        if (parent / marker).exists():
            return parent
    raise RuntimeError("Project root not found")

root_dir = find_project_root(Path.cwd())
sys.path.append(str(root_dir / "src"))


In [8]:
from charfinder.core.core_main import (
    find_chars,
    find_chars_raw,
    find_chars_with_info,
)
from charfinder.constants import FuzzyAlgorithm, HybridAggFunc

#### Simple Exact Match (Default)

In [9]:
query = "arrow"
results = list(find_chars(query))

print("\n".join(results))


[INFO] Rebuilding Unicode name cache. This may take a few seconds...
[INFO] Downloaded and cached "UnicodeData.txt" from https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
[INFO] Loaded "UnicodeData.txt" from local file: C:\Users\HamedVAHEB\Documents\Projects\Python\charfinder\data\UnicodeData.txt
[INFO] Cache written to: "C:\Users\HamedVAHEB\Documents\Projects\Python\charfinder\data\cache\unicode_name_cache.json"


[INFO] Found 560 match(es) for query: 'arrow'
CODE       CHAR NAME
--------------------
U+02FF     Àø   MODIFIER LETTER LOW LEFT ARROW  (\u02ff)
U+034D     Õç   COMBINING LEFT RIGHT ARROW BELOW  (\u034d)
U+034E     Õé   COMBINING UPWARDS ARROW BELOW  (\u034e)
U+0362     Õ¢   COMBINING DOUBLE RIGHTWARDS ARROW BELOW  (\u0362)
U+1AB3     ·™≥   COMBINING DOWNWARDS ARROW  (\u1ab3)
U+20D4     ‚Éî   COMBINING ANTICLOCKWISE ARROW ABOVE  (\u20d4)
U+20D5     ‚Éï   COMBINING CLOCKWISE ARROW ABOVE  (\u20d5)
U+20D6     ‚Éñ   COMBINING LEFT ARROW ABOVE  (\u20d6)
U+20D7     ‚Éó   COMBINING RIGHT ARROW ABOVE  (\u20d7)
U+20E1     ‚É°   COMBINING LEFT RIGHT ARROW ABOVE  (\u20e1)
U+20EA     ‚É™   COMBINING LEFTWARDS ARROW OVERLAY  (\u20ea)
U+20EE     ‚ÉÆ   COMBINING LEFT ARROW BELOW  (\u20ee)
U+20EF     ‚ÉØ   COMBINING RIGHT ARROW BELOW  (\u20ef)
U+2190     ‚Üê   LEFTWARDS ARROW  (\u2190)
U+2191     ‚Üë   UPWARDS ARROW  (\u2191)
U+2192     ‚Üí   RIGHTWARDS ARROW  (\u2192)
U+2193     ‚Üì   DOWNWARDS ARROW

#### Fuzzy Match, Low Threshold

In [11]:
query = "arw"
results = list(find_chars(query, fuzzy=True, threshold=0.3))

print("\n".join(results))


[INFO] Loaded Unicode name cache from: "C:\Users\HamedVAHEB\Documents\Projects\Python\charfinder\data\cache\unicode_name_cache.json"


[INFO] No exact match found for 'arw', trying fuzzy...
[INFO] Fuzzy settings: threshold=0.3, agg_fn=mean
[DEBUG] Skipped char '+' (U+002B) ‚Äî no valid score computed.
[DEBUG] Skipped char '-' (U+002D) ‚Äî no valid score computed.
[DEBUG] Skipped char '1' (U+0031) ‚Äî no valid score computed.
[DEBUG] Skipped char '5' (U+0035) ‚Äî no valid score computed.
[DEBUG] Skipped char '6' (U+0036) ‚Äî no valid score computed.
[DEBUG] Skipped char '7' (U+0037) ‚Äî no valid score computed.
[DEBUG] Skipped char '8' (U+0038) ‚Äî no valid score computed.
[DEBUG] Skipped char '9' (U+0039) ‚Äî no valid score computed.
[DEBUG] Skipped char ':' (U+003A) ‚Äî no valid score computed.
[DEBUG] Skipped char ';' (U+003B) ‚Äî no valid score computed.
[DEBUG] Skipped char '~' (U+007E) ‚Äî no valid score computed.
[DEBUG] Skipped char '¬¢' (U+00A2) ‚Äî no valid score computed.
[DEBUG] Skipped char '¬£' (U+00A3) ‚Äî no valid score computed.
[DEBUG] Skipped char '¬•' (U+00A5) ‚Äî no valid score computed.
[DEBUG] Sk

#### Fuzzy Match, High Threshold

In [12]:
query = "arw"
results = list(find_chars(query, fuzzy=True, threshold=0.85))

print("\n".join(results))


[INFO] Loaded Unicode name cache from: "C:\Users\HamedVAHEB\Documents\Projects\Python\charfinder\data\cache\unicode_name_cache.json"


[INFO] No exact match found for 'arw', trying fuzzy...
[INFO] Fuzzy settings: threshold=0.85, agg_fn=mean
[DEBUG] Skipped char '+' (U+002B) ‚Äî no valid score computed.
[DEBUG] Skipped char '-' (U+002D) ‚Äî no valid score computed.
[DEBUG] Skipped char '1' (U+0031) ‚Äî no valid score computed.
[DEBUG] Skipped char '5' (U+0035) ‚Äî no valid score computed.
[DEBUG] Skipped char '6' (U+0036) ‚Äî no valid score computed.
[DEBUG] Skipped char '7' (U+0037) ‚Äî no valid score computed.
[DEBUG] Skipped char '8' (U+0038) ‚Äî no valid score computed.
[DEBUG] Skipped char '9' (U+0039) ‚Äî no valid score computed.
[DEBUG] Skipped char ':' (U+003A) ‚Äî no valid score computed.
[DEBUG] Skipped char ';' (U+003B) ‚Äî no valid score computed.
[DEBUG] Skipped char '~' (U+007E) ‚Äî no valid score computed.
[DEBUG] Skipped char '¬¢' (U+00A2) ‚Äî no valid score computed.
[DEBUG] Skipped char '¬£' (U+00A3) ‚Äî no valid score computed.
[DEBUG] Skipped char '¬•' (U+00A5) ‚Äî no valid score computed.
[DEBUG] S

#### Compare Fuzzy Algorithms

In [13]:
query = "arrw"
for algo in FuzzyAlgorithm.__args__:
    print(f"\n=== Fuzzy Algorithm: {algo} ===")
    results = list(find_chars(query, fuzzy=True, fuzzy_algo=algo, threshold=0.4))
    print("\n".join(results[:5]) or "No results")



=== Fuzzy Algorithm: sequencematcher ===


ValueError: Unknown or unsupported fuzzy algorithm. Supported values: hybrid, hybrid_score, lev, levenshtein, levenshtein_ratio, normalized, normalized_ratio, rapidfuzz, sequencematcher, simple, simple_ratio, token_sort, token_sort_ratio, tsr

In [None]:
from charfinder.core.core_main import (
    find_chars,
    find_chars_raw,
    find_chars_with_info,
)
from charfinder.constants import FuzzyAlgorithm, HybridAggFunc

# Unicode Normalization Test

In [2]:
import unicodedata

# Define some example strings
input_text = "√©"  # Composed 'e' with acute accent
decomposed_e = "eÃÅ"  # Decomposed 'e' + acute accent

# Apply different Unicode normalization forms
nfc = unicodedata.normalize("NFC", input_text)  # Composed form
nfd = unicodedata.normalize("NFD", input_text)  # Decomposed form
nfkc = unicodedata.normalize("NFKC", input_text)  # Compatibility composed form
nfkd = unicodedata.normalize("NFKD", input_text)  # Compatibility decomposed form

# Print the results
print(f"NFC: {nfc}")  # Composed 'EÃÅ'
print(f"NFD: {nfd}")  # Decomposed 'E\u0301'
print(f"NFKC: {nfkc}")  # Compatibility composed 'EÃÅ'
print(f"NFKD: {nfkd}")  # Compatibility decomposed 'E\u0301'

# Normalize manually using the decomposed string
print(f"NFC from decomposed: {unicodedata.normalize('NFC', decomposed_e)}")  # Should be 'EÃÅ'
print(f"NFD from decomposed: {unicodedata.normalize('NFD', decomposed_e)}")  # Should be 'E\u0301'


NFC: √©
NFD: eÃÅ
NFKC: √©
NFKD: eÃÅ
NFC from decomposed: √©
NFD from decomposed: eÃÅ


In [4]:
# Define the test characters
characters = ['√©']

# Normalization results
results = {}

for char in characters:
    results[char] = {
        'NFC': unicodedata.normalize('NFC', char).upper(),
        'NFD': unicodedata.normalize('NFD', char).upper(),
        'NFKC': unicodedata.normalize('NFKC', char).upper(),
        'NFKD': unicodedata.normalize('NFKD', char).upper()
    }

results


{'√©': {'NFC': '√â', 'NFD': 'EÃÅ', 'NFKC': '√â', 'NFKD': 'EÃÅ'}}

In [9]:
import unicodedata

# Test character
test_char = '√©'

# Normalization results
nfc_result = unicodedata.normalize('NFC', test_char)
nfd_result = unicodedata.normalize('NFD', test_char)
nfkc_result = unicodedata.normalize('NFKC', test_char)
nfkd_result = unicodedata.normalize('NFKD', test_char)

# Expected results for verification
expected_nfc = '√©'  # Composed
expected_nfd = 'eÃÅ'  # Decomposed: 'e' + combining acute accent
expected_nfkc = '√©'  # Composed
expected_nfkd = 'eÃÅ'  # Decomposed: 'e' + combining acute accent

# Assertions to verify the results
assert nfc_result == expected_nfc, f"Expected NFC: {expected_nfc}, but got {nfc_result}"
assert nfd_result == expected_nfd, f"Expected NFD: {expected_nfd}, but got {nfd_result}"
assert nfkc_result == expected_nfkc, f"Expected NFKC: {expected_nfkc}, but got {nfkc_result}"
assert nfkd_result == expected_nfkd, f"Expected NFKD: {expected_nfkd}, but got {nfkd_result}"

# Print the results if everything passes
print(f"NFC: {nfc_result}")
print(f"NFD: {nfd_result}")
print(f"NFKC: {nfkc_result}")
print(f"NFKD: {nfkd_result}")


NFC: √©
NFD: eÃÅ
NFKC: √©
NFKD: eÃÅ


In [11]:
import unicodedata

# Example characters
characters = ['√©', '√®', '√º', '√ß']

# Normalize results and manually check their Unicode code points
results = {}

for char in characters:
    results[char] = {
        'NFC': unicodedata.normalize('NFC', char),
        'NFD': unicodedata.normalize('NFD', char),
        'NFKC': unicodedata.normalize('NFKC', char),
        'NFKD': unicodedata.normalize('NFKD', char),
        'code_points': [ord(c) for c in char]  # To see the exact code points of characters
}

# Print results to verify normalization and character code points
results


{'√©': {'NFC': '√©',
  'NFD': 'eÃÅ',
  'NFKC': '√©',
  'NFKD': 'eÃÅ',
  'code_points': [233]},
 '√®': {'NFC': '√®',
  'NFD': 'eÃÄ',
  'NFKC': '√®',
  'NFKD': 'eÃÄ',
  'code_points': [232]},
 '√º': {'NFC': '√º',
  'NFD': 'uÃà',
  'NFKC': '√º',
  'NFKD': 'uÃà',
  'code_points': [252]},
 '√ß': {'NFC': '√ß',
  'NFD': 'cÃß',
  'NFKC': '√ß',
  'NFKD': 'cÃß',
  'code_points': [231]}}

In [12]:
# Normalization results matrix for each normalization form
normalization_matrix = {
    "√©": {
        "NFC": "√â",
        "NFD": "EÃÅ",
        "NFKC": "√â",
        "NFKD": "EÃÅ"
    },
    "√®": {
        "NFC": "EÃÄ",
        "NFD": "E\u0300",
        "NFKC": "EÃÄ",
        "NFKD": "E\u0300"
    },
    "√º": {
        "NFC": "UÃà",
        "NFD": "U\u0308",
        "NFKC": "UÃà",
        "NFKD": "U\u0308"
    },
    "√ß": {
        "NFC": "CÃß",
        "NFD": "C\u0327",
        "NFKC": "CÃß",
        "NFKD": "C\u0327"
    },
    "√∂": {
        "NFC": "OÃà",
        "NFD": "O\u0308",
        "NFKC": "OÃà",
        "NFKD": "O\u0308"
    },
    "≈ì": {
        "NFC": "≈í",
        "NFD": "O\u0302E",
        "NFKC": "≈í",
        "NFKD": "O\u0302E"
    },
    "√¶": {
        "NFC": "√Ü",
        "NFD": "A\u030C",
        "NFKC": "√Ü",
        "NFKD": "A\u030C"
    },
    "eÃÅ": {
        "NFC": "EÃÅ",
        "NFD": "E\u0301",
        "NFKC": "EÃÅ",
        "NFKD": "E\u0301"
    },
    "aÃÅ": {
        "NFC": "AÃÅ",
        "NFD": "A\u0301",
        "NFKC": "AÃÅ",
        "NFKD": "A\u0301"
    },
    "aÃÄ": {
        "NFC": "AÃÄ",
        "NFD": "A\u0300",
        "NFKC": "AÃÄ",
        "NFKD": "A\u0300"
    },
    "nÃÉ": {
        "NFC": "NÃÉ",
        "NFD": "N\u0303",
        "NFKC": "NÃÉ",
        "NFKD": "N\u0303"
    },
    "√∏": {
        "NFC": "√ò",
        "NFD": "O\u0308",
        "NFKC": "√ò",
        "NFKD": "O\u0308"
    }
}

# Now you can print or verify this matrix directly in your notebook for manual checks:
print(normalization_matrix)


{'√©': {'NFC': '√â', 'NFD': 'EÃÅ', 'NFKC': '√â', 'NFKD': 'EÃÅ'}, '√®': {'NFC': 'EÃÄ', 'NFD': 'EÃÄ', 'NFKC': 'EÃÄ', 'NFKD': 'EÃÄ'}, '√º': {'NFC': 'UÃà', 'NFD': 'UÃà', 'NFKC': 'UÃà', 'NFKD': 'UÃà'}, '√ß': {'NFC': 'CÃß', 'NFD': 'CÃß', 'NFKC': 'CÃß', 'NFKD': 'CÃß'}, '√∂': {'NFC': 'OÃà', 'NFD': 'OÃà', 'NFKC': 'OÃà', 'NFKD': 'OÃà'}, '≈ì': {'NFC': '≈í', 'NFD': 'OÃÇE', 'NFKC': '≈í', 'NFKD': 'OÃÇE'}, '√¶': {'NFC': '√Ü', 'NFD': 'AÃå', 'NFKC': '√Ü', 'NFKD': 'AÃå'}, 'eÃÅ': {'NFC': 'EÃÅ', 'NFD': 'EÃÅ', 'NFKC': 'EÃÅ', 'NFKD': 'EÃÅ'}, 'aÃÅ': {'NFC': 'AÃÅ', 'NFD': 'AÃÅ', 'NFKC': 'AÃÅ', 'NFKD': 'AÃÅ'}, 'aÃÄ': {'NFC': 'AÃÄ', 'NFD': 'AÃÄ', 'NFKC': 'AÃÄ', 'NFKD': 'AÃÄ'}, 'nÃÉ': {'NFC': 'NÃÉ', 'NFD': 'NÃÉ', 'NFKC': 'NÃÉ', 'NFKD': 'NÃÉ'}, '√∏': {'NFC': '√ò', 'NFD': 'OÃà', 'NFKC': '√ò', 'NFKD': 'OÃà'}}


In [13]:
{
    '√©': {'NFC': '√â', 'NFD': 'EÃÅ', 'NFKC': '√â', 'NFKD': 'EÃÅ'},
    '√®': {'NFC': 'EÃÄ', 'NFD': 'EÃÄ', 'NFKC': 'EÃÄ', 'NFKD': 'EÃÄ'},
    '√º': {'NFC': 'UÃà', 'NFD': 'UÃà', 'NFKC': 'UÃà', 'NFKD': 'UÃà'},
    '√ß': {'NFC': 'CÃß', 'NFD': 'CÃß', 'NFKC': 'CÃß', 'NFKD': 'CÃß'},
    '√∂': {'NFC': 'OÃà', 'NFD': 'OÃà', 'NFKC': 'OÃà', 'NFKD': 'OÃà'},
    '≈ì': {'NFC': '≈í', 'NFD': 'OÃÇE', 'NFKC': '≈í', 'NFKD': 'OÃÇE'},
    '√¶': {'NFC': '√Ü', 'NFD': 'AÃå', 'NFKC': '√Ü', 'NFKD': 'AÃå'},
    'eÃÅ': {'NFC': 'EÃÅ', 'NFD': 'EÃÅ', 'NFKC': 'EÃÅ', 'NFKD': 'EÃÅ'},
    'aÃÅ': {'NFC': 'AÃÅ', 'NFD': 'AÃÅ', 'NFKC': 'AÃÅ', 'NFKD': 'AÃÅ'},
    'aÃÄ': {'NFC': 'AÃÄ', 'NFD': 'AÃÄ', 'NFKC': 'AÃÄ', 'NFKD': 'AÃÄ'},
    'nÃÉ': {'NFC': 'NÃÉ', 'NFD': 'NÃÉ', 'NFKC': 'NÃÉ', 'NFKD': 'NÃÉ'},
    '√∏': {'NFC': '√ò', 'NFD': 'OÃà', 'NFKC': '√ò', 'NFKD': 'OÃà'}
}


{'√©': {'NFC': '√â', 'NFD': 'EÃÅ', 'NFKC': '√â', 'NFKD': 'EÃÅ'},
 '√®': {'NFC': 'EÃÄ', 'NFD': 'EÃÄ', 'NFKC': 'EÃÄ', 'NFKD': 'EÃÄ'},
 '√º': {'NFC': 'UÃà', 'NFD': 'UÃà', 'NFKC': 'UÃà', 'NFKD': 'UÃà'},
 '√ß': {'NFC': 'CÃß', 'NFD': 'CÃß', 'NFKC': 'CÃß', 'NFKD': 'CÃß'},
 '√∂': {'NFC': 'OÃà', 'NFD': 'OÃà', 'NFKC': 'OÃà', 'NFKD': 'OÃà'},
 '≈ì': {'NFC': '≈í', 'NFD': 'OÃÇE', 'NFKC': '≈í', 'NFKD': 'OÃÇE'},
 '√¶': {'NFC': '√Ü', 'NFD': 'AÃå', 'NFKC': '√Ü', 'NFKD': 'AÃå'},
 'eÃÅ': {'NFC': 'EÃÅ', 'NFD': 'EÃÅ', 'NFKC': 'EÃÅ', 'NFKD': 'EÃÅ'},
 'aÃÅ': {'NFC': 'AÃÅ', 'NFD': 'AÃÅ', 'NFKC': 'AÃÅ', 'NFKD': 'AÃÅ'},
 'aÃÄ': {'NFC': 'AÃÄ', 'NFD': 'AÃÄ', 'NFKC': 'AÃÄ', 'NFKD': 'AÃÄ'},
 'nÃÉ': {'NFC': 'NÃÉ', 'NFD': 'NÃÉ', 'NFKC': 'NÃÉ', 'NFKD': 'NÃÉ'},
 '√∏': {'NFC': '√ò', 'NFD': 'OÃà', 'NFKC': '√ò', 'NFKD': 'OÃà'}}

In [5]:
import unicodedata

# Define the test characters and expected normalized values
test_characters = {
    '√©': {'NFC': '√â', 'NFD': 'EÃÅ', 'NFKC': '√â', 'NFKD': 'EÃÅ'},
    '√®': {'NFC': '√à', 'NFD': 'EÃÄ', 'NFKC': '√à', 'NFKD': 'EÃÄ'},
    '√º': {'NFC': '√ú', 'NFD': 'UÃà', 'NFKC': '√ú', 'NFKD': 'UÃà'},
    '√ß': {'NFC': '√á', 'NFD': 'CÃß', 'NFKC': '√á', 'NFKD': 'CÃß'},
    '√∂': {'NFC': '√ñ', 'NFD': 'OÃà', 'NFKC': '√ñ', 'NFKD': 'OÃà'},
    '≈ì': {'NFC': '≈í', 'NFD': '≈í', 'NFKC': '≈í', 'NFKD': '≈í'},
    '√¶': {'NFC': '√Ü', 'NFD': '√Ü', 'NFKC': '√Ü', 'NFKD': '√Ü'},
    'eÃÅ': {'NFC': '√â', 'NFD': 'EÃÅ', 'NFKC': '√â', 'NFKD': 'EÃÅ'},
    'aÃÅ': {'NFC': '√Å', 'NFD': 'AÃÅ', 'NFKC': '√Å', 'NFKD': 'AÃÅ'},
    'aÃÄ': {'NFC': '√Ä', 'NFD': 'AÃÄ', 'NFKC': '√Ä', 'NFKD': 'AÃÄ'},
    'nÃÉ': {'NFC': '√ë', 'NFD': 'NÃÉ', 'NFKC': '√ë', 'NFKD': 'NÃÉ'},
    '√∏': {'NFC': '√ò', 'NFD': '√ò', 'NFKC': '√ò', 'NFKD': '√ò'}
}

# Perform the normalization and verify it
for char, expected_norms in test_characters.items():
    for norm_form, expected_value in expected_norms.items():
        normalized = unicodedata.normalize(norm_form, char)
        # Convert the normalized value to uppercase
        normalized_upper = normalized.upper()
        
        # Print the results for verification
        print(f"Input: {char}, Norm Method: {norm_form}, Expected: {expected_value}, Normalized: {normalized_upper}")
        assert normalized_upper == expected_value, f"Test failed for {char} with {norm_form}. Expected {expected_value}, but got {normalized_upper}."


Input: √©, Norm Method: NFC, Expected: √â, Normalized: √â
Input: √©, Norm Method: NFD, Expected: EÃÅ, Normalized: EÃÅ
Input: √©, Norm Method: NFKC, Expected: √â, Normalized: √â
Input: √©, Norm Method: NFKD, Expected: EÃÅ, Normalized: EÃÅ
Input: √®, Norm Method: NFC, Expected: √à, Normalized: √à
Input: √®, Norm Method: NFD, Expected: EÃÄ, Normalized: EÃÄ
Input: √®, Norm Method: NFKC, Expected: √à, Normalized: √à
Input: √®, Norm Method: NFKD, Expected: EÃÄ, Normalized: EÃÄ
Input: √º, Norm Method: NFC, Expected: √ú, Normalized: √ú
Input: √º, Norm Method: NFD, Expected: UÃà, Normalized: UÃà
Input: √º, Norm Method: NFKC, Expected: √ú, Normalized: √ú
Input: √º, Norm Method: NFKD, Expected: UÃà, Normalized: UÃà
Input: √ß, Norm Method: NFC, Expected: √á, Normalized: √á
Input: √ß, Norm Method: NFD, Expected: CÃß, Normalized: CÃß
Input: √ß, Norm Method: NFKC, Expected: √á, Normalized: √á
Input: √ß, Norm Method: NFKD, Expected: CÃß, Normalized: CÃß
Input: √∂, Norm Method: NFC, Expected: √ñ, Norma

In [7]:
import unicodedata

# Define the test characters and expected normalized values
test_characters = {
    '√©': {'NFC': '√â', 'NFD': 'EÃÅ', 'NFKC': '√â', 'NFKD': 'EÃÅ'},
    '√®': {'NFC': '√à', 'NFD': 'EÃÄ', 'NFKC': '√à', 'NFKD': 'EÃÄ'},
    '√º': {'NFC': '√ú', 'NFD': 'UÃà', 'NFKC': '√ú', 'NFKD': 'UÃà'},
    '√ß': {'NFC': '√á', 'NFD': 'CÃß', 'NFKC': '√á', 'NFKD': 'CÃß'},
    '√∂': {'NFC': '√ñ', 'NFD': 'OÃà', 'NFKC': '√ñ', 'NFKD': 'OÃà'},
    '≈ì': {'NFC': '≈í', 'NFD': '≈í', 'NFKC': '≈í', 'NFKD': '≈í'},
    '√¶': {'NFC': '√Ü', 'NFD': '√Ü', 'NFKC': '√Ü', 'NFKD': '√Ü'},
    'eÃÅ': {'NFC': '√â', 'NFD': 'EÃÅ', 'NFKC': '√â', 'NFKD': 'EÃÅ'},
    'aÃÅ': {'NFC': '√Å', 'NFD': 'AÃÅ', 'NFKC': '√Å', 'NFKD': 'AÃÅ'},
    'aÃÄ': {'NFC': '√Ä', 'NFD': 'AÃÄ', 'NFKC': '√Ä', 'NFKD': 'AÃÄ'},
    'nÃÉ': {'NFC': '√ë', 'NFD': 'NÃÉ', 'NFKC': '√ë', 'NFKD': 'NÃÉ'},
    '√∏': {'NFC': '√ò', 'NFD': '√ò', 'NFKC': '√ò', 'NFKD': '√ò'}
}

# Perform the normalization and verify it
for char, expected_norms in test_characters.items():
    for norm_form, expected_value in expected_norms.items():
        normalized = unicodedata.normalize(norm_form, char)
        # Convert the normalized value to uppercase
        normalized_upper = normalized.upper()

        # Print the results for verification
        #print(f"Input: {char}, Norm Method: {norm_form}, Expected: {expected_value}, Normalized: {normalized_upper}")
        assert normalized_upper == expected_value, f"Test failed for {char} with {norm_form}. Expected {expected_value}, but got {normalized_upper}."


In [8]:
# what works in pytest:
test_characters = {
    '√©': {'NFC': '√â', 'NFD': 'EÃÅ', 'NFKC': '√â', 'NFKD': 'EÃÅ'},
    '√®': {'NFC': '√à', 'NFD': 'EÃÄ', 'NFKC': '√à', 'NFKD': 'EÃÄ'},
    '√º': {'NFC': '√ú', 'NFD': 'UÃà', 'NFKC': '√ú', 'NFKD': 'UÃà'},
    '√ß': {'NFC': '√á', 'NFD': 'CÃß', 'NFKC': '√á', 'NFKD': 'CÃß'},
    '√∂': {'NFC': '√ñ', 'NFD': 'OÃà', 'NFKC': '√ñ', 'NFKD': 'OÃà'},
    '≈ì': {'NFC': '≈í', 'NFD': '≈í', 'NFKC': '≈í', 'NFKD': '≈í'},
    '√¶': {'NFC': '√Ü', 'NFD': '√Ü', 'NFKC': '√Ü', 'NFKD': '√Ü'},
    'eÃÅ': {'NFC': '√â', 'NFD': 'EÃÅ', 'NFKC': '√â', 'NFKD': 'EÃÅ'},
    'aÃÅ': {'NFC': '√Å', 'NFD': 'AÃÅ', 'NFKC': '√Å', 'NFKD': 'AÃÅ'},
    'aÃÄ': {'NFC': '√Ä', 'NFD': 'AÃÄ', 'NFKC': '√Ä', 'NFKD': 'AÃÄ'},
    'nÃÉ': {'NFC': '√ë', 'NFD': 'NÃÉ', 'NFKC': '√ë', 'NFKD': 'NÃÉ'},
    '√∏': {'NFC': '√ò', 'NFD': '√ò', 'NFKC': '√ò', 'NFKD': '√ò'}
}

# Perform the normalization and verify it
for char, expected_norms in test_characters.items():
    for norm_form, expected_value in expected_norms.items():
        normalized = unicodedata.normalize(norm_form, char)
        # Convert the normalized value to uppercase
        normalized_upper = normalized.upper()

        # Print the results for verification
        #print(f"Input: {char}, Norm Method: {norm_form}, Expected: {expected_value}, Normalized: {normalized_upper}")
        assert normalized_upper == expected_value, f"Test failed for {char} with {norm_form}. Expected {expected_value}, but got {normalized_upper}."
