In [1]:
import re

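# ==============================================================================
# Syllable tokenizer
# There is no separate tokenizer definition: the syllable-matching regular
# expression is applied inside custom_syllable_splitter below.
# ==============================================================================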


# ==============================================================================
# The Main "Syllable Splitter" Function
# ==============================================================================
# A consonant, an optional asat and the virama (stacking sign), matched only when
# another consonant follows. The lookahead leaves that following consonant
# unconsumed, so chained stacks (C + virama + C + virama + C) are rewritten in a
# single pass without looping.
_STACKED_CONSONANT_RE = re.compile(r'([က-အ])်?္(?=[က-အ])')

# One token per match, tried in this order:
#   1. a run of ASCII letters/digits,
#   2. a Burmese syllable: an initial letter followed by any number of
#      kinzi / consonant+asat finals / stacked consonants / dependent signs,
#   3. any other single character.
_SYLLABLE_RE = re.compile(
    r"[A-Za-z0-9]+"
    r"|[က-အဥဦ](?:င်္|[က-အဥ][ှ]*[့း]*[်]|္[က-အ]|[ါ-ှႏꩻ][ꩻ]*)*"
    r"|."
)


def custom_syllable_splitter(text: str) -> list:
    """
    Split Burmese text into syllables, breaking up all consonant stacks.

    The split is done in two steps so that stacked words such as
    'နက္ခတ္တ' and 'ဥက္ကဋ္ဌ' come out as separate closed syllables:

    1. Every stacked pair (consonant + virama + consonant) is rewritten as
       consonant + asat + consonant, which removes the stacking sign.
    2. The unstacked text is tokenized with the syllable pattern.

    Args:
        text: The text to split. May be empty.

    Returns:
        A list of tokens (syllables, ASCII alphanumeric runs and other single
        characters) in input order. Whitespace, including newlines, separates
        tokens and is never returned. The function has no side effects.
    """
    # --- Step 1: unstack consonants (asat replaces an optional asat + virama) ---
    unstacked_text = _STACKED_CONSONANT_RE.sub(r'\1်', text)

    # --- Step 2: tokenize, discarding whitespace-only tokens ---
    # Newlines are not matched by '.', so they are skipped between matches.
    return [token for token in _SYLLABLE_RE.findall(unstacked_text) if token.strip()]

In [2]:
# --- Constants ---
# All Burmese consonants, U+1000 through U+1021 (including 'ဉ' and the great 'အ').
# This is the same set as the tokenizer's [က-အ] range and is used to distinguish
# an initial consonant from vowels/diacritics.
BURMESE_CONSONANTS = "ကခဂဃငစဆဇဈဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠအ"
# Consonantal medials (part of the onset)
CONSONANTAL_MEDIALS = "ျြှွ"

# --- The Rhyme Group Normalization Map ---
# Maps an entire spelled rime to the canonical spelling of its rhyme group.
# Rimes that are not listed are their own group (the lookup falls back to the
# rime itself), so only the non-canonical spellings need an entry.
# Key: Orthographic (spelled) rime
# Value: Normalized phonetic rime for comparison
RIME_NORMALIZATION_MAP = {
    # -at sound (အတ်): အပ် rhymes with အတ်, which is the canonical spelling
    "ပ်": "တ်",
    # -et sound (အက်): different vowel from -at, so it is a group of its own
    "က်": "က်",
    # -it sound: all three spellings share one group
    "ိတ်": "ိက်", "ိပ်": "ိက်", "ိစ်": "ိက်",
    # -ut sound (အုတ်)
    "ုတ်": "ုပ်",

    # -an sound (အန်)
    "မ်": "န်",
    # -in sound (အိန်)
    "ိမ်": "ိန်",
    # -un sound (အုန်)
    "ုမ်": "ုန်",

    # Special finals
    # NOTE(review): ဉ် is grouped with -an here; it may belong with င် instead -- confirm.
    "ဉ်": "န်",
    "ည်": "ယ်",  # Sounds like -ay

    "ယ့်": "ဲ့",
}


def _get_onset_length(word: str) -> int:
    """
    Return how many leading characters of `word` form its onset.

    The onset is one character from BURMESE_CONSONANTS followed by every
    character from CONSONANTAL_MEDIALS that comes directly after it. The
    result is 0 when `word` is empty or does not start with such a consonant.
    """
    if not word or word[0] not in BURMESE_CONSONANTS:
        return 0

    # Walk past the initial consonant and stop at the first non-medial character
    for position, character in enumerate(word[1:], start=1):
        if character not in CONSONANTAL_MEDIALS:
            return position

    # Everything after the initial consonant was a medial
    return len(word)

# --- The Main Rhyme Function ---
def get_rhyme_group(word: str) -> str:
    """
    Return the normalized rhyme part (rime) of a Burmese syllable.

    The onset, as measured by _get_onset_length, is dropped and the tall
    'ါ' is folded into 'ာ'. The remaining rime is then looked up in
    RIME_NORMALIZATION_MAP; a rime without an entry is returned as spelled.
    An empty rime yields the empty string.
    """
    # 1. Everything after the onset is the rime
    spelled_rime = word[_get_onset_length(word):].replace('ါ', 'ာ')

    # 2. Map the spelling to its canonical group, falling back to the spelling
    if spelled_rime:
        return RIME_NORMALIZATION_MAP.get(spelled_rime, spelled_rime)
    return ""


In [3]:

def check_than_bauk(poem: str) -> bool:
    """
    Check a poem against the Than-Bauk rules and print a rule-by-rule report.

    Rules are checked in this order, and checking stops at the first stage
    that fails:

    1. The poem has exactly 3 non-empty lines.
    2. Line 1 has 4 syllables, line 2 has 3, and line 3 has 5 or 7.
    3. The 4th syllable of line 1, the 3rd of line 2 and the 2nd of line 3
       belong to the same rhyme group (the 4-3-2 scheme).

    Args:
        poem: The poem text, one verse line per text line. Blank lines and
            surrounding whitespace are ignored. The verse marks '၊' and '။'
            are not counted as syllables.

    Returns:
        True if all three rules passed, otherwise False. The detailed report
        is written to stdout.
    """
    separator = "-" * 30
    # The tokenizer emits these marks as stand-alone tokens; they are not syllables
    verse_marks = ("၊", "။")
    # Allowed syllable counts for lines 1, 2 and 3
    allowed_counts_by_line = ((4,), (3,), (5, 7))
    # (line index, syllable index) of the three rhyming syllables
    rhyme_positions = ((0, 3), (1, 2), (2, 1))

    print(separator)
    print("📜 Analyzing Poem:\n")
    print(separator)

    # 1. Preprocessing: split into lines, dropping empty ones
    lines = [line.strip() for line in poem.strip().split('\n') if line.strip()]

    # --- Rule 1: Check for 3 lines ---
    if len(lines) != 3:
        print(f"❌ Rule Failed: A Than-Bauk must have exactly 3 lines. (Found: {len(lines)})")
        print(separator)
        return False

    print("✅ Rule Passed: Poem has 3 lines.")
    words_by_line = [
        [word for word in custom_syllable_splitter(line) if word not in verse_marks]
        for line in lines
    ]

    # --- Rule 2: Check word counts per line ---
    all_word_counts_valid = True
    for line_number, (words, allowed_counts) in enumerate(
        zip(words_by_line, allowed_counts_by_line), start=1
    ):
        found = len(words)
        allowed_label = " or ".join(str(count) for count in allowed_counts)
        if found in allowed_counts:
            # Only spell out the alternatives when there is more than one
            detail = f" (which is {allowed_label})" if len(allowed_counts) > 1 else ""
            print(f"✅ Rule Passed: Line {line_number} has {found} words{detail}.")
        else:
            print(
                f"❌ Rule Failed: Line {line_number} must have {allowed_label} words. "
                f"(Found: {found})"
            )
            all_word_counts_valid = False

    # If word counts are wrong, we can't reliably check rhymes, so we stop.
    if not all_word_counts_valid:
        print("\n⚠️ Cannot check rhyme scheme due to incorrect word counts.")
        print(separator)
        return False

    # --- Rule 3: Check Rhyme Scheme (4-3-2) ---
    # The counts were validated above, so every position below exists.
    rhyme_words = [words_by_line[row][column] for row, column in rhyme_positions]
    rhymes = [get_rhyme_group(word) for word in rhyme_words]

    for word, rhyme in zip(rhyme_words, rhymes):
        print(f"  - Rhyme part for '{word}': '{rhyme}'")

    rhymes_match = len(set(rhymes)) == 1
    if rhymes_match:
        print("\n✅ Rule Passed: The 4-3-2 rhyme scheme is correct.")
    else:
        print("\n❌ Rule Failed: The words do not rhyme correctly.")

    print(separator)
    return rhymes_match

# --- Examples to Test ---

# Example 1: A perfect Than-Bauk (by Ledī Sayādaw)
# Rhyme words (4th syllable of line 1, 3rd of line 2, 2nd of line 3): ပြ, က, ချ
# All three are open syllables, so their rhyme part is the empty string.
poem_correct = """
ခိုးသားထားပြ
ဟူတုံက
မုချကြောက်အပ်စွာ
"""


# Run the checker on the sample poem and print its report.
if __name__ == "__main__":
    check_than_bauk(poem_correct)
   
 

------------------------------
📜 Analyzing Poem:

------------------------------
✅ Rule Passed: Poem has 3 lines.



✅ Rule Passed: Line 1 has 4 words.
✅ Rule Passed: Line 2 has 3 words.
✅ Rule Passed: Line 3 has 5 words (which is 5 or 7).
  - Rhyme part for 'ပြ': ''
  - Rhyme part for 'က': ''
  - Rhyme part for 'ချ': ''

✅ Rule Passed: The 4-3-2 rhyme scheme is correct.
------------------------------


In [4]:
# Interactive demo: read a poem from standard input and analyze it.
print("\n\nNow you can try your own!")
print("Paste your 3-line poem below and press Enter twice to finish.")

# Collect lines until the first empty line or the end of the input stream
user_input_lines = []
try:
    while True:
        line = input()
        if not line:
            break
        user_input_lines.append(line)
except EOFError:
    # End of input: keep whatever was collected so far
    pass

# Only run the checker when at least one line was entered
user_poem = "\n".join(user_input_lines)
if user_poem:
    check_than_bauk(user_poem)



Now you can try your own!
Paste your 3-line poem below and press Enter twice to finish.


"၁။ ခိုးသားထားပြ၊ ဟူတုံက၊ မုချကြောက်အပ်စွာ။

၂။ ကိုယ်တွင်းသူခိုး၊ မြင်သူခိုး၊ နှစ်မျိုးမှတ်ကြရာ။

၃။ ကိုယ်တွင်းထားပြ၊ ပြင်ထားပြ၊ နှစ်ဝရှိသည်သာ။

၄။ ကိုယ်တွင်းရန်သူ၊ ပြင်ရန်သူ၊ နှစ်မူခွဲရှုရာ။

၅။ ပြင်ပလူထက်၊ ကိုယ်တွင်းခက်၊ ဆက်ဆက်သိအပ်စွာ။

၆။ ကိုယ်တွင်းခိုးသား၊ လက်ခံထား၊ ပြင်ခိုးသားတွေ မွှေလိမ့်မည်။

၇။ ကိုယ်တွင်းထားပြ၊ လက်ခံကြ၊ ပြင်ကထားပြ-ချေလိမ့်မည်။

၈။ ကိုယ်တွင်းရန်သူ၊ လက်ခံမူ၊ ပြင်ရန်သူကြောင့်- သေလိမ့်မည်။ "
