In [54]:
# -*- coding: utf-8 -*-
import os
import re
import pandas as pd
import graphviz


## set the phonology
h_tone = set("áéíóú")
l_tone = set("àèìòù")   
f_tone = set("âêîôû")   
r_tone = set("ǎěǐǒǔ")  
untoned = set("aeiou")  # for long vowels (â indicates a short vowel with fall tone; âa a long F)
vowels = h_tone | l_tone | r_tone | f_tone | untoned
moraic_coda = 1 # 1 if coda carries mora else 0


simple_tones =  h_tone | l_tone   # Combine high and low tones into one set
special_tones =  r_tone | f_tone  # Set for special tones (F, R)
         
class Autorep:

    global check_coda
    def check_coda(syl):
        # check if a syllable contains a coda
        for i in range(1, len(syl)):
            if syl[i] not in vowels and syl[i-1] in vowels:
                return moraic_coda 
        return 0
    
    global vowel_count  # check number of vowels a syllable contains 
    def vowel_count(syl):
        vowel_count = 0
        for i in range(0, len(syl)):  
            if syl[i] in vowels or syl[i] in simple_tones:
                vowel_count += 1
            elif syl[i] in special_tones and (i + 1 < len(syl)) and syl[i + 1] not in vowels:
                vowel_count += 2
        return vowel_count
        
    global mora_count # count the number of mora in a string 
    def mora_count(str): 
        mora_count = 0
        mora_list =[]
        syllables = str.split(".")
        for i, syl in enumerate(syllables):
            syl_weight = check_coda(syl) + vowel_count(syl)
            mora_list.append(syl_weight)
            #for j in range(syl_weight):
             #   self.assoc.append((self.tone[i],j+mora_count+1,i+1))
            mora_count += syl_weight
        return mora_count, mora_list
    

    global contour_count # count the number of contour tones in a string 
    def contour_count(s):
        count = 0  
        for i in s:
            if i in special_tones:
                count += 1
        return count

    
    def __init__(self, word="", tone="", ocp_mel="", assoc=None):
        """
        Initialize an Autorep object.

        Parameters:
        - word (str): The word with tone markers.                           e.g. mútûm
        - tone (str): The tone markers directly extracted from the word     e.g. HF       
        - mel (str): The melody (F -> HL and R -> LH) before OCP applies.   e.g. HHL
        - ocp_mel (str): The OCP-applied tone representation of the word.   e.g. HL
        - assoc (list): A list of tuples (j,k) indicating the association   e.g  [(1, 1, 1), (1, 2, 2), (2, 3, 2)] two-toned trimoraic bisyllabic form
                        between tone (indexed by j)                          
                                mora (indexed by i) 
                                syllable (indexed by k) list.
        """

        self.word = word
        self.tone = ''
        self.ocp_mel = ocp_mel
        self.assoc = assoc if assoc is not None else []

        self.tone_labels = {'H': h_tone, 'L': l_tone, 'F': f_tone, 'R': r_tone}
        self.tone += ''.join(next((k for k, v in self.tone_labels.items() if seg in v), '') for seg in self.word)
    
        self.syllables = self.word.split(".")
        
        #assign association 
        if self.assoc is None:
            mora_idx = 0
            for i, syl in enumerate(self.syllables):
                syl_weight = check_coda(syl) + vowel_count(syl)
                for j in range(syl_weight):
                    self.assoc.append((self.tone[i],j+mora_idx+1,i+1))
                mora_idx += syl_weight
       

        # Flatten F into HL
        for i, (t, m, s) in enumerate(self.assoc):    
            if self.tone[s-1] == 'F':
                # Join syllables up to s-1 to pass as a string to mora_count
                syllable_str = ''.join(self.syllables[:s-1])
                if m - mora_count(syllable_str)[0] == 1:
                    t = 'H'
                else:
                    t = "L"
            elif self.tone[s-1] == 'R':
                # Join syllables up to s-1 to pass as a string to mora_count
                syllable_str = ''.join(self.syllables[:s-1])
                if m - mora_count(self.syllables[:s-1])[0] == 1:
                    t = 'L'
                else:
                    t = "H"
            
            # Update the association
            self.assoc[i] = (t, m, s)

        self.mel = ''.join(tone for tone, _, _ in self.assoc)
        self.ocp_mel = re.sub(r"(.)\1+", r"\1", self.mel)
        
        j = 0
        i = 0
        while j < len(self.assoc) and i < len(self.ocp_mel): 
            if self.assoc[j][0] == self.ocp_mel[i]:  
                t,m,s = self.assoc[j]
                t = i + 1  # Assign the new value (i + 1) to t
                self.assoc[j] = (t, m, s)  # Update self.assoc[j] with the new tone
                j += 1
            else:
                i += 1
        
    def check_empty(self):
        return (self.word == "" and self.assoc == [] and self.mel == "" and self.ocp_mel == "")     

    def syl_num(self):
        return max([s for t,m,s in self.assoc if s is not None], default=0)
    
    def mora_num(self):
        return max([m for t,m,s in self.assoc if m is not None], default=0)    
    
    @staticmethod
    def index_reset(lst):
        """
        Reset indices of the association list to start from 1.
        """
        t,m,s = lst[0] if lst else (None, None, None)
        distance_t = t - 1 if t is not None else None
        distance_m = m - 1 if m is not None else None
        distance_s = s - 1 if s is not None else None
        new_list = []
        for (t,m,s) in lst:
            t = t - distance_t if t is not None and distance_t is not None else None
            m = m - distance_m if m is not None and distance_m is not None else None
            s = s - distance_s if m is not None and distance_s is not None else None
            new_list.append((t,m,s))
        return new_list 

    def check_contain(self, ar):
        """
        Check if the Autorep object contains another Autorep object.
        """ 
        
        #self.assoc = self.sort_assoc(self.assoc) #sort associations, put floating right side
        #ar.assoc = ar.sort_assoc(ar.assoc)
        
        if self.check_empty() or ar.check_empty(): # empty ar contains nothing and everything contains empty
            return False if not ar.check_empty() else True 
        
        elif ar == self: #reflexivity 
            return True
        
        elif ar.ocp_mel in self.ocp_mel:
            if ar.mora_num() <= self.mora_num():
                check_position = [m.start() + 1 for m in re.finditer(r'(?={})'.format(ar.ocp_mel), self.ocp_mel)]
                for i_match in check_position:
                    #print(f"shared {i_match} tone start from the position of {check_position}")
                    window_size = len(ar.assoc)
                    scan_time = len([tup for tup in self.assoc if tup[0] == i_match]) #if a L has multiple (2,1) (2,2)
                    restriction = [tup for tup in self.assoc if tup[0] == i_match]
                    #print(f"restriction {restriction}")
                    #print(f"scan time {scan_time}")
                    for i,tup in enumerate(restriction):
                        get_index = self.assoc.index(tup)
                        #print(f"{i} the check..")
                        #print(f"check this {[tup for tup in ar.assoc if not (tup[0] is None or tup[1] is None)]}")
                        #print(f"is contained with {self.assoc[get_index:get_index+window_size]} which resets to {self.index_reset(self.assoc[get_index:get_index+window_size])}")
                        #print(f"do they match? {self.index_reset(self.assoc[get_index:get_index+window_size]) == [tup for tup in ar.assoc if not (tup[0] is None or tup[1] is None)]}")
                        if self.index_reset(self.assoc[get_index:get_index+window_size]) == [tup for tup in ar.assoc if not (tup[0] is None or tup[1] is None or tup[2]is None)]:
                            return True
                    return False
                else:
                    return False
            else:
                return False
        else:
            return False

In [56]:
a = Autorep("dú.hùu")  
b = Autorep("fàa")  
d = Autorep("mú.tûm")
c = Autorep("gáa.jí.màa.rée")  
print(a.assoc)


[]


In [47]:
c.check_contain(a)

True

In [57]:
# Define the list of Autorep objects with different ocp_mel values
autorep_list = [
    Autorep(ocp_mel="HL", assoc=[(1, 1, 1), (2, 2, 1), (2,3, 2)]),
    Autorep(ocp_mel="HL", assoc=[(1, 1, 1), (2, 2, 1), (2,3, None)])]

for i, ar1 in enumerate(autorep_list):
    for j, ar2 in enumerate(autorep_list):
        print(f"{i} contain {j}: {ar1.check_contain(ar2)}")


IndexError: string index out of range