In [344]:
import re

class Autorep:
    """
    Autosegmental representation (AR): a tonal melody plus its association lines.

    Attributes:
    - word (str): the transcription the AR was built from ('' when built from a melody).
    - tone (str): one tone label (H, L, F, R) per tone-marked vowel of `word`.
    - mel (str): the melody with contours expanded (F -> HL, R -> LH), before OCP merging.
    - reduced_mel (str): the melody with adjacent identical tones merged (OCP).
    - assoc (list): tuples (j, k) linking tone j of `reduced_mel` to syllable k (both
      1-based). (j, None) is a floating tone, (None, k) is a floating syllable.
    """

    # Tone label -> vowels carrying that tone mark. Insertion order matters: the
    # first label whose vowel set contains a segment wins.
    _TONE_VOWELS = {
        'H': "áéíóú",
        'L': "àèìòù",
        'F': "âêîôû",
        'R': "ǎěǐǒǔ",  # caron vowels; the o/u entries used to be the falling ô/û
    }
    # Contour tones are decomposed into a sequence of level tones.
    _CONTOURS = {'F': 'HL', 'R': 'LH'}
    # Used by add_tone() when the melody is too short to copy the alternating tone.
    _OPPOSITE = {'H': 'L', 'L': 'H'}

    def __init__(self, word = "", mel = '', reduced_mel = "", assoc=None):
        """
        Initialize an Autorep object and print it.

        Parameters:
        - word (str): The word with tone markers. Default is an empty string.
        - mel (str): The unreduced melody. Only kept when `reduced_mel` is given;
          otherwise it is derived from `word`.
        - reduced_mel (str): The OCP-applied tone representation of the word. Default is an empty string.
        - assoc (list): A list of tuples (j,k) indicating the association between tone (indexed by j) and syllable (indexed by k) list.

        Raises:
        - ValueError: if `reduced_mel` has two adjacent identical tones, or if the
          highest tone index in `assoc` differs from the length of `reduced_mel`.
        """
        self.word = word
        self.tone = ''
        self.mel = mel
        self.reduced_mel = reduced_mel
        # Copy so that later changes to the caller's list do not leak into this AR.
        self.assoc = list(assoc) if assoc is not None else []

        if self.reduced_mel:
            # check if the provided melody obeys OCP - no adjacent identical tones
            # e.g the input "LHHL" raises error
            for left, right in zip(self.reduced_mel, self.reduced_mel[1:]):
                if left == right:
                    raise ValueError("Input does not obey OCP")

        if self.assoc:
            # check if the tone number aligns with what association indicates
            tone_indices = [j for j, _ in self.assoc if j is not None]
            if not tone_indices or max(tone_indices) != len(self.reduced_mel):
                raise ValueError("Tone association does not align with tone number")

        if not reduced_mel:
            self._init_from_word()

        print (self.word, self.tone, self.reduced_mel, self.assoc)

    def _init_from_word(self):
        """Derive tone, mel, reduced_mel and assoc from the tone marks in self.word."""
        self.tone_labels = dict(self._TONE_VOWELS)
        # One label per tone-marked vowel; every other segment contributes nothing.
        self.tone = ''.join(
            next((label for label, vowels in self.tone_labels.items() if seg in vowels), '')
            for seg in self.word
        )

        # Expand each syllable's tone into level-tone units: (unit, syllable index).
        units = [
            (unit, syl)
            for syl, label in enumerate(self.tone, start=1)
            for unit in self._CONTOURS.get(label, label)
        ]
        self.mel = ''.join(unit for unit, _ in units)

        # Adjacent identical units share one tone of the reduced melody (OCP),
        # so the tone index only advances when the unit changes.
        new_assoc = []
        tone_index = 0
        previous = None
        for unit, syl in units:
            if unit != previous:
                tone_index += 1
            new_assoc.append((tone_index, syl))
            previous = unit
        self.assoc = new_assoc

        self.reduced_mel = re.sub(r"(.)\1+", r"\1", self.mel)

    def _with_assoc(self, assoc):
        """Build a new AR sharing this melody but using the given association list."""
        return Autorep(mel=self.mel, reduced_mel=self.reduced_mel, assoc=assoc)

    def _tone_pos(self, check_tone):
        """
        Return the 1-based positions of `check_tone` in the reduced melody.

        Positions refer to `reduced_mel` because that is the tier the tone indices
        in `assoc` point into. Returns False when the tone does not occur.
        """
        check_tone = check_tone.upper()
        positions = [
            index
            for index, tone in enumerate(self.reduced_mel, start=1)
            if tone == check_tone
        ]
        return positions if positions else False

    def _spreading_count(self,check_tone):
        """Print, for every occurrence of `check_tone`, how many association lines it has."""
        # `_tone_pos` returns False for an absent tone; treat that as "nothing to report".
        for i in self._tone_pos(check_tone) or []:
            spreading = sum(1 for j in self.assoc if j[0] == i)
            print(check_tone,i,"spread length",spreading)

    def check_contain(self, ar):
        """
        Check whether `ar` occurs inside this AR.

        The first occurrence of `ar.mel` inside `self.mel` is located, the matching
        slice of `self.assoc` is renumbered to start at (1, 1), and the result is
        compared with `ar.assoc`.

        Returns:
        - True / False for ARs without floating units.
        - None (after printing a placeholder message) when either AR has a
          floating tone or syllable; that case is not implemented yet.

        Raises:
        - KeyError: if `ar.mel` is not a substring of `self.mel`.
        """
        if self.check_float() or ar.check_float():
            print("fix later")
            return None

        if len(ar.mel) > len(self.mel) or ar.mel not in self.mel:
            raise KeyError

        # take out the first piece where two ARs match
        start = self.mel.find(ar.mel)
        shared_piece = self.assoc[start:start + len(ar.mel)]
        if not shared_piece:
            return ar.assoc == []

        # Shift both indices so the shared piece starts at tone 1 / syllable 1.
        tone_shift = shared_piece[0][0] - 1
        syl_shift = shared_piece[0][1] - 1
        updated_piece = [(p - tone_shift, q - syl_shift) for (p, q) in shared_piece]
        return updated_piece == ar.assoc

    def add_tone(self):
        """
        Add an unassociated tone in the AR by adding an tone to the melody and an assocation (j,k)
        j is one-unit increase of the tone numbers
        k is 'None' indicating the syllable is not associated w any tone unit

        The appended tone alternates with the last one, e.g. "LHL" becomes "LHLH"
        and "L" becomes "LH".

        Raises:
        - ValueError: if the reduced melody is empty, or is a single tone other
          than H or L, so that no alternating tone can be chosen.
        """
        if len(self.reduced_mel) >= 2:
            # In an OCP-obeying melody the second-to-last tone differs from the last.
            next_tone = self.reduced_mel[-2]
        elif self.reduced_mel in self._OPPOSITE:
            next_tone = self._OPPOSITE[self.reduced_mel]
        else:
            raise ValueError("Cannot choose an alternating tone for melody %r" % self.reduced_mel)

        new_reduced_mel = self.reduced_mel + next_tone
        new_assoc = self.assoc.copy()
        new_assoc.append((len(new_reduced_mel),None))
        new_autorep = Autorep(mel = self.mel, reduced_mel = new_reduced_mel, assoc=new_assoc)
        return new_autorep

    def add_syl(self):
        """
        Add an unassociated syllable in the AR by adding an assocation (j,k)
        j is 'None' indicating the syllable is not associated w any tone unit
        k is the one-unit increase of current syllable number (the variable max_syllable)

        Raises:
        - ValueError: if the AR has no syllable yet.
        """
        syllables = [k for j,k in self.assoc if k is not None]
        if not syllables:
            raise ValueError("Cannot add a syllable to an AR without syllables")
        new_assoc = self.assoc.copy()  # Create a copy to avoid modifying the original assoc list
        new_assoc.append((None, max(syllables) + 1))
        new_autorep = Autorep(mel = self.mel, reduced_mel = self.reduced_mel, assoc=new_assoc)
        return new_autorep

    def float_tone(self):
        """Return the floating-tone entries (j, None) of `assoc`, in list order."""
        return [(j, k) for (j, k) in self.assoc if k is None]

    def float_syl(self):
        """Return the floating-syllable entries (None, k) of `assoc`, in list order."""
        return [(j, k) for (j, k) in self.assoc if j is None]

    def check_float(self):
        """Return True when the AR has at least one floating tone or syllable."""
        return bool(self.float_tone() or self.float_syl())

    def _last_linked(self):
        """Return the largest fully linked (tone, syllable) pair; ValueError if none."""
        linked = [(j, k) for (j, k) in self.assoc if j is not None and k is not None]
        if not linked:
            raise ValueError("No associated tone-syllable pair to attach to")
        return max(linked)

    def float_tone_to_syl(self):
        """
        Associate the first floating tone to the last syllable
        e.g LH [(1,1), (2,None)] -> [(1,1), (2,1)]

        Returns a new association list; `self.assoc` is left untouched.

        Raises:
        - ValueError: if there is no floating tone, or nothing to attach it to.
        """
        floating = self.float_tone()
        if not floating:
            raise ValueError("No floating tone to associate")
        _, last_syl = self._last_linked()
        first_float_tone = min(floating)
        new_assoc = self.assoc[:]
        new_assoc[self.assoc.index(first_float_tone)] = (first_float_tone[0], last_syl)
        return new_assoc

    def float_syl_to_tone(self):
        """
        Associate the first floating syllable to the last associated tone
        e.g HL [(1,1), (2,2), (None,3)] -> [(1,1), (2,2), (2,3)]

        Returns a new association list; `self.assoc` is left untouched.

        Raises:
        - ValueError: if there is no floating syllable, or nothing to attach it to.
        """
        floating = self.float_syl()
        if not floating:
            raise ValueError("No floating syllable to associate")
        last_tone, _ = self._last_linked()
        first_float_syl = min(floating)
        new_assoc = self.assoc[:]
        new_assoc[self.assoc.index(first_float_syl)] = (last_tone, first_float_syl[1])
        return new_assoc

    def flt_syl_flt_tone(self):
        """
        Link the first floating tone with the first floating syllable.

        The merged line replaces whichever of the two entries comes first in
        `assoc`; the other entry is dropped. Returns a new association list.

        Raises:
        - ValueError: if the AR lacks a floating tone or a floating syllable.
        """
        floating_tones = self.float_tone()
        floating_syls = self.float_syl()
        if not floating_tones or not floating_syls:
            raise ValueError("Need both a floating tone and a floating syllable")
        float_tone = min(floating_tones)
        float_syl = min(floating_syls)

        tone_at = self.assoc.index(float_tone)
        syl_at = self.assoc.index(float_syl)
        new_assoc = self.assoc[:]
        new_assoc[min(tone_at, syl_at)] = (float_tone[0], float_syl[1])
        del new_assoc[max(tone_at, syl_at)]
        return new_assoc

    def add_assoc(self):
        """
        Add one association line involving a floating unit.

        Returns:
        - a list of three Autorep objects when both a floating tone and a floating
          syllable exist: tone linked to syllable, syllable linked to the last
          associated tone, tone linked to the last associated syllable;
        - a single Autorep when only one kind of floating unit exists.

        Raises:
        - ValueError: if the AR has no floating unit. (The original guard tested
          the bound method instead of calling it, so its KeyError was unreachable
          and a ValueError surfaced from further down; that type is kept.)
        """
        has_float_tone = bool(self.float_tone())
        has_float_syl = bool(self.float_syl())
        if not (has_float_tone or has_float_syl):
            raise ValueError("No floating tone or syllable to associate")

        if has_float_tone and has_float_syl:
            new_ar = [self._with_assoc(self.flt_syl_flt_tone()),
                      self._with_assoc(self.float_syl_to_tone()),
                      self._with_assoc(self.float_tone_to_syl())]
            return new_ar
        if has_float_syl:
            return self._with_assoc(self.float_syl_to_tone())
        return self._with_assoc(self.float_tone_to_syl())

    def show(self):
        """Print word, tone string, reduced melody and association list."""
        print (self.word, self.tone, self.reduced_mel, self.assoc)

    def __eq__(self, other):
        """
        Two ARs are equal when reduced melody and association lines agree.

        The unreduced `mel` is not compared: it is empty for ARs built from a
        melody, which made ARs with different melodies compare equal.
        """
        if not isinstance(other, Autorep):
            return NotImplemented
        return self.reduced_mel == other.reduced_mel and self.assoc == other.assoc

    def next_sup_factor(self):
        """
        Return the ARs obtained by one extension step.

        Without floating units: (add_tone(), add_syl()).
        With floating units: (add_tone(), add_syl(), add_assoc()), where the last
        item is whatever add_assoc() returns (a single AR or a list of three).
        """
        if not self.check_float():
            return self.add_tone(), self.add_syl()
        else:
            return self.add_tone(), self.add_syl(), self.add_assoc()




## Test Autorep class by using transcriptions
Take two examples, _máâ_ and _mà_.

In [351]:

# Build two ARs from transcriptions. The Autorep constructor prints
# word, tone string, reduced melody and association list on creation.
a = Autorep("máâ") # prints: máâ HF HL [(1, 1), (1, 2), (2, 2)]
b = Autorep("mà") # prints: mà L L [(1, 1)]


print('Check if máâ contains mà:', a.check_contain(b)) # Check if máâ contains mà: True


print('add one floating syllable to mà L [(1, 1)] and create a new AR:')
c = b.add_syl() # prints:   L [(1, 1), (None, 2)]  (word and tone are empty strings)
type(c)         # <class '__main__.Autorep'>; not displayed, since it is not the cell's last expression


print('add one floating tone to máâ HL[(1, 1), (1, 2), (2, 2)]')
d = a.add_tone() # prints:   HLH [(1, 1), (1, 2), (2, 2), (3, None)]
print('keep adding one floating syl ')
# d is rebound here, so re-running this cell alone still starts from a.add_tone() above.
d = d.add_syl() # prints:   HLH [(1, 1), (1, 2), (2, 2), (3, None), (None, 3)]

máâ HF HL [(1, 1), (1, 2), (2, 2)]
mà L L [(1, 1)]
Check if máâ contains mà: True
add one floating syllable to mà L [(1, 1)] and create a new AR:
  L [(1, 1), (None, 2)]
add one floating tone to máâ HL[(1, 1), (1, 2), (2, 2)]
  HLH [(1, 1), (1, 2), (2, 2), (3, None)]
keep adding one floating syl 
  HLH [(1, 1), (1, 2), (2, 2), (3, None), (None, 3)]


In [363]:

# Relies on `d` from the previous cell: an AR with one floating tone and one floating syllable.
print("Orginal")
d.show()


print("\n All possible associations between tones and syllables")
# With both kinds of floating unit present, add_assoc() returns a list of three ARs
# (each one is printed by the constructor as it is built).
new = d.add_assoc()

# Association lists of the three results, in order:
# [(1, 1), (1, 2), (2, 2), (3, 3)]                 the floating tone and the floating syllable linked to each other
# [(1, 1), (1, 2), (2, 2), (3, None), (2, 3)]      the floating syllable linked to the last non-floating tone, leaving the floating tone unlinked
# [(1, 1), (1, 2), (2, 2), (3, 2), (None, 3)]      the floating tone linked to the last non-floating syllable, leaving the floating syllable unlinked


Orginal
  HLH [(1, 1), (1, 2), (2, 2), (3, None), (None, 3)]

 All possible associations between tones and syllables
  HLH [(1, 1), (1, 2), (2, 2), (3, 3)]
  HLH [(1, 1), (1, 2), (2, 2), (3, None), (2, 3)]
  HLH [(1, 1), (1, 2), (2, 2), (3, 2), (None, 3)]


In [369]:
# AR without floating units: next_sup_factor() returns (add_tone(), add_syl()).
a = Autorep("máâ")

# NOTE(review): the message below says "máà" while the AR is built from "máâ" — confirm which is intended.
print("\nthe next sup factor for máà (no floating units) \ntwo options:")
a.next_sup_factor()

máâ HF HL [(1, 1), (1, 2), (2, 2)]

the next sup factor for máà (no floating units) 
two options:
  HLH [(1, 1), (1, 2), (2, 2), (3, None)]
  HL [(1, 1), (1, 2), (2, 2), (None, 3)]


(<__main__.Autorep at 0x1312ebe90>, <__main__.Autorep at 0x1312e8e90>)

In [328]:
# AR built directly from a reduced melody, with one floating syllable (None, 3).
b = Autorep(reduced_mel='HL',assoc = [(1, 1), (1, 2), (2, 2), (None, 3)])

# With a floating unit the result is a 3-tuple: (add_tone(), add_syl(), add_assoc()).
# Only a syllable floats here, so add_assoc() yields a single AR rather than a list.
b.next_sup_factor()

  HL [(1, 1), (1, 2), (2, 2), (None, 3)]
  HLH [(1, 1), (1, 2), (2, 2), (None, 3), (3, None)]
  HL [(1, 1), (1, 2), (2, 2), (None, 3), (None, 4)]
  HL [(1, 1), (1, 2), (2, 2), (2, 3)]


(<__main__.Autorep at 0x1311c8500>,
 <__main__.Autorep at 0x1311c8ef0>,
 <__main__.Autorep at 0x1311c8bc0>)

In [339]:
# AR with both a floating syllable (None, 4) and a floating tone (4, None).
c = Autorep(reduced_mel='HLHL',assoc = [(1, 1), (2, 2), (3, 3), (None, 4), (4, None)])

# FIXME: with both kinds of floating unit, the third element of the returned tuple is a
# list of three ARs (see add_assoc), so the result is not a flat tuple of ARs.
print('Multiple options for nxt sup fac fixme')
c.next_sup_factor()

  HLHL [(1, 1), (2, 2), (3, 3), (None, 4), (4, None)]
Multiple options for nxt sup fac
  HLHLH [(1, 1), (2, 2), (3, 3), (None, 4), (4, None), (5, None)]
  HLHL [(1, 1), (2, 2), (3, 3), (None, 4), (4, None), (None, 5)]
  HLHL [(1, 1), (2, 2), (3, 3), (4, 4)]
  HLHL [(1, 1), (2, 2), (3, 3), (3, 4), (4, None)]
  HLHL [(1, 1), (2, 2), (3, 3), (None, 4), (4, 3)]


(<__main__.Autorep at 0x1314195e0>,
 <__main__.Autorep at 0x131419280>,
 [<__main__.Autorep at 0x131419a00>,
  <__main__.Autorep at 0x131419610>,
  <__main__.Autorep at 0x13141b8f0>])