In [21]:
import re

class RegexAutomaton:
    def __init__(self, pattern):
        self.pattern = re.compile(pattern)

    def test(self, word):
        return "Accepted" if self.pattern.fullmatch(word) else "Not Accepted"

# Define regex for the DFA
# Must start with a letter (A-Z or a-z), followed by 0 or more lowercase letters
regex_dfa = RegexAutomaton(r"^[a-z][a-z]*$")

# Test examples
words = ["Cat", "dog", "A", "zebra", "dog1", "1dog", "DogHouse", "Dog_house", " cats"]

for word in words:
    print(f"{word!r}: {regex_dfa.test(word.strip())}")


'Cat': Not Accepted
'dog': Accepted
'A': Not Accepted
'zebra': Accepted
'dog1': Not Accepted
'1dog': Not Accepted
'DogHouse': Not Accepted
'Dog_house': Not Accepted
' cats': Accepted


In [5]:
# Your function
def simulate_dfa(word):
    state = 'q0'
    for char in word:
        if state == 'q0':
            if 'a' <= char <= 'z':
                state = 'q1'
            else:
                state = 'q_dead'
        elif state == 'q1':
            if 'a' <= char <= 'z':
                state = 'q1'
            else:
                state = 'q_dead'
        elif state == 'q_dead':
            return "Not Accepted"
    return "Accepted" if state == 'q1' else "Not Accepted"

# Test examples
words = ["Cat", "dog", "A", "zebra", "dog1", "1dog", "DogHouse", "Dog_house", " cats"]

for w in words:
    print(f"{w!r}: {simulate_dfa(w.strip())}")


'Cat': Not Accepted
'dog': Accepted
'A': Not Accepted
'zebra': Accepted
'dog1': Not Accepted
'1dog': Not Accepted
'DogHouse': Not Accepted
'Dog_house': Not Accepted
' cats': Accepted


In [22]:
pip install automathon


Collecting automathon
  Downloading automathon-0.0.15-py3-none-any.whl.metadata (6.8 kB)
Collecting graphviz==0.16 (from automathon)
  Downloading graphviz-0.16-py2.py3-none-any.whl.metadata (7.1 kB)
Downloading automathon-0.0.15-py3-none-any.whl (13 kB)
Downloading graphviz-0.16-py2.py3-none-any.whl (19 kB)
Installing collected packages: graphviz, automathon
  Attempting uninstall: graphviz
    Found existing installation: graphviz 0.21
    Uninstalling graphviz-0.21:
      Successfully uninstalled graphviz-0.21
Successfully installed automathon-0.0.15 graphviz-0.16


In [2]:
import re

class PluralFST:
    def __init__(self, lexicon):
        self.lexicon = set(word.lower() for word in lexicon)

    def analyze(self, word):
        w = word.lower()

        # Singular check
        if w in self.lexicon:
            return f"{w}+N+SG"

        # Rule 1: E insertion (watches, foxes, etc.)
        if re.match(r"^(.+?)(s|z|x|ch|sh)es$", w):
            base = re.sub(r"es$", "", w)
            if base in self.lexicon:
                return f"{base}+N+PL"

        # Rule 2: Y replacement (tries -> try)
        if re.match(r"^.+ies$", w):
            base = re.sub(r"ies$", "y", w)
            if base in self.lexicon:
                return f"{base}+N+PL"

        # Rule 3: S addition (bags -> bag)
        if re.match(r"^.+s$", w):
            base = re.sub(r"s$", "", w)
            if base in self.lexicon:
                return f"{base}+N+PL"

        return "Invalid Word"


# Example lexicon (replace with brown_nouns.txt contents in real use)
lexicon = {"fox", "watch", "try", "bag", "cat", "dog", "zebra"}

fst = PluralFST(lexicon)

# Test words
words = ["foxes", "fox", "watch", "watches", "tries", "try",
         "bags", "bag", "foxs", "cats"]

for w in words:
    print(f"{w!r}: {fst.analyze(w)}")


'foxes': fox+N+PL
'fox': fox+N+SG
'watch': watch+N+SG
'watches': watch+N+PL
'tries': try+N+PL
'try': try+N+SG
'bags': bag+N+PL
'bag': bag+N+SG
'foxs': fox+N+PL
'cats': cat+N+PL


In [10]:
def fst_plural_analysis(word):
    word = word.lower()
    rev = word[::-1]  # reverse input for right-to-left processing

    state = 'q0'
    root_letters = []  # to collect root letters while reading reversed input

    # Helper functions for checking endings
    vowels = set('aeiou')

    i = 0
    n = len(rev)

    while i < n:
        c = rev[i]

        if state == 'q0':
            if c == 's':
                state = 'q_s'  # possible plural suffix start
                i += 1
            elif 'a' <= c <= 'z':
                state = 'q_copy_SG'  # singular word, start copying root
                root_letters.append(c)
                i += 1
            else:
                return "Invalid Word"

        elif state == 'q_s':
            if i >= n:
                return "Invalid Word"  # no next letter after s

            c = rev[i]

            if c == 'e':
                state = 'q_se'  # possible 'es' suffix
                i += 1
            elif c == 'i':
                state = 'q_sei'  # possible 'ies' suffix
                i += 1
            elif c in ('x', 'z', 's', 'h'):
                # endings that need 'es' plural, but only got 's'
                # e.g. 'foxs' invalid
                return "Invalid Word"
            elif 'a' <= c <= 'z':
                # regular plural ending with just s after normal root letter
                state = 'q_copy_PL'
                root_letters.append(c)
                i += 1
            else:
                return "Invalid Word"

        elif state == 'q_se':
            if i >= n:
                return "Invalid Word"  # incomplete suffix after se

            c = rev[i]

            # Check if this letter confirms valid es plural ending
            if c in ('s', 'x', 'z', 'o'):
                # valid es plural root ending letters
                state = 'q_copy_PL'
                root_letters.append(c)
                i += 1
            elif c == 'h':
                # need to look further back to check ch or sh
                if i + 1 < n:
                    next_c = rev[i+1]
                    if next_c == 'c' or next_c == 's':
                        # valid ch or sh ending
                        state = 'q_copy_PL'
                        root_letters.append(c)
                        root_letters.append(next_c)
                        i += 2
                    else:
                        return "Invalid Word"
                else:
                    return "Invalid Word"
            else:
                return "Invalid Word"

        elif state == 'q_sei':
            if i >= n:
                return "Invalid Word"  # incomplete ies suffix

            c = rev[i]
            if c not in vowels:
                # consonant before ies, valid y-replacement plural
                state = 'q_copy_PL_y'  # special copy with final 'y'
                root_letters.append(c)
                i += 1
            else:
                # vowel before ies is invalid in this simplified model
                return "Invalid Word"

        elif state == 'q_copy_PL':
            # copy rest letters as root for plural
            if i < n:
                c = rev[i]
                if 'a' <= c <= 'z':
                    root_letters.append(c)
                    i += 1
                else:
                    return "Invalid Word"
            else:
                # EOF reached
                root = ''.join(root_letters[::-1])
                return f"{root}+N+PL"

        elif state == 'q_copy_PL_y':
            # like q_copy_PL but final root ends with 'y' instead of 'ie'
            if i < n:
                c = rev[i]
                if 'a' <= c <= 'z':
                    root_letters.append(c)
                    i += 1
                else:
                    return "Invalid Word"
            else:
                # EOF reached
                root = ''.join(root_letters[::-1]) + 'y'
                return f"{root}+N+PL"

        elif state == 'q_copy_SG':
            # copy entire word as root for singular
            if i < n:
                c = rev[i]
                if 'a' <= c <= 'z':
                    root_letters.append(c)
                    i += 1
                else:
                    return "Invalid Word"
            else:
                root = ''.join(root_letters[::-1])
                return f"{root}+N+SG"

        else:
            return "Invalid Word"

    # If loop ends unexpectedly
    if state == 'q_copy_SG':
        root = ''.join(root_letters[::-1])
        return f"{root}+N+SG"
    else:
        return "Invalid Word"


# Example usage on brown_nouns.txt
def process_file(input_file, output_file):
    with open(input_file, "r") as f_in, open(output_file, "w") as f_out:
        for line in f_in:
            word = line.strip().lower()
            if not word:
                continue
            output = fst_plural_analysis(word)
            f_out.write(f"{word} = {output}\n")


# Run it
process_file("brown_nouns.txt", "output2.txt")
print("Processing complete.")

Processing complete.


In [11]:
pip install automata-lib pandas graphviz colormath


Collecting automata-lib
  Downloading automata_lib-9.1.2-py3-none-any.whl.metadata (4.9 kB)
Collecting colormath
  Downloading colormath-3.0.0.tar.gz (39 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting cached_method>=0.1.0 (from automata-lib)
  Downloading cached_method-0.1.0-py3-none-any.whl.metadata (2.9 kB)
Downloading automata_lib-9.1.2-py3-none-any.whl (80 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m80.8/80.8 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading cached_method-0.1.0-py3-none-any.whl (4.2 kB)
Building wheels for collected packages: colormath
  Building wheel for colormath (setup.py) ... [?25l[?25hdone
  Created wheel for colormath: filename=colormath-3.0.0-py3-none-any.whl size=39405 sha256=6492251cd31f4f9de2f928e168688b297049e3de276e7e06a45bb54b878a1ddc
  Stored in directory: /root/.cache/pip/wheels/f5/ce/f7/7039d7b57e1a27fc2f6bb2b2abed1be362eefece03caf2fb9a
Successfully built colormath
Installing collected pack

In [13]:
from graphviz import Digraph

def visualize_fst():
    dot = Digraph(comment='Plural Morphology FST')

    # States
    states = ['q0', 'q_s', 'q_se', 'q_sei', 'q_copy_PL', 'q_copy_PL_y', 'q_copy_SG', 'q_dead']
    final_states = ['q_copy_PL', 'q_copy_PL_y', 'q_copy_SG']

    # Add states to graph
    for s in states:
        if s in final_states:
            dot.node(s, s, shape='doublecircle')
        elif s == 'q_dead':
            dot.node(s, s, shape='box', style='filled', fillcolor='lightgray')
        else:
            dot.node(s, s)

    # Transitions from q0
    dot.edge('q0', 'q_s', label='s / ε')
    dot.edge('q0', 'q_copy_SG', label='a–z except s / copy letter')

    # Transitions from q_s
    dot.edge('q_s', 'q_se', label='e / ε')
    dot.edge('q_s', 'q_sei', label='i / ε')
    dot.edge('q_s', 'q_dead', label='x,z,s,h / ε')
    dot.edge('q_s', 'q_copy_PL', label='a–z except e,i,x,z,s,h / copy letter')

    # Transitions from q_se
    dot.edge('q_se', 'q_copy_PL', label='s,x,z,o / copy letter')
    dot.edge('q_se', 'q_copy_PL', label='h + lookahead c or s / copy ch or sh')
    dot.edge('q_se', 'q_dead', label='others / ε')

    # Transitions from q_sei
    dot.edge('q_sei', 'q_copy_PL_y', label='consonant / copy letter')
    dot.edge('q_sei', 'q_dead', label='vowel / ε')

    # Copy plural states transitions (loop to self)
    dot.edge('q_copy_PL', 'q_copy_PL', label='a–z / copy letter')
    dot.edge('q_copy_PL_y', 'q_copy_PL_y', label='a–z / copy letter')

    # Copy singular state transitions (loop to self)
    dot.edge('q_copy_SG', 'q_copy_SG', label='a–z / copy letter')

    # Dead state loop
    dot.edge('q_dead', 'q_dead', label='any / ε')

    # You can add labels for final outputs on final states
    dot.node('q_copy_PL', 'q_copy_PL\n(final +N+PL)', shape='doublecircle', color='black')
    dot.node('q_copy_PL_y', 'q_copy_PL_y\n(final +N+PL)', shape='doublecircle', color='black')
    dot.node('q_copy_SG', 'q_copy_SG\n(final +N+SG)', shape='doublecircle', color='black')

    # Render to a file (PDF/PNG/SVG)
    dot.render('fst_plural_morphology', format='png', cleanup=True)
    print("FST diagram saved as fst_plural_morphology.png")

visualize_fst()

FST diagram saved as fst_plural_morphology.png
