In [None]:
pip install visual-automata

In [1]:
import re

def valid_eng_word(word):
    pattern = r'\b[a-z]+\b'  # start and end with lowercase letters only
    accept = re.fullmatch(pattern, word)
    if accept:
        print(accept.string,'Accepted')
    else:
        print('Not accepted')


In [3]:
valid_eng_word('dog'),
valid_eng_word('cat'),
valid_eng_word('a  '),
valid_eng_word('rani0')
valid_eng_word(' cat'),
valid_eng_word('_at')



dog Accepted
cat Accepted
Not accepted
Not accepted
Not accepted
Not accepted


In [4]:
from automata.fa.dfa import DFA

from visual_automata.fa.dfa import VisualDFA

In [36]:
import os

# Optional: load the nouns from brown_nouns.txt if available
lexicon = set()    #unordered
if os.path.exists("brown_nouns.txt"):
    with open("brown_nouns.txt", "r") as f:
        lexicon = {line.strip().lower() for line in f if line.strip()}
        

print(len(lexicon))
# Irregular plural forms
irregular_plurals = {
    "men": "man",
    "women": "woman",
    "children": "child",
    "teeth": "tooth",
    "feet": "foot",
    "mice": "mouse",
    "geese": "goose",
    "knives": "knife",
    "wives": "wife",
    "lives": "life",
    "leaves": "leaf",
}

def analyze_noun(word):
    w = word.lower()

    # Irregular plural check
    if w in irregular_plurals:
        return f"{irregular_plurals[w]}+N+PL"
    if w in irregular_plurals.values():
        return f"{w}+N+SG"

    # Rule: Plural with 'es' after certain endings
    if w.endswith("es"):
        if any(w.endswith(e) for e in ["ses", "zes", "xes", "ches", "shes"]):
            root = w[:-2]  # remove 'es'
            if lexicon and root not in lexicon:
                return "Invalid Word"
            return f"{root}+N+PL"

    # Rule: Plural with 'ies' (y → ies)
    if w.endswith("ies"):
        root = w[:-3] + "y"
        if lexicon and root not in lexicon:
            return "Invalid Word"
        return f"{root}+N+PL"

    # Rule: Singular with 'y'
    if w.endswith("y"):
        if lexicon and w not in lexicon:
            return "Invalid Word"
        return f"{w}+N+SG"

    # Rule: Regular plural with 's'
    if w.endswith("s"):
        root = w[:-1]
        # Reject if root ends with x, s, z, ch, sh (needs 'es')
        if any(root.endswith(e) for e in ["x", "s", "z", "ch", "sh"]):
            return "Invalid Word"
        
        return f"{root}+N+PL"

    # Singular default
    if lexicon and w not in lexicon:
        return "Invalid Word"
    return f"{w}+N+SG"

for i,word in enumerate(lexicon):
    print(f"{i+1}.{word} = {analyze_noun(word)}")

#print(f"{'ballards'} = {analyze_noun('ballards')}")


17892
1.landing = landing+N+SG
2.optics = optic+N+PL
3.scrim = scrim+N+SG
4.associates = associate+N+PL
5.similarities = similarity+N+PL
6.video = video+N+SG
7.bluebush = bluebush+N+SG
8.stretcher = stretcher+N+SG
9.alteration = alteration+N+SG
10.swallows = swallow+N+PL
11.grubs = grub+N+PL
12.downs = down+N+PL
13.ucla = ucla+N+SG
14.groundwork = groundwork+N+SG
15.bobby = bobby+N+SG
16.shit = shit+N+SG
17.horsepower = horsepower+N+SG
18.ducks = duck+N+PL
19.stampede = stampede+N+SG
20.bromphenol = bromphenol+N+SG
21.fruit = fruit+N+SG
22.remark = remark+N+SG
23.delusion = delusion+N+SG
24.esters = ester+N+PL
25.electrolysis = electrolysi+N+PL
26.slowness = Invalid Word
27.shrubbery = shrubbery+N+SG
28.evangelists = evangelist+N+PL
29.threads = thread+N+PL
30.favors = favor+N+PL
31.doorkeeper = doorkeeper+N+SG
32.roar = roar+N+SG
33.bus = bu+N+PL
34.check = check+N+SG
35.acquisitions = acquisition+N+PL
36.staff = staff+N+SG
37.bun = bun+N+SG
38.head = head+N+SG
39.attentions = attenti

In [32]:
# Define input and output alphabets
input_alphabet = set("abcdefghijklmnopqrstuvwxyz")
output_alphabet = input_alphabet.union({"+N+SG", "+N+PL"})

# Define states
states = {
    "START",       # start reading the word
    "STEM",        # reading root letters
    "END_S",       # possible plural S
    "END_ES",      # plural ES
    "END_IES",     # plural IES
    "FINAL_SG",    # singular accept state
    "FINAL_PL"     # plural accept state
}
start_state = "START"
final_states = {"FINAL_SG", "FINAL_PL"}

# Define transitions: (state, input) -> (next_state, output)
transitions = {}

# Generic letter reading (build stem)
for ch in input_alphabet:
    transitions[("START", ch)] = ("STEM", ch)
    transitions[("STEM", ch)] = ("STEM", ch)

# Rule 1: E insertion after -s, -z, -x, -ch, -sh before adding "es"
# We'll detect these endings in the FST
special_es_endings = ["s", "z", "x"]
special_es_pairs = ["ch", "sh"]

# Rule 2: Y replacement (consonant + y → ies)
# Will handle in code logic (needs context: prev char consonant)

# Rule 3: S addition (default plural rule)

def run_fst(word):
    # First, handle plural vs singular classification manually
    if word in input_alphabet:
        return "Invalid Word"  # too short

    # Singular case: if it's in our noun list and no plural ending
    if not (word.endswith("s") or word.endswith("es") or word.endswith("ies")):
        return f"{word}+N+SG"

    # Plural cases:
    # Case 1: Ends with 'ies' → Y replacement
    if word.endswith("ies") and len(word) > 3:
        stem = word[:-3] + "y"
        # Ensure before 'y' is a consonant
        if stem[-2] not in "aeiou":
            return f"{stem}+N+PL"
        else:
            return "Invalid Word"

    # Case 2: Ends with 'es' and matches E-insertion rule
    if word.endswith("es"):
        stem = word[:-2]
        if any(stem.endswith(e) for e in special_es_endings) or any(stem.endswith(e) for e in special_es_pairs):
            return f"{stem}+N+PL"
        

    # Case 3: Ends with 's' → default plural
    if word.endswith("s"):
        stem = word[:-1]
        # reject if stem should have had 'es'
        if any(stem.endswith(e) for e in special_es_endings) or any(stem.endswith(e) for e in special_es_pairs):
            return "Invalid Word"
        # reject if stem should have had 'ies'
        if stem.endswith("y") and stem[-2] not in "aeiou":
            return "Invalid Word"
        return f"{stem}+N+PL"

    return "Invalid Word"



lexicon = set()    #unordered
if os.path.exists("brown_nouns.txt"):
    with open("brown_nouns.txt", "r") as f:
        lexicon = {line.strip().lower() for line in f if line.strip()}
 
print('Total words: ',len(lexicon),'\n')
for i,w in enumerate(lexicon):
    print(f"{i+1}. {w} -> {run_fst(w)}")

#print(f"associates-> {run_fst('associates')}")


Total words:  17892 

1. landing -> landing+N+SG
2. optics -> optic+N+PL
3. scrim -> scrim+N+SG
4. associates -> associate+N+PL
5. similarities -> similarity+N+PL
6. video -> video+N+SG
7. bluebush -> bluebush+N+SG
8. stretcher -> stretcher+N+SG
9. alteration -> alteration+N+SG
10. swallows -> swallow+N+PL
11. grubs -> grub+N+PL
12. downs -> down+N+PL
13. ucla -> ucla+N+SG
14. groundwork -> groundwork+N+SG
15. bobby -> bobby+N+SG
16. shit -> shit+N+SG
17. horsepower -> horsepower+N+SG
18. ducks -> duck+N+PL
19. stampede -> stampede+N+SG
20. bromphenol -> bromphenol+N+SG
21. fruit -> fruit+N+SG
22. remark -> remark+N+SG
23. delusion -> delusion+N+SG
24. esters -> ester+N+PL
25. electrolysis -> electrolysi+N+PL
26. slowness -> Invalid Word
27. shrubbery -> shrubbery+N+SG
28. evangelists -> evangelist+N+PL
29. threads -> thread+N+PL
30. favors -> favor+N+PL
31. doorkeeper -> doorkeeper+N+SG
32. roar -> roar+N+SG
33. bus -> bu+N+PL
34. check -> check+N+SG
35. acquisitions -> acquisition+N+