In [1]:
import re

def valid_eng_word(word):
    """Print whether ``word`` is made up solely of lowercase ASCII letters.

    The entire string has to match ``[a-z]+``; any space, digit, underscore
    or uppercase letter anywhere in it causes a rejection.

    Args:
        word: The string to check.

    Returns:
        None. The verdict is printed: ``"<word> Accepted"`` on success,
        ``"Not accepted"`` otherwise.
    """
    match = re.fullmatch(r'\b[a-z]+\b', word)
    if match is None:
        print('Not accepted')
        return
    # match.string is the original input that was handed to fullmatch.
    print(match.string, 'Accepted')


In [2]:
# Run the validator over a mix of valid words and near misses (trailing
# spaces, a digit, a leading space, an underscore). Each call prints its own
# verdict, so the calls are plain statements rather than comma-terminated
# expressions, which would build throwaway one-element tuples.
for sample in ('dog', 'cat', 'a  ', 'rani0', ' cat', '_at'):
    valid_eng_word(sample)



dog Accepted
cat Accepted
Not accepted
Not accepted
Not accepted
Not accepted


In [5]:
conda install -c conda-forge graphviz


Retrieving notices: done
Channels:
 - conda-forge
 - defaults
 - anaconda
Platform: win-64
Collecting package metadata (repodata.json): ...working... done
Solving environment: ...working... done

## Package Plan ##

  environment location: C:\Users\rani\anaconda3\envs\machinelearning

  added / updated specs:
    - graphviz


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    ca-certificates-2025.8.3   |       h4c7d964_0         151 KB  conda-forge
    cairo-1.18.4               |       he9e932c_0         637 KB
    certifi-2025.8.3           |     pyhd8ed1ab_0         155 KB  conda-forge
    font-ttf-dejavu-sans-mono-2.37|       hab24e00_0         388 KB  conda-forge
    font-ttf-inconsolata-3.000 |       h77eed37_0          94 KB  conda-forge
    font-ttf-source-code-pro-2.038|       h77eed37_0         684 KB  conda-forge
    font-ttf-ubuntu-0.83       |       h77eed37_3         1.5 MB  c

In [1]:
# Install automathon and graphviz
!pip install automathon graphviz

from automathon import DFA
from IPython.display import Image, display

# The 26 lowercase ASCII letters, built once and reused for the alphabet and
# for the transition table below.
lowercase_letters = [chr(c) for c in range(ord('a'), ord('z') + 1)]

# States: q0 (start), q1 (at least one lowercase letter read), q_dead (trap)
Q = {'q0', 'q1', 'q_dead'}

# Alphabet: every lowercase letter plus the symbol 'other'.
# NOTE(review): 'other' presumably stands in for "any non-lowercase character";
# confirm how automathon matches multi-character symbols against input.
sigma = set(lowercase_letters + ['other'])

# Transition function: a letter moves q0/q1 to q1, 'other' moves them to
# q_dead, and q_dead loops on every symbol.
delta = {
    'q0': {**{ch: 'q1' for ch in lowercase_letters}, 'other': 'q_dead'},
    'q1': {**{ch: 'q1' for ch in lowercase_letters}, 'other': 'q_dead'},
    'q_dead': {ch: 'q_dead' for ch in sigma},
}

# Start state
q0 = 'q0'

# Final states
F = {'q1'}

# Create DFA
dfa = DFA(Q, sigma, delta, q0, F)

# View DFA (this will create "dfa.gv.png")
dfa.view("dfa")

# Display DFA diagram. A bare Image(...) expression is only rendered when it is
# the last expression of a cell; this one is followed by more code, so it has
# to be passed to display() explicitly.
display(Image(filename="dfa.gv.png"))

# Function to classify words
def classify(word):
    """Classify ``word`` as a lowercase-only word.

    Args:
        word: The string to classify.

    Returns:
        ``"Accepted"`` when ``word`` is non-empty and every character lies in
        the range ``'a'``..``'z'``; ``"Not Accepted"`` otherwise.
    """
    # An empty word is rejected outright; otherwise every character, including
    # the first one, must be a lowercase ASCII letter.
    if word and all('a' <= ch <= 'z' for ch in word):
        return "Accepted"
    return "Not Accepted"

# Sample inputs: four lowercase-only words followed by words containing a
# digit, uppercase letters, an underscore or a leading space.
test_words = [
    "cat", "dog", "a", "zebra",
    "dog1", "1dog", "DogHouse", "Dog_house", " cats",
]

# Print one "<word> -> <verdict>" line per sample.
for w in test_words:
    print(f"{w} -> {classify(w)}")






cat -> Accepted
dog -> Accepted
a -> Accepted
zebra -> Accepted
dog1 -> Not Accepted
1dog -> Not Accepted
DogHouse -> Not Accepted
Dog_house -> Not Accepted
 cats -> Not Accepted


In [22]:
import os

# Input alphabet: the lowercase letters a-z.
# Output alphabet: the same letters plus the two morphological tags.
input_alphabet = set("abcdefghijklmnopqrstuvwxyz")
output_alphabet = input_alphabet | {"+N+SG", "+N+PL"}

# States of the transducer:
#   START    - start reading the word
#   STEM     - reading root letters
#   END_S    - possible plural S
#   END_ES   - plural ES
#   END_IES  - plural IES
#   FINAL_SG - singular accept state
#   FINAL_PL - plural accept state
states = {"START", "STEM", "END_S", "END_ES", "END_IES", "FINAL_SG", "FINAL_PL"}
start_state = "START"
final_states = {"FINAL_SG", "FINAL_PL"}

# Transition table: (state, input) -> (next_state, output).
# Only the generic "read a stem letter and copy it" transitions out of START
# and STEM are tabulated here.
transitions = {}
for ch in input_alphabet:
    transitions.update({
        ("START", ch): ("STEM", ch),
        ("STEM", ch): ("STEM", ch),
    })

# Rule 1: E insertion - stems ending in -s, -z, -x, -ch, -sh take "es".
special_es_endings = ["s", "z", "x"]
special_es_pairs = ["ch", "sh"]

# Rule 2: Y replacement (consonant + y -> ies). Handled in code logic because
# it needs context (the character preceding the y).

# Rule 3: S addition (default plural rule).
# Rule 3: S addition (default plural rule)

def run_fst(word):
    """Analyse ``word`` as a singular or plural noun using suffix rules.

    The rules are applied in this order:

    1. The empty string and single letters of ``input_alphabet`` are rejected.
    2. A word that does not end in ``s`` is singular.
    3. ``...ies`` (longer than three characters): the stem is the word with
       ``ies`` replaced by ``y``; it is accepted only if the character before
       that ``y`` is not a vowel.
    4. ``...es`` whose stem ends in one of ``special_es_endings`` or
       ``special_es_pairs`` is a plural formed by e-insertion.
    5. Any other word ending in ``s`` is a default plural, unless its stem
       should have taken ``es`` or ``ies`` instead.

    Args:
        word: The surface form to analyse (the caller lowercases it).

    Returns:
        ``"<stem>+N+SG"``, ``"<stem>+N+PL"`` or ``"Invalid Word"``.
    """
    # Too short to analyse. The empty string is rejected explicitly; otherwise
    # it would be reported as the singular "+N+SG".
    if not word or word in input_alphabet:
        return "Invalid Word"

    # Singular case: no plural ending. Checking for "s" alone is sufficient
    # because "es" and "ies" both end in "s".
    if not word.endswith("s"):
        return f"{word}+N+SG"

    # Stem endings that require "es" rather than a bare "s".
    es_triggers = (*special_es_endings, *special_es_pairs)

    # Case 1: ends with "ies" -> Y replacement.
    if word.endswith("ies") and len(word) > 3:
        stem = word[:-3] + "y"
        # The character before the restored "y" must be a consonant.
        if stem[-2] not in "aeiou":
            return f"{stem}+N+PL"
        return "Invalid Word"

    # Case 2: ends with "es" and the stem matches the E-insertion rule.
    # Other "es" words (e.g. "tables") fall through to the default rule.
    if word.endswith("es"):
        stem = word[:-2]
        if stem.endswith(es_triggers):
            return f"{stem}+N+PL"

    # Case 3: default plural. The word is known to end in "s" at this point.
    stem = word[:-1]
    # Reject if the stem should have taken "es".
    if stem.endswith(es_triggers):
        return "Invalid Word"
    # Reject if the stem should have taken "ies". The length guard avoids an
    # IndexError on a one-character stem such as the "y" of "ys".
    if len(stem) >= 2 and stem.endswith("y") and stem[-2] not in "aeiou":
        return "Invalid Word"
    return f"{stem}+N+PL"



lexicon = set()    # unordered; not populated by this cell

# Analyse every non-blank line of brown_nouns.txt and write one
# "<word> = <analysis>" line per word to output.txt.
if os.path.exists("brown_nouns.txt"):
    # An explicit encoding keeps the result independent of the platform's
    # default text encoding.
    with open("brown_nouns.txt", "r", encoding="utf-8") as f_in, \
            open("output.txt", "w", encoding="utf-8") as f_out:
        for line in f_in:
            word = line.strip().lower()
            if not word:
                continue  # skip blank lines
            output = run_fst(word)
            f_out.write(f"{word} = {output}\n")
else:
    # Say so instead of silently doing nothing when the input is missing.
    print("brown_nouns.txt not found; output.txt was not written")

#print(f"associates-> {run_fst('associates')}")


In [2]:
from graphviz import Digraph

def visualize_fst():
    """Draw the plural-morphology FST sketch and save it as a PNG.

    Builds a graphviz ``Digraph`` with the seven FST states (accepting states
    drawn as double circles), adds the labelled transitions, renders the graph
    to ``fst_plural_morphology.png`` and prints a confirmation message.

    Returns:
        None.
    """
    dot = Digraph(comment='Plural Morphology FST')

    # States
    states = [
        "START",       # start reading the word
        "STEM",        # reading root letters
        "END_S",       # possible plural S
        "END_ES",      # plural ES
        "END_IES",     # plural IES
        "FINAL_SG",    # singular accept state
        "FINAL_PL",    # plural accept state
    ]
    final_states = ["FINAL_SG", "FINAL_PL"]

    # Add states to graph; accepting states get a double circle.
    for s in states:
        if s in final_states:
            dot.node(s, s, shape='doublecircle')
        else:
            dot.node(s, s)

    # Transitions from START
    dot.edge('START', 'STEM', label='s / ε')
    dot.edge('START', 'STEM', label='a–z except s / copy letter')

    # Transitions from STEM
    dot.edge('STEM', 'END_ES', label='e / ε')
    dot.edge('STEM', 'END_IES', label='i / ε')
    dot.edge('STEM', 'FINAL_PL', label='a–z except e,i,x,z,s,h / copy letter')

    # Transitions from END_ES
    dot.edge('END_ES', 'FINAL_PL', label='s,x,z,o / copy letter')
    dot.edge('END_ES', 'FINAL_PL', label='h + lookahead c or s / copy ch or sh')

    # Transitions from END_IES
    dot.edge('END_IES', 'FINAL_PL', label='consonant / copy letter')

    # No dead state is drawn; rejected inputs simply have no outgoing edge.

    # Self-loops on the accepting states. Each is added exactly once: adding
    # the same edge twice makes graphviz draw two identical loops.
    dot.edge('FINAL_PL', 'FINAL_PL', label='a–z / copy letter')
    dot.edge('FINAL_SG', 'FINAL_SG', label='a–z / copy letter')

    # Re-declare the accepting states so their labels show the emitted tag.
    dot.node('FINAL_PL', 'FINAL_PL\n(final +N+PL)', shape='doublecircle', color='black')
    dot.node('FINAL_SG', 'FINAL_SG\n(final +N+SG)', shape='doublecircle', color='black')

    # Render to a PNG; cleanup=True removes the intermediate DOT source file.
    dot.render('fst_plural_morphology', format='png', cleanup=True)
    print("FST diagram saved as fst_plural_morphology.png")

visualize_fst()



FST diagram saved as fst_plural_morphology.png
