#### Defined input/output alphabets for sigma & gamma

In [1]:
# Input alphabet Σ = {a…z, s, es, ies, #}
# (letters + suffix markers + end-of-word)
Sigma = list("abcdefghijklmnopqrstuvwxyz") + ["<SUF_S>", "<SUF_ES>", "<SUF_IES>", "#"]

# Output alphabet Γ = {a…z, +, N, SG, PL}
Gamma = list("abcdefghijklmnopqrstuvwxyz") + ["+N+SG", "+N+PL"]

print("Input Alphabet Σ:", Sigma)
print("Output Alphabet Γ:", Gamma)


Input Alphabet Σ: ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '<SUF_S>', '<SUF_ES>', '<SUF_IES>', '#']
Output Alphabet Γ: ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '+N+SG', '+N+PL']


#### Defined states Q, start state q0, final states F

In [2]:
# States Q
Q = {
    "q0",       # initial state (reading root letters)
    "qPL",      # plural branch after consuming suffix
    "qSG",      # singular branch at end-of-word
    "qACCEPT"   # final accepting state
}

q0 = "q0"          # start state
F  = {"qSG", "qACCEPT"}   # set of final states

print("States Q:", Q)
print("Start State:", q0)
print("Final States F:", F)


States Q: {'q0', 'qACCEPT', 'qPL', 'qSG'}
Start State: q0
Final States F: {'qACCEPT', 'qSG'}


#### Defined delta the transition function as dictionary 

In [3]:
# δ: (state, input_symbol) -> (next_state, output)

delta = {}

# 1) Copy root letters in q0
for ch in "abcdefghijklmnopqrstuvwxyz":
    delta[(q0, ch)] = (q0, ch)

# 2) Singular branch: if we reach end-of-word from q0
delta[(q0, "#")] = ("qSG", "+N+SG")

# 3) Plural suffix branches from q0
delta[(q0, "<SUF_S>")]   = ("qPL", "+N+PL")
delta[(q0, "<SUF_ES>")]  = ("qPL", "+N+PL")
delta[(q0, "<SUF_IES>")] = ("qPL", "y+N+PL")  # we output 'y' as root + plural tag

# 4) End-of-word after plural → accept
delta[("qPL", "#")] = ("qACCEPT", "")


#### Writing the analyzer

In [17]:
def run_fst(word_tokens):
    state = q0
    output = []
    for sym in word_tokens:
        if (state, sym) not in delta:
            return "Invalid Word"
        state, out = delta[(state, sym)]
        if out:
            output.append(out)
    if state in F:
        return "".join(output)
    return "Invalid Word"


#### Another transition table tok_delta

In [38]:
# Input alphabet
letters = list("abcdefghijklmnopqrstuvwxyz")
tok_delta = {}

# --- 1) Default: copy letters
for ch in letters:
    tok_delta[("q0", ch)] = ("q0", [ch])

# --- 2) End marker
tok_delta[("q0", "#")] = ("qACCEPT", ["#"])

# --- 3) Suffix rules

# (a) plain "s"
tok_delta[("q0", "s")] = ("qS", [])                  # buffer 's'
tok_delta[("qS", "#")] = ("qACCEPT", ["<SUF_S>", "#"])   # s at end → <SUF_S>
tok_delta[("qS", "s")] = ("q0", ["s", "s"])        # double s stays stem
for ch in letters:
    if ch != "s":
        tok_delta[("qS", ch)] = ("q0", ["<SUF_S>", ch])   # release 's' if not suffix

# (b) "es"
tok_delta[("q0", "e")] = ("qE", [])                 # buffer 'e'
tok_delta[("qE", "s")] = ("qACCEPT", ["<SUF_ES>", "#"])  # es at end
tok_delta[("qE", "#")] = ("qACCEPT", ["e", "#"])         # lone 'e' at end
for ch in letters:
    if ch != "s":
        tok_delta[("qE", ch)] = ("q0", ["e", ch])         # release 'e' if not suffix

# (c) "ies"
tok_delta[("q0", "i")] = ("qI", [])                 # buffer 'i'
tok_delta[("qI", "e")] = ("qIE", [])               # buffer 'ie'
tok_delta[("qIE", "s")] = ("qACCEPT", ["<SUF_IES>", "#"]) # ies at end
tok_delta[("qI", "#")] = ("qACCEPT", ["i", "#"])          # lone 'i'
tok_delta[("qIE", "#")] = ("qACCEPT", ["i", "e", "#"])    # lone 'ie'
for ch in letters:
    if ch != "e":
        tok_delta[("qI", ch)] = ("q0", ["i", ch])          # release 'i' if not suffix
for ch in letters:
    if ch != "s":
        tok_delta[("qIE", ch)] = ("q0", ["i", "e", ch])    # release 'ie' if not suffix


#### Adding a tokenizer runner

In [30]:
def run_tokenizer(word):
    state = "q0"
    output = []
    for ch in list(word) + ["#"]:
        if (state, ch) not in tok_delta:
            return ["Invalid"]
        state, out = tok_delta[(state, ch)]
        if out:
            # out is a list of strings → extend, not append
            output.extend(out)
    return output


#### Pipeline from tokenizer to analyzer

In [31]:
def analyze_word(word):
    toks = run_tokenizer(word.lower())
    return word, toks, run_fst(toks)


#### Testing the pipeline

In [39]:
for w in ["fox", "foxes", "bags", "tries","class","classes", "bus", "buses", "cactus", "cacti"]:
    print(analyze_word(w))


('fox', ['f', 'o', 'x', '#'], 'fox+N+SG')
('foxes', ['Invalid'], 'Invalid Word')
('bags', ['b', 'a', 'g', '<SUF_S>', '#'], 'bag+N+PL')
('tries', ['Invalid'], 'Invalid Word')
('class', ['c', 'l', 'a', 's', 's', '#'], 'class+N+SG')
('classes', ['Invalid'], 'Invalid Word')
('bus', ['b', 'u', '<SUF_S>', '#'], 'bu+N+PL')
('buses', ['b', 'u', '<SUF_S>', 'e', '<SUF_S>', '#'], 'Invalid Word')
('cactus', ['c', 'a', 'c', 't', 'u', '<SUF_S>', '#'], 'cactu+N+PL')
('cacti', ['c', 'a', 'c', 't', 'i', '#'], 'cacti+N+SG')
