# Part 1: Rule‑Based G2P Conversion and Duration Modeling


In [1]:
def simple_g2p(word):
    word = word.lower()
    phoneme_sequence = []
    i = 0
    while i < len(word):
        # lookahead for digraphs
        if i < len(word) - 1 and word[i:i+2] == "th":
            phoneme_sequence.append("DH")
            i += 2
        elif i < len(word) - 1 and word[i] == "c":
            if word[i+1] in ["e", "i", "y"]:
                phoneme_sequence.append("S")
            else:
                phoneme_sequence.append("K")
            i += 1
        elif i < len(word) - 1 and word[i] == word[i+1] and word[i].isalpha():
            # double letters → single phoneme
            phoneme_sequence.append(word[i].upper())
            i += 2
        else:
            # default: uppercase letter as phoneme
            phoneme_sequence.append(word[i].upper())
            i += 1

    # exception for "the"
    if word == "the":
        phoneme_sequence = ["DH", "AH"]

    return phoneme_sequence


In [2]:
words1 = ["cat", "cent", "apple", "the", "book", "tree"]
for w in words1:
    phonemes = simple_g2p(w)
    print(f"Word: {w} -> Phonemes: {'-'.join(phonemes)}")


Word: cat -> Phonemes: K-A-T
Word: cent -> Phonemes: S-E-N-T
Word: apple -> Phonemes: A-P-L-E
Word: the -> Phonemes: DH-AH
Word: book -> Phonemes: B-O-K
Word: tree -> Phonemes: T-R-E


 Part 2: Basic Prosodic Modeling- Duration

In [3]:
# Cell 2: simple_g2p_with_duration
def simple_g2p_with_duration(word):
    """
    Basic G2P with simple duration modeling.
    Mirrors MATLAB simple_g2p_with_duration from SP_LAB_9.pdf.
    :param word: input word (string)
    :return: tuple (phoneme_sequence, durations_ms)
    """
    word = word.lower()
    phoneme_sequence = []
    durations = []
    vowels = "aeiou"
    base_vowel_duration = 100    # ms
    base_consonant_duration = 50 # ms
    final_syllable_duration_increase = 20  # ms

    i = 0
    while i < len(word):
        # determine phoneme exactly as in simple_g2p
        if i < len(word) - 1 and word[i:i+2] == "th":
            phoneme = "DH"
            i += 2
        elif i < len(word) - 1 and word[i] == "c":
            if word[i+1] in vowels:
                phoneme = "S"
            else:
                phoneme = "K"
            i += 1
        elif i < len(word) - 1 and word[i] == word[i+1] and word[i].isalpha():
            phoneme = word[i].upper()
            i += 2
        else:
            phoneme = word[i].upper()
            i += 1

        phoneme_sequence.append(phoneme)

        if phoneme.lower() in vowels:
            durations.append(base_vowel_duration)
        else:
            durations.append(base_consonant_duration)

    if durations:
        durations[-1] += final_syllable_duration_increase

    if word == "the":
        phoneme_sequence = ["DH", "AH"]
        durations = [
            base_consonant_duration,
            base_vowel_duration + final_syllable_duration_increase
        ]

    return phoneme_sequence, durations


In [4]:
if __name__ == "__main__":
    words1 = ["cat", "cent", "apple", "the", "book", "tree"]
    for w in words1:
        phonemes = simple_g2p(w)
        print(f"Word: {w} -> Phonemes: {'-'.join(phonemes)}")

    print()

    words2 = ["cat", "apple", "hello"]
    for w in words2:
        phonemes, durs = simple_g2p_with_duration(w)
        print(f"Word: {w} -> Phonemes: {'-'.join(phonemes)} -> Durations (ms): {durs}")


Word: cat -> Phonemes: K-A-T
Word: cent -> Phonemes: S-E-N-T
Word: apple -> Phonemes: A-P-L-E
Word: the -> Phonemes: DH-AH
Word: book -> Phonemes: B-O-K
Word: tree -> Phonemes: T-R-E

Word: cat -> Phonemes: S-A-T -> Durations (ms): [50, 100, 70]
Word: apple -> Phonemes: A-P-L-E -> Durations (ms): [100, 50, 50, 120]
Word: hello -> Phonemes: H-E-L-O -> Durations (ms): [50, 100, 50, 120]
