# Part 1: Rule‑Based G2P Conversion and Duration Modeling


In [1]:
def simple_g2p(word):
    word = word.lower()
    phoneme_sequence = []
    i = 0
    while i < len(word):
        if i < len(word) - 1 and word[i:i+2] == "th":
            phoneme_sequence.append("DH")
            i += 2
        elif i < len(word) - 1 and word[i] == "c":
            if word[i+1] in ["e", "i", "y"]:
                phoneme_sequence.append("S")
            else:
                phoneme_sequence.append("K")
            i += 1
        elif i < len(word) - 1 and word[i] == word[i+1] and word[i].isalpha():
            phoneme_sequence.append(word[i].upper())
            i += 2
        else:
            phoneme_sequence.append(word[i].upper())
            i += 1

    if word == "the":
        phoneme_sequence = ["DH", "AH"]

    return phoneme_sequence


In [2]:
words1 = ["cat", "cent", "apple", "the", "book", "tree"]
for w in words1:
    phonemes = simple_g2p(w)
    print(f"Word: {w} -> Phonemes: {'-'.join(phonemes)}")


Word: cat -> Phonemes: K-A-T
Word: cent -> Phonemes: S-E-N-T
Word: apple -> Phonemes: A-P-L-E
Word: the -> Phonemes: DH-AH
Word: book -> Phonemes: B-O-K
Word: tree -> Phonemes: T-R-E


 Part 2: Basic Prosodic Modeling- Duration

In [3]:
def simple_g2p_with_duration(word):
    
    word = word.lower()
    phoneme_sequence = []
    durations = []
    vowels = "aeiou"
    base_vowel_duration = 100   
    base_consonant_duration = 50 
    final_syllable_duration_increase = 20  

    i = 0
    while i < len(word):
        if i < len(word) - 1 and word[i:i+2] == "th":
            phoneme = "DH"
            i += 2
        elif i < len(word) - 1 and word[i] == "c":
            if word[i+1] in vowels:
                phoneme = "S"
            else:
                phoneme = "K"
            i += 1
        elif i < len(word) - 1 and word[i] == word[i+1] and word[i].isalpha():
            phoneme = word[i].upper()
            i += 2
        else:
            phoneme = word[i].upper()
            i += 1

        phoneme_sequence.append(phoneme)

        if phoneme.lower() in vowels:
            durations.append(base_vowel_duration)
        else:
            durations.append(base_consonant_duration)

    if durations:
        durations[-1] += final_syllable_duration_increase

    if word == "the":
        phoneme_sequence = ["DH", "AH"]
        durations = [
            base_consonant_duration,
            base_vowel_duration + final_syllable_duration_increase
        ]

    return phoneme_sequence, durations


In [4]:
if __name__ == "__main__":
    words1 = ["cat", "cent", "apple", "the", "book", "tree"]
    for w in words1:
        phonemes = simple_g2p(w)
        print(f"Word: {w} -> Phonemes: {'-'.join(phonemes)}")

    print()

    words2 = ["cat", "apple", "hello"]
    for w in words2:
        phonemes, durs = simple_g2p_with_duration(w)
        print(f"Word: {w} -> Phonemes: {'-'.join(phonemes)} -> Durations (ms): {durs}")


Word: cat -> Phonemes: K-A-T
Word: cent -> Phonemes: S-E-N-T
Word: apple -> Phonemes: A-P-L-E
Word: the -> Phonemes: DH-AH
Word: book -> Phonemes: B-O-K
Word: tree -> Phonemes: T-R-E

Word: cat -> Phonemes: S-A-T -> Durations (ms): [50, 100, 70]
Word: apple -> Phonemes: A-P-L-E -> Durations (ms): [100, 50, 50, 120]
Word: hello -> Phonemes: H-E-L-O -> Durations (ms): [50, 100, 50, 120]


# Cell 4: G2P Mapping Dictionary


In [5]:
# A standalone grapheme→phoneme map for single letters and common digraphs
g2p_map = {
    'a': 'AH', 'b': 'B',  'c': 'K',  'd': 'D',
    'e': 'EH','f': 'F',  'g': 'G',  'h': 'HH',
    'i': 'IH','j': 'JH', 'k': 'K',  'l': 'L',
    'm': 'M', 'n': 'N',  'o': 'OW', 'p': 'P',
    'q': 'K', 'r': 'R',  's': 'S',  't': 'T',
    'u': 'UH','v': 'V',  'w': 'W',  'x': 'K S',
    'y': 'Y', 'z': 'Z',
    'th': 'DH','sh': 'SH','ch': 'CH','ng': 'NG',
    'ee': 'IY','oo': 'UW'
}

def map_word_to_phonemes(word, mapping):
    """
    Apply a simple dictionary lookup (with longest-match first) 
    to convert word → phoneme list.
    """
    w = word.lower()
    phonemes = []
    i = 0
    while i < len(w):
        # try two‑char digraph
        if i < len(w)-1 and w[i:i+2] in mapping:
            phonemes.append(mapping[w[i:i+2]])
            i += 2
        else:
            phonemes.append(mapping.get(w[i], w[i].upper()))
            i += 1
    return phonemes

# Example:
print(map_word_to_phonemes("threshold", g2p_map))  # ['TH','R','EH','SH','OW','L','D']


['DH', 'R', 'EH', 'SH', 'OW', 'L', 'D']


# Cell 5: Extract Acoustic Features (duration, pitch, loudness, prosodic)


In [6]:
import librosa
import numpy as np

# Replace with your file
audio_path = 'path_to_audio.wav'
y, sr = librosa.load(audio_path, sr=None)

# Duration (s)
duration = librosa.get_duration(y=y, sr=sr)

# Pitch (F0) via PYIN
f0, voiced_flag, voiced_probs = librosa.pyin(
    y, 
    fmin=librosa.note_to_hz('C2'), 
    fmax=librosa.note_to_hz('C7')
)
mean_pitch = np.nanmean(f0)

# Loudness (RMS energy)
rms = librosa.feature.rms(y=y)[0]
mean_loudness = np.mean(rms)

# Prosodic proxy: Tempo (beats per minute)
onset_env = librosa.onset.onset_strength(y=y, sr=sr)
tempo, beats = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)

features = {
    'duration_s': duration,
    'mean_pitch_hz': mean_pitch,
    'mean_loudness_rms': mean_loudness,
    'tempo_bpm': tempo
}

print(features)


  y, sr = librosa.load(audio_path, sr=None)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


FileNotFoundError: [Errno 2] No such file or directory: 'path_to_audio.wav'

# Cell 6: Visualize Pitch & Loudness Over Time


In [7]:
import matplotlib.pyplot as plt

# Pitch contour
times = librosa.times_like(f0, sr=sr)
plt.figure()
plt.plot(times, f0, label='F0')
plt.title('Pitch Contour')
plt.xlabel('Time (s)')
plt.ylabel('Frequency (Hz)')
plt.tight_layout()
plt.show()

# RMS loudness
times_rms = librosa.frames_to_time(np.arange(len(rms)), sr=sr)
plt.figure()
plt.plot(times_rms, rms, label='RMS')
plt.title('Loudness (RMS)')
plt.xlabel('Time (s)')
plt.ylabel('Amplitude')
plt.tight_layout()
plt.show()


NameError: name 'f0' is not defined