<a href="https://colab.research.google.com/github/steffiangel/Speech-Processing/blob/main/spr_lab7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Initial probabilities: phoneme -> probability of being the first state.
# All of the mass is on '/s/', so every sequence starts there.
initial_prob = {'/s/': 1.0, '/p/': 0.0, '/ie:/': 0.0, '/tS/': 0.0}

# Transition probabilities: transition_prob[from_phoneme][to_phoneme].
# Each row (one "from" phoneme) sums to 1.
transition_prob = {
    '/s/': {'/s/': 0.1, '/p/': 0.8, '/ie:/': 0.1, '/tS/': 0.0},
    '/p/': {'/s/': 0.0, '/p/': 0.1, '/ie:/': 0.8, '/tS/': 0.1},
    '/ie:/': {'/s/': 0.0, '/p/': 0.0, '/ie:/': 0.2, '/tS/': 0.8},
    '/tS/': {'/s/': 0.2, '/p/': 0.0, '/ie:/': 0.0, '/tS/': 0.8}
}

# Emission probabilities: emission_prob[phoneme][observation].
# Each row (one phoneme) sums to 1 over the three observation symbols.
emission_prob = {
    '/s/': {'Energy': 0.7, 'Pitch': 0.2, 'Duration': 0.1},
    '/p/': {'Energy': 0.5, 'Pitch': 0.3, 'Duration': 0.2},
    '/ie:/': {'Energy': 0.3, 'Pitch': 0.5, 'Duration': 0.2},
    '/tS/': {'Energy': 0.4, 'Pitch': 0.4, 'Duration': 0.2}
}


In [2]:
def display_hmm_params(initial_prob, transition_prob, emission_prob):
    """Print the three HMM parameter tables in an indented, readable layout.

    Args:
        initial_prob: Mapping of phoneme -> initial probability.
        transition_prob: Mapping of source phoneme -> {target phoneme: probability}.
        emission_prob: Mapping of phoneme -> {observation: probability}.

    Returns:
        None. The report is written to standard output.
    """
    report = ["Initial Probabilities:"]
    report.extend(f"  {state}: {p}" for state, p in initial_prob.items())

    # The leading "\n" in the section titles yields the blank separator line.
    report.append("\nTransition Probabilities:")
    for source, row in transition_prob.items():
        report.append(f"  From {source}:")
        report.extend(f"    To {target}: {p}" for target, p in row.items())

    report.append("\nEmission Probabilities:")
    for state, distribution in emission_prob.items():
        report.append(f"  For {state}:")
        report.extend(f"    {symbol}: {p}" for symbol, p in distribution.items())

    print("\n".join(report))

# Display the HMM parameters defined above (initial, transition and emission
# tables) so they can be checked by eye before they are used.
display_hmm_params(initial_prob, transition_prob, emission_prob)


Initial Probabilities:
  /s/: 1.0
  /p/: 0.0
  /ie:/: 0.0
  /tS/: 0.0

Transition Probabilities:
  From /s/:
    To /s/: 0.1
    To /p/: 0.8
    To /ie:/: 0.1
    To /tS/: 0.0
  From /p/:
    To /s/: 0.0
    To /p/: 0.1
    To /ie:/: 0.8
    To /tS/: 0.1
  From /ie:/:
    To /s/: 0.0
    To /p/: 0.0
    To /ie:/: 0.2
    To /tS/: 0.8
  From /tS/:
    To /s/: 0.2
    To /p/: 0.0
    To /ie:/: 0.0
    To /tS/: 0.8

Emission Probabilities:
  For /s/:
    Energy: 0.7
    Pitch: 0.2
    Duration: 0.1
  For /p/:
    Energy: 0.5
    Pitch: 0.3
    Duration: 0.2
  For /ie:/:
    Energy: 0.3
    Pitch: 0.5
    Duration: 0.2
  For /tS/:
    Energy: 0.4
    Pitch: 0.4
    Duration: 0.2


In [3]:
import random

# Phonemes (hidden states) and observations (emitted symbols).
# Both lists are written in the same order as the keys of the parameter
# dictionaries: `phonemes` matches initial_prob / transition_prob, and
# `observations` matches each row of emission_prob.
phonemes = ['/s/', '/p/', '/ie:/', '/tS/']
observations = ['Energy', 'Pitch', 'Duration']

# Function to generate a sequence of phonemes and observations
def generate_sequence(initial_prob, transition_prob, emission_prob, length=5):
    """Sample a phoneme path and its observations from the HMM.

    Args:
        initial_prob: Mapping of phoneme -> probability of starting there.
        transition_prob: Mapping of phoneme -> {next phoneme: probability}.
        emission_prob: Mapping of phoneme -> {observation: probability}.
        length: Number of time steps to generate.

    Returns:
        A tuple ``(phoneme_sequence, observation_sequence)`` of two lists,
        each with ``length`` items (both empty when ``length <= 0``).
    """
    if length <= 0:
        return [], []

    def _draw(distribution):
        # random.choices pairs population and weights by position, so both
        # are taken from the same dict rather than from a separate global
        # list that merely happens to be in the same order.
        return random.choices(list(distribution.keys()),
                              weights=list(distribution.values()))[0]

    # Choose the first phoneme based on initial probabilities
    current_phoneme = _draw(initial_prob)
    phoneme_sequence = [current_phoneme]
    observation_sequence = [_draw(emission_prob[current_phoneme])]

    # Generate the rest of the sequence: move to the next phoneme, then emit
    for _ in range(length - 1):
        current_phoneme = _draw(transition_prob[current_phoneme])
        phoneme_sequence.append(current_phoneme)
        observation_sequence.append(_draw(emission_prob[current_phoneme]))

    return phoneme_sequence, observation_sequence

# Seed the global RNG so that re-running this cell (or the whole notebook)
# reproduces the same sampled sequence.
random.seed(42)

# Generate a sequence for the word "speech"
phoneme_seq, observation_seq = generate_sequence(initial_prob, transition_prob, emission_prob)
print("\nGenerated Phoneme Sequence:", phoneme_seq)
print("Generated Observation Sequence:", observation_seq)



Generated Phoneme Sequence: ['/s/', '/p/', '/ie:/', '/ie:/', '/ie:/']
Generated Observation Sequence: ['Pitch', 'Energy', 'Energy', 'Pitch', 'Pitch']


In [None]:
def viterbi_algorithm(observations, initial_prob, transition_prob, emission_prob):
    """Return the most likely phoneme path for an observation sequence.

    Args:
        observations: Sequence of observation symbols.
        initial_prob: Mapping of phoneme -> initial probability. Its keys
            define the set (and order) of states considered.
        transition_prob: Mapping of phoneme -> {next phoneme: probability}.
            A missing target is treated as probability 0.
        emission_prob: Mapping of phoneme -> {observation: probability}.
            A missing observation is treated as probability 0.

    Returns:
        List of phonemes, one per observation, forming the highest-probability
        path. An empty list is returned for an empty observation sequence.
    """
    if len(observations) == 0:
        return []

    states = list(initial_prob)

    # Initialization step: probability of starting in each state and
    # emitting the first observation.
    viterbi = {
        state: initial_prob[state] * emission_prob[state].get(observations[0], 0)
        for state in states
    }

    # One backpointer table per time step t >= 1. Keeping every table (not
    # just the latest) is what lets the path be traced back to t = 0.
    backpointers = []

    # Recursion step
    for t in range(1, len(observations)):
        new_viterbi = {}
        step_pointers = {}
        for current_phoneme in states:
            max_prob, prev_phoneme = max(
                ((viterbi[prev]
                  * transition_prob[prev].get(current_phoneme, 0)
                  * emission_prob[current_phoneme].get(observations[t], 0), prev)
                 for prev in states),
                key=lambda pair: pair[0])
            new_viterbi[current_phoneme] = max_prob
            step_pointers[current_phoneme] = prev_phoneme
        viterbi = new_viterbi
        backpointers.append(step_pointers)

    # Backtrack from the best final state through the per-step tables
    last_phoneme = max(viterbi, key=viterbi.get)
    best_path = [last_phoneme]
    for step_pointers in reversed(backpointers):
        last_phoneme = step_pointers[last_phoneme]
        best_path.append(last_phoneme)

    best_path.reverse()
    return best_path

# Perform inference for a given observation sequence.
# `observations_seq` is a hand-written input; it is a different variable from
# the randomly generated `observation_seq` produced in the earlier cell.
observations_seq = ['Energy', 'Pitch', 'Duration', 'Energy', 'Pitch']
inferred_phonemes = viterbi_algorithm(observations_seq, initial_prob, transition_prob, emission_prob)
print("\nInferred Phoneme Sequence:", inferred_phonemes)
