Library

In [3]:
import nltk
import speech_recognition as sr

from g2p_en import G2p
from nltk.corpus import cmudict
from colorama import Fore

Download CMU Dictionary

In [4]:
# Make sure the CMU Pronouncing Dictionary corpus is available locally and
# fetch it through the NLTK downloader only when the lookup fails.
try:
    nltk.data.find('corpora/cmudict.zip')
except LookupError:
    nltk.download('cmudict')
    print("CMUdict has been downloaded successfully.")
else:
    print("CMUdict is already installed.")

# Dictionary object consulted by the pronunciation lookup further down.
d = cmudict.dict()

CMUdict is already installed.


Function to get word pronunciation from CMUdict

In [5]:
def get_cmu_pronunciation(word):
    """Look up ``word`` in the CMUdict mapping ``d``, ignoring letter case.

    Args:
        word: The word to look up; it is lower-cased before the lookup.

    Returns:
        The entry stored in ``d`` for the lower-cased word, or ``None`` when
        the dictionary has no such key.
    """
    return d.get(word.lower())

Function to get input from Speech-to-Text

In [6]:
# Raw string: this Windows path has backslashes followed by letters (\P, \S,
# \T, \D), which are invalid escape sequences in a normal string literal.
# The resulting value is unchanged.
audio_file = r"E:\Perkuliahan\Semester 4\Pembelajaran Mesin\Tugas Membuat Makalah\Speech-to-text\Data Suara\Data 8.wav"

In [7]:
def get_speech_to_text(file_path):
    """Transcribe an audio file using the recognizer's Google backend.

    Args:
        file_path: Path of the audio file handed to ``sr.AudioFile``.

    Returns:
        The recognized text in lower case, or an empty string when the audio
        could not be understood or the recognition request failed. In both
        failure cases a message is printed instead of raising.
    """
    recognizer = sr.Recognizer()

    # Only recording needs the file; it is closed before the recognition call.
    with sr.AudioFile(file_path) as source:
        audio_data = recognizer.record(source)

    try:
        text = recognizer.recognize_google(audio_data, language="en-us", show_all=False)
    except sr.UnknownValueError:
        print("Speech recognition could not understand audio")
        return ""
    except sr.RequestError as e:
        # Report the cause as well, otherwise a failed request cannot be
        # told apart from any other failure of the service.
        print(f"Error from Google Speech Recognition service: {e}")
        return ""
    return text.lower()

Function to convert text to ARPAbet

In [8]:
def text_to_arpabet(text):
    """Convert text to a space-separated string of the tokens ``G2p`` emits.

    Args:
        text: The text (a single word in this notebook) to convert.

    Returns:
        The tokens returned by the ``G2p`` converter joined with single
        spaces.
    """
    # Build the converter once and keep it on the function object: the driver
    # loop calls this function per word, and a fresh G2p() per call repeats
    # the same construction work every time.
    converter = getattr(text_to_arpabet, "_g2p", None)
    if converter is None:
        converter = G2p()
        text_to_arpabet._g2p = converter
    return ' '.join(converter(text))

Function to detect pronunciation errors

In [9]:
def detect_pronunciation_error(word, phonetic):
    """Colour-code ``word`` by comparing ``phonetic`` with its CMUdict entry.

    Args:
        word: The word being checked; looked up via ``get_cmu_pronunciation``.
        phonetic: Sequence of phoneme symbols to compare against the
            dictionary entry.

    Returns:
        ``word`` prefixed with a colorama foreground code:
        ``Fore.GREEN`` when ``phonetic`` equals one of the pronunciations
        listed for the word, ``Fore.RED`` when it matches none of them
        (including when the number of phonemes differs), and ``Fore.YELLOW``
        when the lookup yields no entry for the word.
    """
    variants = get_cmu_pronunciation(word)
    if not variants:
        return Fore.YELLOW + word

    spoken = list(phonetic)
    # The lookup returns a list of pronunciations; accept any of them rather
    # than only the first, so valid alternates are not flagged as errors.
    if any(list(variant) == spoken for variant in variants):
        return Fore.GREEN + word

    # Any mismatch, in length or in an individual phoneme, marks the word
    # itself, so the caller always gets the word back and can join the results.
    return Fore.RED + word

Error Detection

In [10]:
# Transcribe the recording and split the text into words; an empty
# transcription simply yields no words.
input_words = get_speech_to_text(audio_file).split()
output_words = []
for input_word in input_words:
    # Phoneme string for the recognized word, split back into a list of
    # phoneme symbols for the comparison.
    user_phonetic_trancription = text_to_arpabet(input_word)

    user_phonetic = user_phonetic_trancription.split()
    output_words.append(detect_pronunciation_error(input_word, user_phonetic))
# The colour codes attached to the words are never reset; finish with
# Fore.RESET so the last colour does not carry over into later output.
print(' '.join(output_words) + Fore.RESET)

[32mlondon [32mthe [32mcapital [32mcity [32mof [32mthe [32munited [32mkingdom [32mis [32ma [32mfire [32mmetabolic [32mrate [32min [32mhistory [32mand [32mculture [32mknow [32mas [32mthe [32msquare [32mmile [32mthe [32mcity [32mof [32mlondon [32mis [32mthe [32mhistorical [32mwhere [32mthe [32mromans [32mfirst [32mestablished [32mlondon [32myou [32mtoday [32mit's [32ma [32mmajor [32mbusiness [32mand [32mfinancial [32mcenter [32mhosting [32mthe [32mbank [32mof [32mengland [32mthe [32mroyal [32mexchange [32mand [32mlondon [32mstock [32mexchange [32mdespite [32mhis [32mmother [32mand [32msky [32mcharacters [32mlike [32mthe [32mhurricane [32mand [32mthe [32mwalking [32mdistance [32mfrom [32mtower [32mof [32mlondon [32mthe [32mcity [32mmissouri [32mweather
