<a href="https://colab.research.google.com/github/mrkim21/mrkim21.github.io/blob/main/appfolder/appcodes/240217_pronunciation_checker.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Pronunciation Checker

[App link](https://mrkim21.github.io/appfolder/pronunciation-checker.html)

In [None]:
!pip install gradio speech_recognition Levenshtein soundfile

In [None]:
import gradio as gr
import speech_recognition as sr
from Levenshtein import ratio
import tempfile
import numpy as np
import soundfile as sf

def transcribe_audio(file_info):
    """Transcribe a Gradio audio payload with Google's speech recognition API.

    Args:
        file_info: The value delivered by ``gr.Audio(type="numpy")``: a
            ``(sample_rate, samples)`` tuple, or ``None`` when the user
            submitted the form without any audio.

    Returns:
        The recognized text on success. On failure a human-readable message
        is returned instead of raising: ``"No audio provided"``,
        ``"Could not understand audio"`` or
        ``"Could not request results; <reason>"``.
    """
    # Gradio passes None when nothing was recorded or uploaded.
    if file_info is None:
        return "No audio provided"

    import os  # local import: only needed to build the temporary file path

    # file_info[0] is the sample rate in Hz, file_info[1] is the NumPy array.
    sample_rate = int(file_info[0])
    samples = file_info[1]

    r = sr.Recognizer()

    # Write the samples to a WAV file inside a temporary directory. Writing
    # to a fresh path (rather than reopening an already-open
    # NamedTemporaryFile by name) also works on platforms that forbid a
    # second open of the same file.
    with tempfile.TemporaryDirectory() as tmpdir:
        wav_path = os.path.join(tmpdir, "audio.wav")
        # Use the real sample rate; a hard-coded rate would mislabel audio
        # captured at any other rate.
        sf.write(file=wav_path, data=samples, samplerate=sample_rate, format='WAV')

        with sr.AudioFile(wav_path) as source:
            audio_data = r.record(source)

    try:
        return r.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Could not understand audio"
    except sr.RequestError as e:
        return f"Could not request results; {e}"

def pronunciation_correction(expected_text, file_info):
    """Score how closely the spoken audio matches the expected text.

    Args:
        expected_text: The sentence the user was supposed to say.
        file_info: The audio payload from the Gradio audio input, or
            ``None`` when no audio was supplied.

    Returns:
        A ``(message, similarity)`` tuple. ``similarity`` is the
        Levenshtein ratio between the lower-cased expected text and the
        transcription; it is ``0.0`` when no transcription was obtained,
        in which case ``message`` explains why.
    """
    # Nothing to score without a recording; avoid crashing the callback.
    if file_info is None:
        return "No audio provided", 0.0

    user_spoken_text = transcribe_audio(file_info)

    # transcribe_audio reports failures as message strings. Scoring those
    # against the expected text would produce a meaningless similarity, so
    # surface the failure to the user instead.
    if (user_spoken_text == "Could not understand audio"
            or user_spoken_text.startswith("Could not request results; ")):
        return user_spoken_text, 0.0

    # Ignore case and surrounding whitespace so they do not lower the score.
    similarity = ratio(expected_text.strip().lower(), user_spoken_text.strip().lower())
    if similarity > 0.8:
        return "Good pronunciation!", similarity
    return "Try again, make sure to pronounce clearly.", similarity

# Web UI: one text box for the target sentence and one audio input; the
# callback's (message, similarity) pair is shown as text and a number.
iface = gr.Interface(
    title="Pronunciation Correction Tool",
    fn=pronunciation_correction,
    inputs=[
        gr.Textbox(label="Expected Text"),
        # type="numpy" makes Gradio hand the callback the decoded audio
        # (with a NumPy array as the second element) rather than a file path.
        gr.Audio(type="numpy", label="Upload Audio File"),
    ],
    outputs=["text", "number"],
)

# Start the app with debug output enabled.
iface.launch(debug=True)
