In [1]:
import os

# Work around the OpenMP duplicate-runtime conflict. Kept ahead of the imports
# below so it is already set when those libraries load. Do not delete: removing
# this will take down prod.
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

import gradio as gr
from PIL import Image
import librosa
import noisereduce as nr


# Function definitions for each step (placeholders)
def preprocess_audio(audio):
    """Load, denoise, and peak-normalize a speech recording.

    Args:
        audio: Either something ``librosa.load`` accepts (e.g. a file path),
            or a ``(sample_rate, samples)`` tuple such as the one Gradio's
            ``Audio(type="numpy")`` component passes to its callback.

    Returns:
        A ``(normalized_audio, sr)`` tuple: the noise-reduced, normalized
        waveform and its sample rate.

    Raises:
        ValueError: If ``audio`` is ``None`` (e.g. nothing was recorded).
    """
    if audio is None:
        raise ValueError("No audio provided.")

    if isinstance(audio, tuple):
        # Local import: only this branch needs numpy directly, and the file
        # does not import it at module level.
        import numpy as np

        sr, samples = audio
        samples = np.asarray(samples)
        if np.issubdtype(samples.dtype, np.integer):
            # Scale integer PCM into roughly [-1, 1] so the data looks like
            # the floating-point waveform librosa.load would produce.
            full_scale = float(np.iinfo(samples.dtype).max) + 1.0
            samples = samples.astype(np.float32) / full_scale
        else:
            samples = samples.astype(np.float32)
        if samples.ndim > 1:
            # Down-mix to mono. Assumes the (samples, channels) layout that
            # Gradio documents for multi-channel audio.
            samples = samples.mean(axis=1)
        y = samples
    else:
        # sr=None keeps the file's native sample rate instead of resampling.
        y, sr = librosa.load(audio, sr=None)

    # Noise reduction
    reduced_noise = nr.reduce_noise(y=y, sr=sr)
    # Normalization
    normalized_audio = librosa.util.normalize(reduced_noise)
    return normalized_audio, sr

def segment_speech(audio):
    """Segment ``audio`` into speech regions.

    Placeholder: no segmentation is performed yet, so the input is handed
    back unchanged.
    """
    # TODO: replace with actual speech segmentation.
    return audio

def transcribe_speech(audio, sr):
    """Transcribe ``audio`` (sampled at ``sr``) to text.

    Placeholder: both arguments are currently ignored and a fixed sample
    sentence is returned until a real ASR system is plugged in.
    """
    # TODO: replace with a call to an actual ASR system.
    return "This is a sample transcription."

def phoneme_analysis(transcription, standard_text):
    """Compare the phonemes of ``transcription`` against ``standard_text``.

    Placeholder: both arguments are currently ignored and a fixed result
    string is returned.
    """
    # TODO: replace with an actual phoneme-level comparison.
    return "Phoneme comparison results"

def calculate_pronunciation_score(phoneme_comparison):
    """Derive a pronunciation score from ``phoneme_comparison``.

    Placeholder: the argument is currently ignored and a fixed score of 85
    is returned.
    """
    # TODO: compute the score from the phoneme comparison.
    return 85

def generate_feedback(score):
    """Build the feedback message shown to the user for ``score``.

    The message reports the score that was actually passed in (previously
    the text always said 85, whatever the score was). The improvement tip
    is still a generic placeholder.

    Args:
        score: The pronunciation score to report.

    Returns:
        A human-readable feedback string.
    """
    # TODO: tailor the tip to the phonemes that were mispronounced.
    return (
        f"Your pronunciation score is {score}. "
        "Try to improve on specific phonemes."
    )

def mispronunciation_detection(audio, standard_text):
    """Run the whole pipeline on one recording.

    Steps, in order: preprocess the audio, segment it, transcribe it,
    compare the transcription with ``standard_text``, score the comparison,
    and build feedback from the score.

    Returns:
        A ``(transcription, score, feedback)`` tuple.
    """
    cleaned, sample_rate = preprocess_audio(audio)
    transcription = transcribe_speech(segment_speech(cleaned), sample_rate)
    score = calculate_pronunciation_score(
        phoneme_analysis(transcription, standard_text)
    )
    return transcription, score, generate_feedback(score)

# Load the flowchart image if the file is present; otherwise fall back to None.
flowchart_image_path = "./public/image.png"
flowchart_image = (
    Image.open(flowchart_image_path)
    if os.path.exists(flowchart_image_path)
    else None
)

# Define the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("<h1>Mispronunciation Detection and Correction System</h1>")

    # Top row: the flowchart, or a short notice when the image was not loaded.
    with gr.Row():
        if flowchart_image is not None:
            gr.Image(flowchart_image, label="Flowchart")
        else:
            gr.Markdown("Flowchart image not found.")

    with gr.Row():
        # Left column: user inputs.
        with gr.Column():
            # type="filepath" hands the callback a path to the recorded file,
            # which is what the librosa.load() call in preprocess_audio
            # expects. type="numpy" would pass a (sample_rate, ndarray) tuple.
            audio_input = gr.Audio(type="filepath", label="Record Your Speech")
            standard_text_input = gr.Textbox(label="Standard Text")
            submit_button = gr.Button("Submit")

        # Right column: pipeline results.
        with gr.Column():
            transcription_output = gr.Textbox(label="Transcription")
            score_output = gr.Number(label="Pronunciation Score")
            feedback_output = gr.Textbox(label="Feedback")

    # The callback's (transcription, score, feedback) tuple is mapped onto
    # the three output components in the same order.
    submit_button.click(
        mispronunciation_detection,
        inputs=[audio_input, standard_text_input],
        outputs=[transcription_output, score_output, feedback_output],
    )

demo.launch()

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


