In [2]:
# # First, install required packages
# !pip install SpeechRecognition
# !pip install pyaudio
# !pip install eng-to-ipa
# !pip install pandas
# !pip install python-Levenshtein

In [1]:
import speech_recognition as sr
import eng_to_ipa
import pandas as pd
from datetime import datetime
from IPython.display import HTML, display
import Levenshtein
import warnings
warnings.filterwarnings('ignore')

In [2]:
class PronunciationAnalyzer:
    def __init__(self):
        self.recognizer = sr.Recognizer()
        self.results = []

    def record_audio(self, duration=5):
        """Record audio from microphone for specified duration"""
        with sr.Microphone() as source:
            print(f"\nAdjusting for ambient noise... Please wait...")
            self.recognizer.adjust_for_ambient_noise(source, duration=1)

            print(f"\nRecording for {duration} seconds...\n")
            audio = self.recognizer.listen(source, timeout=duration)
            print("Recording complete!\n")

            return audio

    def get_ipa_pronunciation(self, text):
        """Convert text to IPA phonetic notation"""
        try:
            ipa = eng_to_ipa.convert(text)
            return ipa if ipa != text else "Conversion failed"
        except:
            return "IPA conversion failed"

    def calculate_pronunciation_accuracy(self, spoken_text, correct_text):
        """Calculate pronunciation accuracy using Levenshtein distance"""
        spoken_ipa = self.get_ipa_pronunciation(spoken_text)
        correct_ipa = self.get_ipa_pronunciation(correct_text)

        if "failed" in spoken_ipa or "failed" in correct_ipa:
            return 0, "N/A", "N/A"

        # Calculate Levenshtein distance
        distance = Levenshtein.distance(spoken_ipa, correct_ipa)
        max_len = max(len(spoken_ipa), len(correct_ipa))

        # Calculate accuracy percentage
        accuracy = ((max_len - distance) / max_len) * 100 if max_len > 0 else 0

        return accuracy, spoken_ipa, correct_ipa

    def analyze_speech(self, correct_text, duration=5):
        """Record and analyze speech, comparing with correct text"""
        try:
            # Record audio
            audio = self.record_audio(duration)

            # Convert speech to text
            spoken_text = self.recognizer.recognize_google(audio)
            print(f"You said: {spoken_text}")
            print(f"Correct text was: {correct_text}\n")

            # Analyze each word pair
            spoken_words = spoken_text.lower().split()
            correct_words = correct_text.lower().split()

            # Analyze as many words as we can
            for spoken, correct in zip(spoken_words, correct_words):
                accuracy, spoken_ipa, correct_ipa = self.calculate_pronunciation_accuracy(spoken, correct)

                analysis = {
                    'Timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                    'Spoken Word': spoken,
                    'Correct Word': correct,
                    'Spoken IPA': spoken_ipa,
                    'Correct IPA': correct_ipa,
                    'Accuracy (%)': f"{accuracy:.1f}",
                    'Status': self.get_accuracy_status(accuracy)
                }
                self.results.append(analysis)

            # Display results
            self.display_results()

        except sr.UnknownValueError:
            print("Could not understand the audio")
        except sr.RequestError as e:
            print(f"Error with the speech recognition service; {e}")
        except Exception as e:
            print(f"An error occurred: {e}")

    def get_accuracy_status(self, accuracy):
        """Return status based on pronunciation accuracy"""
        if accuracy >= 90:
            return "Excellent"
        elif accuracy >= 75:
            return "Good"
        elif accuracy >= 60:
            return "Fair"
        else:
            return "Needs Practice"

    def display_results(self):
        """Display analysis results in a formatted table"""
        df = pd.DataFrame(self.results)
        display(HTML(df.to_html(index=False)))

    def save_report(self, filename='pronunciation_analysis_report.csv'):
        """Save analysis results to a CSV file"""
        df = pd.DataFrame(self.results)
        df.to_csv(filename, index=False)
        print(f"\nReport saved as {filename}")

    def clear_results(self):
        """Clear previous results"""
        self.results = []

def practice_pronunciation(correct_text, duration=5):
    """Function to practice pronunciation of a specific text"""
    analyzer = PronunciationAnalyzer()
    analyzer.analyze_speech(correct_text, duration)
    return analyzer

# Example usage:
# correct_text = "hello world"
# analyzer = practice_pronunciation(correct_text, duration=5)
# analyzer.save_report('my_pronunciation_report.csv')

In [3]:
# Example: Practice pronouncing "hello world"
correct_text = "hello Suraj good morning"
analyzer = practice_pronunciation(correct_text, duration=5)


Adjusting for ambient noise... Please wait...

Recording for 5 seconds...

Recording complete!

You said: hello hi there
Correct text was: hello Suraj good morning



Timestamp,Spoken Word,Correct Word,Spoken IPA,Correct IPA,Accuracy (%),Status
2025-02-06 14:02:26,hello,hello,hɛˈloʊ,hɛˈloʊ,100.0,Excellent
2025-02-06 14:02:26,hi,suraj,haɪ,suraj*,16.7,Needs Practice
2025-02-06 14:02:26,there,good,ðɛr,gʊd,0.0,Needs Practice
