# Speech Recogination 

## Steps Involved
### Step 1:: Importing the dependencies
### Step 2:: Audio and Recording Function 
### Step 3:: Transcribing the audio
### Step 4:: Caluclating  the comparision matrix
### Step 5::  Calculating accuracy and implemnting the main function

### Step1 Importing the dependencies

In [7]:
import pyaudio
import wave
import speech_recognition as sr
import Levenshtein
import difflib
import threading
import keyboard
import nltk
from nltk.corpus import wordnet as wn
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk import ne_chunk, pos_tag
from collections import Counter
import string


### Step 2:: Audio and Recording Function 

In [8]:
# Audio recording parameters
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
CHUNK = 1024
WAVE_OUTPUT_FILENAME = "output.wav"

# Global variable to control recording
RECORDING = False

def record_audio():
    global RECORDING

    audio = pyaudio.PyAudio()

    # Open the stream
    stream = audio.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)

    print("Recording...")

    frames = []

    # Read data from the microphone while RECORDING is True
    while RECORDING:
        data = stream.read(CHUNK)
        frames.append(data)

    print("Recording finished.")

    # Stop and close the stream
    stream.stop_stream()
    stream.close()
    audio.terminate()

    # Save the recorded data as a WAV file
    with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(audio.get_sample_size(FORMAT))
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))

def start_recording():
    global RECORDING
    RECORDING = True
    record_thread = threading.Thread(target=record_audio)
    record_thread.start()

def stop_recording():
    global RECORDING
    RECORDING = False



### Step3 Transcribing the audio

In [9]:
def transcribe_audio():
    recognizer = sr.Recognizer()

    with sr.AudioFile(WAVE_OUTPUT_FILENAME) as source:
        audio_data = recognizer.record(source)

    try:
        print("Transcribing audio...")
        transcribed_text = recognizer.recognize_google(audio_data)
        return transcribed_text
    except sr.UnknownValueError:
        return "Google Web Speech API could not understand audio"
    except sr.RequestError as e:
        return f"Could not request results from Google Web Speech API; {e}"

def get_synonyms(word):
    synonyms = set()
    for syn in wn.synsets(word):
        for lemma in syn.lemmas():
            synonyms.add(lemma.name().lower())
    return synonyms

def is_synonym(word1, word2):
    synonyms1 = get_synonyms(word1)
    synonyms2 = get_synonyms(word2)
    return word1 in synonyms2 or word2 in synonyms1

def preprocess_text(text):
    # Remove punctuation
    text = text.translate(str.maketrans('', '', string.punctuation))
    # Tokenize text
    tokens = word_tokenize(text.lower())
    # Lemmatize tokens
    lemmatizer = WordNetLemmatizer()
    lemmatized_tokens = [lemmatizer.lemmatize(token) for token in tokens]
    return lemmatized_tokens

def named_entity_recognition(text):
    tokens = word_tokenize(text)
    pos_tags = pos_tag(tokens)
    named_entities = ne_chunk(pos_tags)
    return named_entities

def bag_of_words(tokens1, tokens2):
    # Create a set of all unique words from both token lists
    all_words = set(tokens1).union(set(tokens2))
    # Create a frequency distribution of words in each list
    bow1 = Counter(tokens1)
    bow2 = Counter(tokens2)
    # Create vectors for comparison
    vec1 = [bow1[word] for word in all_words]
    vec2 = [bow2[word] for word in all_words]
    return vec1, vec2



### Step 4:: Caluclating  the comparision matrix

In [10]:
def calculate_word_score(predefined_word, transcribed_word):
    if predefined_word.lower() == transcribed_word.lower():
        return 100.0  # Exact match
    elif is_synonym(predefined_word.lower(), transcribed_word.lower()):
        return 50.0  # Synonym match
    elif transcribed_word == "":
        return 0.0  # Missing word
    else:
        # Calculate Levenshtein distance normalized to the length of the predefined word
        distance = Levenshtein.distance(predefined_word.lower(), transcribed_word.lower())
        max_len = max(len(predefined_word), len(transcribed_word))
        accuracy_score = max(0.0, (1 - distance / max_len)) * 100
        return min(accuracy_score, 50.0)

def word_by_word_comparison(predefined_text, transcribed_text):
    predefined_words = preprocess_text(predefined_text)
    transcribed_words = preprocess_text(transcribed_text)

    word_scores = []

    # Compare each word in predefined_text with corresponding word in transcribed_text
    matcher = difflib.SequenceMatcher(None, predefined_words, transcribed_words)
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag == 'equal':
            for idx in range(i1, i2):
                word_scores.append((predefined_words[idx], 100.0))
        elif tag == 'delete':
            for idx in range(i1, i2):
                word_scores.append((predefined_words[idx], 0.0))
        elif tag == 'insert':
            for idx in range(j1, j2):
                word_scores.append((transcribed_words[idx], 0.0))
        elif tag == 'replace':
            for idx in range(min(i2 - i1, j2 - j1)):
                predefined_word = predefined_words[i1 + idx]
                transcribed_word = transcribed_words[j1 + idx]
                score = calculate_word_score(predefined_word, transcribed_word)
                word_scores.append((predefined_word, score))

    # Calculate word match accuracy
    total_score = sum(score for _, score in word_scores)
    word_match_accuracy = (total_score / len(predefined_words)) if predefined_words else 0.0

    return word_match_accuracy, word_scores



### Step 5 Calculating accuracy and implemnting the main function

In [11]:
def calculate_accuracy(predefined_text, transcribed_text):
    predefined_words = preprocess_text(predefined_text)
    transcribed_words = preprocess_text(transcribed_text)
    predefined_joined = " ".join(predefined_words)
    transcribed_joined = " ".join(transcribed_words)
    distance = Levenshtein.distance(predefined_joined, transcribed_joined)
    max_len = max(len(predefined_joined), len(transcribed_joined))
    accuracy_score = (1 - distance / max_len) * 100
    return accuracy_score

if __name__ == "__main__":
    predefined_text = input("Enter the predefined text for comparison: ")

    print("Press 'R' to start recording. Speak when ready.")
    keyboard.wait('r')

    start_recording()
    print("Recording... Press 'S' to stop recording.")

    # Wait for 'S' key to stop recording
    keyboard.wait('s')
    stop_recording()

    transcribed_text = transcribe_audio()
    print("Transcribed Text: ", transcribed_text)

    if "Google Web Speech API could not understand audio" in transcribed_text or "Could not request results from Google Web Speech API" in transcribed_text:
        print("Error in transcription.")
    else:
        # Calculate overall accuracy score
        accuracy_score = calculate_accuracy(predefined_text, transcribed_text)
        print(f"\nOverall Accuracy Score: {accuracy_score:.2f}%")

        # Detailed word-by-word comparison
        word_match_accuracy, word_scores = word_by_word_comparison(predefined_text, transcribed_text)
        print("\nDetailed Word-by-Word Match Assessment:")
        for predefined_word, score in word_scores:
            print(f"Predefined Word: {predefined_word}")
            print(f"Transcribed Word: {predefined_word if score == 100.0 else '---'}")
            print(f"Score: {score:.2f}\n")

        print(f"Word Match Accuracy: {word_match_accuracy:.2f}%")

        # Named Entity Recognition (NER)
        predefined_ner = named_entity_recognition(predefined_text)
        transcribed_ner = named_entity_recognition(transcribed_text)
        print("\nNamed Entity Recognition (NER) Comparison:")
        print(f"Predefined NER: {predefined_ner}")
        print(f"Transcribed NER: {transcribed_ner}")

        # Bag of Words (BoW) comparison
        
        predefined_words = preprocess_text(predefined_text)
        transcribed_words = preprocess_text(transcribed_text)
        vec1, vec2 = bag_of_words(predefined_words, transcribed_words)
      
        


Enter the predefined text for comparison:  hello my name is mahrishi


Press 'R' to start recording. Speak when ready.
Recording... Press 'S' to stop recording.
Recording...
Transcribing audio...
Recording finished.
Transcribed Text:  hello my name is Maharishi

Overall Accuracy Score: 96.15%

Detailed Word-by-Word Match Assessment:
Predefined Word: hello
Transcribed Word: hello
Score: 100.00

Predefined Word: my
Transcribed Word: my
Score: 100.00

Predefined Word: name
Transcribed Word: name
Score: 100.00

Predefined Word: is
Transcribed Word: is
Score: 100.00

Predefined Word: mahrishi
Transcribed Word: ---
Score: 50.00

Word Match Accuracy: 90.00%

Named Entity Recognition (NER) Comparison:
Predefined NER: (S hello/NN my/PRP$ name/NN is/VBZ mahrishi/JJ)
Transcribed NER: (S hello/NN my/PRP$ name/NN is/VBZ (PERSON Maharishi/NNP))
