In [1]:
import time
from datetime import date

import speech_recognition as sr

import gensim
from gensim.utils import simple_preprocess
from gensim.parsing.preprocessing import STOPWORDS
from nltk.stem import WordNetLemmatizer, SnowballStemmer
from nltk.stem.porter import *
import numpy as np
np.random.seed(400)


In [2]:

# import nltk
# nltk.download('wordnet')

In [3]:
def captureWhatTheUserSpoke(speechRecognizer, microphone):
    """Record one phrase from the microphone and transcribe it with `recognize_google`.

    Args:
        speechRecognizer: an `sr.Recognizer` instance.
        microphone: an `sr.Microphone` instance used as the audio source.

    Returns:
        dict with three keys:
            "apiRequestWasSuccessful": False only when the recognition request failed
                (`sr.RequestError`), True otherwise.
            "errorEncountered": None on success, otherwise a human-readable message.
            "transcribedTextReceived": the transcription, or None when none was obtained.

    Raises:
        TypeError: if either argument is not of the expected type.
    """
    # verify that speechRecognizer and microphone arguments are of appropriate type
    if not isinstance(speechRecognizer, sr.Recognizer):
        raise TypeError("`speechRecognizer` must be `Recognizer` instance")

    if not isinstance(microphone, sr.Microphone):
        raise TypeError("`microphone` must be `Microphone` instance")

    responseReceivedFromGoogleSpeechAPI = {
        "apiRequestWasSuccessful": True,
        "errorEncountered": None,
        "transcribedTextReceived": None
    }

    # speechRecognizer sensitivity is adjusted for ambient noise before we start recording
    # through the microphone
    try:
        with microphone as audioCaptureSource:
            speechRecognizer.adjust_for_ambient_noise(audioCaptureSource)
            capturedAudioObject = speechRecognizer.listen(audioCaptureSource, timeout=10)
    except sr.WaitTimeoutError:
        # Nobody started speaking within the 10 second timeout. Report it like
        # unintelligible speech (API flag stays True) so callers can simply re-prompt
        # instead of the whole session dying on an uncaught exception.
        responseReceivedFromGoogleSpeechAPI[
            "errorEncountered"] = "No speech was detected before the listening timeout. Pl try again."
        return responseReceivedFromGoogleSpeechAPI

    try:
        responseReceivedFromGoogleSpeechAPI["transcribedTextReceived"] = speechRecognizer.recognize_google(
            capturedAudioObject)
    except sr.RequestError:
        # API was unreachable or unresponsive
        responseReceivedFromGoogleSpeechAPI["apiRequestWasSuccessful"] = False
        responseReceivedFromGoogleSpeechAPI["errorEncountered"] = " Google Speech API unreachable/ unavailable"
    except sr.UnknownValueError:
        # speech was unintelligible or incomprehensible
        responseReceivedFromGoogleSpeechAPI[
            "errorEncountered"] = "Not able to recognize the spoken speech. Pl try again."

    return responseReceivedFromGoogleSpeechAPI

In [4]:
def capturePreferenceOfUser(speechRecognizer, microphone):
    """Prompt the user (up to three times) for a spoken command.

    Each attempt prints a short countdown and delegates the recording to
    `captureWhatTheUserSpoke`. Prompting stops as soon as a transcription is
    received or the API request is reported as unsuccessful.

    Returns:
        dict with "optionChosen" (the transcription, possibly None) and
        "processContinuation" (True only when the last attempt carried no error).
    """
    maxPrompts = 3
    countdownLines = (
        "We are firing up our servers for you in",
        "3...",
        "2..",
        "1.",
        'LISTENING ! ',
    )

    for _attempt in range(maxPrompts):
        for countdownLine in countdownLines:
            print(countdownLine)

        heard = captureWhatTheUserSpoke(speechRecognizer, microphone)

        # Done when we got text; give up early when the API itself failed.
        if heard["transcribedTextReceived"] or not heard["apiRequestWasSuccessful"]:
            break

        print("I couldn't understand that. What did you say ? \n")

    # Surface the error (if any); grading only goes ahead when there is none.
    errorMessage = heard["errorEncountered"]
    if errorMessage:
        print("ERROR ENCOUNTERED: {}".format(errorMessage))

    return {
        "optionChosen": heard["transcribedTextReceived"],
        "processContinuation": not errorMessage,
    }


In [5]:
def lemmatizeStemming(token):
    """Lemmatize `token` as a verb, then return the English Snowball stem of that lemma."""
    englishStemmer = SnowballStemmer("english")
    verbLemma = WordNetLemmatizer().lemmatize(token, pos='v')
    return englishStemmer.stem(verbLemma)


In [6]:
def textpreProcessor(text):
    """Tokenize `text` and return the lemmatized/stemmed tokens worth keeping.

    Tokens come from `gensim.utils.simple_preprocess`; a token is kept only if it
    is not a gensim stopword and is longer than three characters.
    """
    stopwords = gensim.parsing.preprocessing.STOPWORDS
    return [
        lemmatizeStemming(candidate)
        for candidate in gensim.utils.simple_preprocess(text)
        if candidate not in stopwords and len(candidate) > 3
    ]

In [7]:
def gradeSpeechForRelevance():
    """Print the LDA topics that best match the transcribed speech.

    Reads `transcriptionOfUserSpeech.txt` from the current working directory,
    preprocesses it with `textpreProcessor`, fits an 8-topic `LdaMulticore`
    model on that single document and prints every topic returned for the
    document together with its score, highest score first.

    Returns:
        None. The report is written to stdout.
    """
    # Context manager so the file handle is always closed.
    with open("transcriptionOfUserSpeech.txt", "rt") as transcriptFile:
        text = transcriptFile.read()

    processedDocument = textpreProcessor(text)

    if not processedDocument:
        # No usable tokens means an empty dictionary, and gensim's LDA refuses to
        # train on a vocabulary with no terms. Report that instead of crashing.
        print("====" * 20)
        print()
        print("We could not find enough meaningful words in the speech to suggest a topic.")
        print()
        print("====" * 20)
        print()
        return

    # Every token is registered as its own one-word document to build the vocabulary.
    processedDocumentDataset = [word.split() for word in processedDocument]
    dictionary = gensim.corpora.Dictionary(processedDocumentDataset)

    # The whole speech is the single training document and also the document we score.
    bow_vector = dictionary.doc2bow(processedDocument)
    bow_corpus = [bow_vector]

    lda_model = gensim.models.LdaMulticore(bow_corpus,
                                           num_topics = 8,
                                           id2word = dictionary,
                                           passes = 10,
                                           workers = 2)
    print("====" * 20)
    print()

    print( "Based on the spoken text, we believe this speech to fall in one of the below topics (sorted in descending order of likeliness)")
    print()

    print( "HELP GUIDE")
    print( "Each topic has certain words described to help you decipher the logical bracket in which the topic might fall into")
    print( "Example: if a Topic has words like 'space', 'nasa', 'orbit', 'launch', it probably is about space.")
    print( "Based on the above information, if the topic-of-intent is among the high likelihood topics, we believe your speech was relevant. ")
    print()
    print( " ::: TL;DR -> Score = likeliness. Higher the score, the better. :::")

    # Negated score as the key gives a descending sort.
    for index, score in sorted(lda_model[bow_vector], key=lambda tup: -tup[1]):
        print("Score: {}\t Topic: {}".format(score, lda_model.print_topic(index, 5)))

    print()

    print("====" * 20)
    print()

In [8]:
def gradeSpeechForGrammar():
    """Print a grammar report for the transcribed speech.

    Runs every line of `transcriptionOfUserSpeech.txt` through the `en-US`
    `language_check.LanguageTool` checker, then prints the total number of
    matches followed by each match.

    Returns:
        None. The report is written to stdout.
    """
    # Kept as a function-level import, as in the original.
    import language_check

    # Get the relevant language tool
    englishLanguageTool = language_check.LanguageTool('en-US')

    # Collect matches from ALL lines. The original kept only the last line's
    # matches for printing and failed with NameError on an empty file.
    allMatchesFound = []
    with open(r'transcriptionOfUserSpeech.txt', 'r') as textBlob:
        for line in textBlob:
            allMatchesFound.extend(englishLanguageTool.check(line))

    totalNumberOfMistakes = len(allMatchesFound)

    print("====" * 20)
    print()

    print("GRAMMAR REPORT:")
    print()

    print("Total number of mistakes found in the document:  ", totalNumberOfMistakes)
    print()

    for mistake in allMatchesFound:
        print(mistake)
        print()

    print()
    print("====" * 20)

In [9]:
def getNumberOfWordsInSpeech():
    """Return the number of whitespace-separated words in `transcriptionOfUserSpeech.txt`."""
    # Context manager so the file handle is closed (the original never closed it).
    with open("transcriptionOfUserSpeech.txt", "rt") as transcriptFile:
        words = transcriptFile.read().split()

    return len(words)

In [10]:
def getRecommendedSpeechDuration(numberOfWordsInSpeech):
    """Return the suggested speaking time for a speech of the given word count.

    Uses 130 words-per-minute as the ideal speaking rate. The result is a dict
    with the duration expressed in both "seconds" and "minutes".
    """
    idealMinutes = numberOfWordsInSpeech / 130
    return {
        "seconds": idealMinutes * 60,
        "minutes": idealMinutes,
    }

In [11]:

def getRealTimeSpeechPace(durationOfUserSpeech, numberOfWordsInSpeech):
    """Return the speaking pace for `numberOfWordsInSpeech` words over `durationOfUserSpeech`.

    The duration is treated as seconds: the result dict holds the plain ratio as
    "words-per-second" and that ratio times 60 as "words-per-minute".
    """
    wordsPerSecond = numberOfWordsInSpeech / durationOfUserSpeech
    return {
        "words-per-minute": wordsPerSecond * 60,
        "words-per-second": wordsPerSecond,
    }

In [12]:
def populateFillerWordsReport(numberOfWordsInSpeech):
    """Return the percentage of filler words in `transcriptionOfUserSpeech.txt`.

    Args:
        numberOfWordsInSpeech: total word count used as the denominator.

    Returns:
        Filler words as a percentage of `numberOfWordsInSpeech`, or 0.0 when
        that count is zero (nothing was spoken, so nothing to penalise).
    """
    fillerWords = frozenset([
        'like', 'ok', 'okay', 'right', 'alright', 'ummmm', 'umm', 'um', 'ahh', 'ah', 'errr', 'err',
        'er', 'oh', 'totally', 'literally', 'well', 'hmmm', 'hmm', 'hm', 'actually', 'basically',
        'seriously', 'mhm', 'uh',
    ])
    # Punctuation that may cling to a word and would otherwise hide a filler ("Well,").
    surroundingPunctuation = '.,!?;:"\'()[]'

    # Context manager so the file handle is closed (the original never closed it).
    with open("transcriptionOfUserSpeech.txt", "rt") as transcriptFile:
        words = transcriptFile.read().split()

    # Avoid ZeroDivisionError on an empty transcript.
    if not numberOfWordsInSpeech:
        return 0.0

    # Compare case-insensitively and without surrounding punctuation.
    numberOfFillersDetected = sum(
        1 for word in words
        if word.strip(surroundingPunctuation).lower() in fillerWords
    )

    return (numberOfFillersDetected / numberOfWordsInSpeech) * 100


In [13]:
def gradeSpeechForFluency(durationOfUserSpeech):
    """Print a fluency report for the transcribed speech.

    The score is the sum of a pace component (5 for 130-160 words-per-minute,
    3 when slower, 4 when faster) and a filler-word component (2 when fillers
    reach 1.28% of the words, 5 otherwise). A verdict for the score and a
    recommended speech duration are printed to stdout.
    """
    separator = "====" * 20

    wordCount = getNumberOfWordsInSpeech()
    wordsPerMinute = getRealTimeSpeechPace(durationOfUserSpeech, wordCount)["words-per-minute"]

    # Pace component.
    if wordsPerMinute < 130:
        paceScore = 3
    elif wordsPerMinute <= 160:
        paceScore = 5
    else:
        paceScore = 4

    # Filler-word component.
    fillerPercentage = populateFillerWordsReport(wordCount)
    fillerScore = 2 if fillerPercentage >= 1.28 else 5

    totalScore = paceScore + fillerScore

    print(separator)
    print()

    print("FLUENCY REPORT:")
    print()

    if totalScore == 10:
        verdict = "Perfect! You are fluent and we predict that you sound confident!"
    elif totalScore in (8, 9):
        verdict = "Good job. We predict there might be a pacing issue. Try our PRO-TIP for improving on further."
    elif totalScore in (6, 7):
        verdict = "Decent try! Try practicing more to reduce your filler words count."
    else:
        verdict = "Not to lose heart! Let's try pumping up your fluency. We are here to practice with you."
    print(verdict)

    print()
    suggestedDuration = getRecommendedSpeechDuration(wordCount)
    print("PRO-TIP: For optimal fluency, we recommend timing your speech around the",
          suggestedDuration["minutes"],
          "minutes/", suggestedDuration["seconds"], "seconds mark")

    print()
    print(separator)

In [14]:
if __name__ == "__main__":
    # Voice-driven entry point. The user says RECORD, UPLOAD (or LOAD), or anything
    # else to stop. After a successful transcription the speech is graded for
    # relevance, grammar and fluency, and the user is prompted again.
    import wave  # stdlib; only needed to measure the uploaded file's duration

    # Must match the file name hard-coded in the grading functions.
    TRANSCRIPT_FILE = 'transcriptionOfUserSpeech.txt'
    UPLOADED_AUDIO_FILE = 'audioFileForDemo.wav'
    GRADE_ATTEMPTS = 1000
    # Seconds subtracted from the wall-clock time measured around a live recording.
    OBSERVED_REAL_TIME_ADJUSTMENT = 3

    print(" Welcome to SpeakGrade: Grade Your Speech! ")
    print(" Built by Pulkit Pradeep Gupta ")
    print(" Place of inception : Noida, Uttar Pradesh, India on", date.today())

    print(" What is your preference? ")
    welcomeInstructionsForTheUser = (
        " Say RECORD to start recording, UPLOAD if you wish to upload a pre-recorded file or STOP to terminate:\n "
    )

    # show welcome instructions and wait for 1 second
    print(welcomeInstructionsForTheUser)
    time.sleep(1)

    speechRecognizer = sr.Recognizer()
    microphone = sr.Microphone()

    for gradeAttempt in range(GRADE_ATTEMPTS):
        preferenceAndControl = capturePreferenceOfUser(speechRecognizer, microphone)

        if not preferenceAndControl["processContinuation"]:
            break

        # The transcription may come back capitalised or padded ("Record"), so
        # normalise it before comparing against the command words.
        userChosenOption = (preferenceAndControl["optionChosen"] or "").strip().lower()

        if userChosenOption == 'record':
            print('WOOHOO! We are LIVE & recording. Start speaking. ')

            recordingStartTime = time.time()
            spokenWords = captureWhatTheUserSpoke(speechRecognizer, microphone)
            recordingEndTime = time.time()

            elapsedSeconds = recordingEndTime - recordingStartTime
            durationOfUserSpeech = elapsedSeconds - OBSERVED_REAL_TIME_ADJUSTMENT
            if durationOfUserSpeech <= 0:
                # The adjustment would make the duration unusable for a pace
                # calculation; fall back to the raw elapsed time.
                durationOfUserSpeech = elapsedSeconds

            transcribedText = spokenWords["transcribedTextReceived"]
            if not transcribedText:
                # Nothing was transcribed: do not grade a stale or missing transcript.
                if spokenWords["errorEncountered"]:
                    print("ERROR ENCOUNTERED: {}".format(spokenWords["errorEncountered"]))
                print( "Uh-oh. We encountered an error while transcribing your speech to text file. Pl try again.")
                continue

            try:
                with open(TRANSCRIPT_FILE, 'w') as fileToWhichToWrite:
                    fileToWhichToWrite.write(transcribedText)
            except OSError as error:
                print("Exception Encountered: " + str(error))
                print( "Uh-oh. We encountered an error while transcribing your speech to text file. Pl try again.")
                continue

        elif userChosenOption in ('upload', 'load'):
            print(' On it, Houston! Uploading your speech.')

            try:
                with sr.AudioFile(UPLOADED_AUDIO_FILE) as source:
                    fileAudio = speechRecognizer.record(source)

                transcribedTextFromUploadedAudio = speechRecognizer.recognize_google(fileAudio)
                with open(TRANSCRIPT_FILE, 'w') as fileToWhichToWrite:
                    fileToWhichToWrite.write(transcribedTextFromUploadedAudio)

                # Duration in seconds = number of frames / frames per second.
                with wave.open(UPLOADED_AUDIO_FILE, 'rb') as uploadedAudio:
                    audioFrames = uploadedAudio.getnframes()
                    audioRate = uploadedAudio.getframerate()
                durationOfUserSpeech = audioFrames / float(audioRate)
            except Exception as error:
                # Best-effort, as in the original: report whatever went wrong (missing
                # file, recognition failure, malformed audio) and let the user retry
                # rather than grading without a fresh transcript.
                print("Exception Encountered: " + str(error))
                print("Pl try again.")
                continue
        else:
            print( "Stopping SpeechGrade. Thanks for trying us out.")
            break

        if durationOfUserSpeech <= 0:
            # A zero duration would make the pace calculation divide by zero.
            print("We could not measure the duration of your speech. Pl try again.")
            continue

        print("####" * 20)
        print("####" * 20)
        print()
        print('We have finished transcribing your speech. Beginning evaluation...')
        print()

        gradeSpeechForRelevance()
        gradeSpeechForGrammar()
        gradeSpeechForFluency(durationOfUserSpeech)

    print( "Thanks for your time !")

 Welcome to SpeakGrade: Grade Your Speech! 
 Built by Pulkit Pradeep Gupta 
 Place of inception : Noida, Uttar Pradesh, India on 2020-08-23
 What is your preference? 
 Say RECORD to start recording, UPLOAD if you wish to upload a pre-recorded file or STOP to terminate:
 
We are firing up our servers for you in
3...
2..
1.
LISTENING ! 
 On it, Houston! Uploading your speech.
################################################################################
################################################################################

We have finished transcribing your speech. Beginning evaluation...


Based on the spoken text, we believe this speech to fall in one of the below topics (sorted in descending order of likeliness)

HELP GUIDE
Each topic has certain words described to help you decipher the logical bracket in which the topic might fall into
Example: if a Topic has words like 'space', 'nasa', 'orbit', 'launch', it probably is about space.
Based on the above information, if the