In [10]:
# this is the chibi girls code

from pocketsphinx.pocketsphinx import *
from sphinxbase.sphinxbase import *

import os
import pyaudio
import wave
import audioop
from collections import deque
import time
import math

"""
Written by Sophie Li, 2016
http://blog.justsophie.com/python-speech-to-text-with-pocketsphinx/
"""

class SpeechDetector:
    def __init__(self):
        # Microphone stream config.
        self.CHUNK = 1024  # CHUNKS of bytes to read each time from mic
        self.FORMAT = pyaudio.paInt16
        self.CHANNELS = 1
        self.RATE = 16000

        self.SILENCE_LIMIT = 1  # Silence limit in seconds. The max ammount of seconds where
                           # only silence is recorded. When this time passes the
                           # recording finishes and the file is decoded

        self.PREV_AUDIO = 0.5  # Previous audio (in seconds) to prepend. When noise
                          # is detected, how much of previously recorded audio is
                          # prepended. This helps to prevent chopping the beginning
                          # of the phrase.

        self.THRESHOLD = 4500
        self.num_phrases = -1

        # These will need to be modified according to where the pocketsphinx folder is
        MODELDIR = "../../tools/pocketsphinx/model"
        DATADIR = "../../tools/pocketsphinx/test/data"

        # Create a decoder with certain model
        config = Decoder.default_config()
        config.set_string('-hmm', os.path.join(MODELDIR, 'en-us/en-us'))
        config.set_string('-lm', os.path.join(MODELDIR, 'en-us/en-us.lm.bin'))
        config.set_string('-dict', os.path.join(MODELDIR, 'en-us/cmudict-en-us.dict'))

        # Creaders decoder object for streaming data.
        self.decoder = Decoder(config)

    def setup_mic(self, num_samples=50):
        """ Gets average audio intensity of your mic sound. You can use it to get
            average intensities while you're talking and/or silent. The average
            is the avg of the .2 of the largest intensities recorded.
        """
        print ("Getting intensity values from mic.")
        p = pyaudio.PyAudio()
        stream = p.open(format=self.FORMAT, 
                        channels=self.CHANNELS,
                        rate=self.RATE, 
                        input=True, 
                        frames_per_buffer=self.CHUNK)

        values = [math.sqrt(abs(audioop.avg(stream.read(self.CHUNK), 4)))
                  for x in range(num_samples)]
        values = sorted(values, reverse=True)
        r = sum(values[:int(num_samples * 0.2)]) / int(num_samples * 0.2)
        print (" Finished ")
        print (" Average audio intensity is ", r)
        stream.close()
        p.terminate()

        if r < 3000:
            self.THRESHOLD = 3500
        else:
            self.THRESHOLD = r + 100

    def save_speech(self, data, p):
        """
        Saves mic data to temporary WAV file. Returns filename of saved
        file
        """
        filename = 'output_'+str(int(time.time()))
        # writes data to WAV file
        data = ''.join(data)
        wf = wave.open(filename + '.wav', 'wb')
        wf.setnchannels(1)
        wf.setsampwidth(p.get_sample_size(pyaudio.paInt16))
        wf.setframerate(16000)  # TODO make this value a function parameter?
        wf.writeframes(data)
        wf.close()
        return filename + '.wav'

    def decode_phrase(self, wav_file):
        self.decoder.start_utt()
        stream = open(wav_file, "rb")
        while True:
          buf = stream.read(1024)
          if buf:
            self.decoder.process_raw(buf, False, False)
          else:
            break
        self.decoder.end_utt()
        words = []
        [words.append(seg.word) for seg in self.decoder.seg()]
        return words

    def run(self):
        """
        Listens to Microphone, extracts phrases from it and calls pocketsphinx
        to decode the sound
        """
        self.setup_mic()

        #Open stream
        p = pyaudio.PyAudio()
        stream = p.open(format=self.FORMAT, 
                        channels=self.CHANNELS, 
                        rate=self.RATE, 
                        input=True, 
                        frames_per_buffer=self.CHUNK)
        print ("* Mic set up and listening. ")

        audio2send = []
        cur_data = ''  # current chunk of audio data
        rel = self.RATE/self.CHUNK
        slid_win = deque(maxlen=self.SILENCE_LIMIT * rel)
        #Prepend audio from 0.5 seconds before noise was detected
        prev_audio = deque(maxlen=self.PREV_AUDIO * rel)
        started = False

        while True:
            cur_data = stream.read(self.CHUNK)
            slid_win.append(math.sqrt(abs(audioop.avg(cur_data, 4))))

            if sum([x > self.THRESHOLD for x in slid_win]) > 0:
                if started == False:
                    print ("Starting recording of phrase")
                    started = True
                audio2send.append(cur_data)

            elif started:
                print ("Finished recording, decoding phrase")
                filename = self.save_speech(list(prev_audio) + audio2send, p)
                r = self.decode_phrase(filename)
                print ("DETECTED: ", r)

                # Removes temp audio file
                os.remove(filename)
                # Reset all
                started = False
                slid_win = deque(maxlen=self.SILENCE_LIMIT * rel)
                prev_audio = deque(maxlen=0.5 * rel)
                audio2send = []
                print ("Listening ...")

            else:
                prev_audio.append(cur_data)

        print ("* Done listening")
        stream.close()
        p.terminate()

if __name__ == "__main__":
    sd = SpeechDetector()
    sd.run()

RuntimeError: new_Decoder returned -1

In [4]:
#this is the basic recognition code
import speech_recognition as sr

In [5]:
# initialize the recognizer
r = sr.Recognizer()

In [6]:
 
with sr.Microphone() as source:  
   print("Say something!")  
   audio = r.listen(source)  
   
 # recognize speech using Sphinx  
try:  
   print("Sphinx thinks you said '" + r.recognize_sphinx(audio) + "'")  
except sr.UnknownValueError:  
   print("Sphinx could not understand audio")  
except sr.RequestError as e:  
   print("Sphinx error; {0}".format(e)) 


Say something!


KeyboardInterrupt: 

In [None]:
#https://towardsdatascience.com/how-to-build-a-speech-recognition-bot-with-python-81d0fe3cea9a
#this guys code
def recognize_speech_from_mic(recognizer, microphone):
    """Transcribe speech from recorded from `microphone`.
    Returns a dictionary with three keys:
    "success": a boolean indicating whether or not the API request was
               successful
    "error":   `None` if no error occured, otherwise a string containing
               an error message if the API could not be reached or
               speech was unrecognizable
    "transcription": `None` if speech could not be transcribed,
               otherwise a string containing the transcribed text
    """
    # check that recognizer and microphone arguments are appropriate type
    if not isinstance(recognizer, sr.Recognizer):
        raise TypeError("`recognizer` must be `Recognizer` instance")

    if not isinstance(microphone, sr.Microphone):
        raise TypeError("`microphone` must be `Microphone` instance")

    # adjust the recognizer sensitivity to ambient noise and record audio
    # from the microphone
    with microphone as source:
        recognizer.adjust_for_ambient_noise(source) # #  analyze the audio source for 1 second
        audio = recognizer.listen(source)

    # set up the response object
    response = {
        "success": True,
        "error": None,
        "transcription": None
    }

    # try recognizing the speech in the recording
    # if a RequestError or UnknownValueError exception is caught,
    #   update the response object accordingly
    try:
        response["transcription"] = recognizer.recognize_sphinx(audio)
    except sr.RequestError:
        # API was unreachable or unresponsive
        response["success"] = False
        response["error"] = "API unavailable/unresponsive"
    except sr.UnknownValueError:
        # speech was unintelligible
        response["error"] = "Unable to recognize speech"

    return response

In [None]:
if __name__ == "__main__":
    recognizer = sr.Recognizer()
    mic = sr.Microphone(device_index=1)
    response = recognize_speech_from_mic(recognizer, mic)
    print('\nSuccess : {}\nError   : {}\n\nText from Speech\n{}\n\n{}' \
          .format(response['success'],
                  response['error'],
                  '-'*17,
                  response['transcription']))

In [85]:
#mp3 to text
# importing libraries 
import speech_recognition as sr 
  
import os 
  
from pydub import AudioSegment 
from pydub.silence import split_on_silence 
  
# a function that splits the audio file into chunks 
# and applies speech recognition 
#def silence_based_conversion(path = "alice-medium.wav"): 
def silence_based_conversion(path):  
    # open the audio file stored in 
    # the local system as a wav file. 
    song = AudioSegment.from_wav(path) 
  
    # open a file where we will concatenate   
    # and store the recognized text 
    fh = open("recognized.txt", "w+") 
          
    # split track where silence is 0.5 seconds  
    # or more and get chunks 
    chunks = split_on_silence(song, 
        # must be silent for at least 0.5 seconds 
        # or 500 ms. adjust this value based on user 
        # requirement. if the speaker stays silent for  
        # longer, increase this value. else, decrease it. 
        min_silence_len = 500, 
  
        # consider it silent if quieter than -16 dBFS 
        # adjust this per requirement 
        silence_thresh = -16
    ) 
  
    # create a directory to store the audio chunks. 
    try: 
        os.mkdir('audio_chunks') 
    except(FileExistsError): 
        pass
  
    # move into the directory to 
    # store the audio files. 
    os.chdir('audio_chunks') 
  
    i = 0
    # process each chunk 
    for chunk in chunks: 
              
        # Create 0.5 seconds silence chunk 
        chunk_silent = AudioSegment.silent(duration = 10) 
  
        # add 0.5 sec silence to beginning and  
        # end of audio chunk. This is done so that 
        # it doesn't seem abruptly sliced. 
        audio_chunk = chunk_silent + chunk + chunk_silent 
  
        # export audio chunk and save it in  
        # the current directory. 
        print("saving chunk{0}.wav".format(i)) 
        # specify the bitrate to be 192 k 
        audio_chunk.export("./chunk{0}.wav".format(i), bitrate ='192k', format ="wav") 
  
        # the name of the newly created chunk 
        filename = 'chunk'+str(i)+'.wav'
  
        print("Processing chunk "+str(i)) 
  
        # get the name of the newly created chunk 
        # in the AUDIO_FILE variable for later use. 
        file = filename 
  
        # create a speech recognition object 
        r = sr.Recognizer() 
  
        # recognize the chunk 
        with sr.AudioFile(file) as source: 
            # remove this if it is not working 
            # correctly. 
            r.adjust_for_ambient_noise(source) 
            audio_listened = r.record(source) 
  
        try: 
            # try converting it to text 
            rec = r.recognize_google(audio_listened) 
            # write the output to the file. 
            fh.write(rec+". ") 
  
        # catch any errors. 
        except sr.UnknownValueError: 
            print("Could not understand audio") 
  
        except sr.RequestError as e: 
            print("Could not request results. check your internet connection") 
  
        i += 1
  
    os.chdir('..') 
  
 

In [None]:
 
if __name__ == '__main__': 
          
    print('Enter the audio file path') 
  
    path = input()
    silence_based_conversion(path) 

Enter the audio file path


In [76]:
'''#weird ffprobe error file
from os import path
import pydub
from pydub import AudioSegment

# files                                                                         
src = r"C:/Users/Administrator/Downloads/solitary_reaper.mp3"
dst = r"C:/Users/Administrator/Downloads/test.wav"

# convert wav to mp3 
AudioSegment.ffmpeg = "C:/ffmpeg/bin/ffmpeg.exe"
AudioSegment.ffprobe = "C:/ffmpeg/bin/ffprobe.exe"
sound = AudioSegment.from_mp3(src)
sound.export(dst, format="wav")'''

'#weird ffprobe error file\nfrom os import path\nimport pydub\nfrom pydub import AudioSegment\n\n# files                                                                         \nsrc = r"C:/Users/Administrator/Downloads/solitary_reaper.mp3"\ndst = r"C:/Users/Administrator/Downloads/test.wav"\n\n# convert wav to mp3 \nAudioSegment.ffmpeg = "C:/ffmpeg/bin/ffmpeg.exe"\nAudioSegment.ffprobe = "C:/ffmpeg/bin/ffprobe.exe"\nsound = AudioSegment.from_mp3(src)\nsound.export(dst, format="wav")'

In [75]:
#same eror pprobe
'''
import speech_recognition as sr
from os import path
from pydub import AudioSegment

# convert mp3 file to wav    
AudioSegment.converter = "C:\\ffmpeg\\bin\\ffmpeg.exe"
AudioSegment.ffmpeg = "C:\\ffmpeg\\bin\\ffmpeg.exe"
AudioSegment.ffprobe ="C:\\ffmpeg\\bin\\ffprobe.exe"
sound = AudioSegment.from_mp3("C:/Users/Administrator/Downloads/solitary_reaper.mp3")
sound.export("transcript.wav", format="wav")


# transcribe audio file                                                         
AUDIO_FILE = "transcript.wav"

# use the audio file as the audio source                                        
r = sr.Recognizer()
with sr.AudioFile(AUDIO_FILE) as source:
        audio = r.record(source)  # read the entire audio file                  

        print("Transcription: " + r.recognize_google(audio))'''

'\nimport speech_recognition as sr\nfrom os import path\nfrom pydub import AudioSegment\n\n# convert mp3 file to wav    \nAudioSegment.converter = "C:\\ffmpeg\\bin\\ffmpeg.exe"\nAudioSegment.ffmpeg = "C:\\ffmpeg\\bin\\ffmpeg.exe"\nAudioSegment.ffprobe ="C:\\ffmpeg\\bin\\ffprobe.exe"\nsound = AudioSegment.from_mp3("C:/Users/Administrator/Downloads/solitary_reaper.mp3")\nsound.export("transcript.wav", format="wav")\n\n\n# transcribe audio file                                                         \nAUDIO_FILE = "transcript.wav"\n\n# use the audio file as the audio source                                        \nr = sr.Recognizer()\nwith sr.AudioFile(AUDIO_FILE) as source:\n        audio = r.record(source)  # read the entire audio file                  \n\n        print("Transcription: " + r.recognize_google(audio))'

In [None]:
#another .wav working
import speech_recognition as sr
import os
 
def main():
    sound = r'C:\Users\Administrator\Desktop\PROJECT\UI_design\solitary_reaper.wav'
 
    r = sr.Recognizer()
 
 
    with sr.AudioFile(sound) as source:
        r.adjust_for_ambient_noise(source)
        audio = r.record(source)
    os.remove(sound)
    command=''
    command=r.recognize_google(audio)
    print("Converting Audio To Text ..... ")
    try:
        print("Sphinx thinks you said " +command )
    except sr.UnknownValueError:
        print("Sphinx could not understand audio")
    except sr.RequestError as e:
        print("Sphinx error; {0}".format(e))
 
        #audio = r.listen(source)
 
        #print("Converted Audio Is : \n" + r.recognize_sphinx(audio))
 
 #r.recognize_google(audio)
    #except Exception as e:
       # print("Error {} : ".format(e) )
 
 
if __name__ == "__main__":
    main()

In [15]:
#different google code
import speech_recognition as sr

r = sr.Recognizer()

filename = 'hello.wav'

# Save audio data
f = open(filename, 'wb+')
f.write(sound_bytes)
f.close()

# Read audio data
with sr.AudioFile(audio_filename) as source:
    audio_source = r.record(source)  # read the entire audio file

# Speech Recognition
text = recognizer.recognize_google(audio_data=audio_source,
                                               key='xxx', 'en-US', show_all=False)

SyntaxError: positional argument follows keyword argument (<ipython-input-15-bf0d3c8de6d3>, line 19)

In [None]:
#https://stackoverflow.com/questions/32005310/speech-recognition-python-code-not-working
#another one
#microphone is shit so coldbnt check
#!/usr/bin/ python
import time

import speech_recognition as sr


def recognize_speech_from_mic(recognizer, microphone):
    """Transcribe speech from recorded from `microphone`.

    Returns a dictionary with three keys:
    "success": a boolean indicating whether or not the API request was
           successful
    "error":   `None` if no error occured, otherwise a string containing
           an error message if the API could not be reached or
           speech was unrecognizable
    "transcription": `None` if speech could not be transcribed,
           otherwise a string containing the transcribed text
    """
    # check that recognizer and microphone arguments are appropriate type
    if not isinstance(recognizer, sr.Recognizer):
        raise TypeError("`recognizer` must be `Recognizer` instance")

    if not isinstance(microphone, sr.Microphone):
        raise TypeError("`microphone` must be `Microphone` instance")

    # adjust the recognizer sensitivity to ambient noise and record audio
    # from the microphone
    with microphone as source:
        recognizer.adjust_for_ambient_noise(source)
        audio = recognizer.listen(source)

    # set up the response object
    response = {
        "success": True,
        "error": None,
        "transcription": None
    }

    try:
        response["transcription"] =    recognizer.recognize_google(audio)
    except sr.RequestError:
        # API was unreachable or unresponsive
        response["success"] = False
        response["error"] = "API unavailable"
    except sr.UnknownValueError:
        # speech was unintelligible
        response["error"] = "Unable to recognize speech"

    return response


if __name__ == "__main__":

    NUM_GUESSES = 1
    PROMPT_LIMIT = 2
    # create recognizer and mic instances
    recognizer = sr.Recognizer()
    microphone = sr.Microphone()

    word = "hello world"

    time.sleep(3)

    for i in range(NUM_GUESSES):
        for j in range(PROMPT_LIMIT):
            print('Guess {}. Speak!'.format(i+1))
            guess = recognize_speech_from_mic(recognizer, microphone)
            if guess["transcription"]:
                break
            if not guess["success"]:
                break
            print("I didn't catch that")

        # if there was an error, stop the game
        if guess["error"]:
            print("ERROR: {}".format(guess["error"]))
            break

        # show the user the transcription
        print("You said: {}".format(guess["transcription"]))

        # determine if guess is correct and if any attempts remain
        guess_is_correct = guess["transcription"].lower() == word.lower()
        user_has_more_attempts = i < NUM_GUESSES - 1

        if guess_is_correct:
            print("Correct!".format(word))
            break
        elif user_has_more_attempts:
            print("Incorrect. Try again.\n")
        else:
            print("Sorry, output is not similar to '{}'.".format(word))
            break

# 1st working model. accuracy very bad with sphinx


#https://www.programcreek.com/python/example/107718/speech_recognition.AudioFile

In [None]:
import os
def recognise(wavpath):
    try:
        # Recognize audio
        r = sr.Recognizer()
        with sr.AudioFile(wavpath) as source:
            r.adjust_for_ambient_noise(source)
            audio = r.record(source)  # read the entire audio file

    except Exception as ex:
        return str(ex)

    os.remove(wavpath)

    command = ''

    # recognize speech using Sphinx
    try:
        command = r.recognize_google(audio)
        print("Sphinx or google thinks you said " + command)
    except sr.UnknownValueError:
        print("Sphinx could not understand audio")
    except sr.RequestError as e:
        print("Sphinx error; {0}".format(e))
    return command 

if __name__ == "__main__":
    recognise('solitary_reaper.wav')

##without ambient:
Sphinx thinks you said solitary reaper by William Wordsworth
behold the single in the field Yon solitary Highland lass reaping
and singing by herself stop here or gently pass Alone she cuts 
and bytes grain and sayings on melancholy strain listen to the Vale
profound is overflowing with the sound no Nightingale did efficient more welcome note
s to very bands of Travellers in some Shady harmed among Arabian Sands a
voice search thrilling never was heard in springtime from the
Cuckoo bird Breaking The Silence of cys among the farthest hebrides open tell me
what you sayings about the plaintive numbers love for cold and Happy Father of things
and battles long ago or is it some more humble a familiar matter of 
today some natural sorrow loss of pain that has been Namibia Kane WWE
theme the maiden sign does a first song could have no ending i-sourcing
at home work and over the Sickle bending I listened motionless I'm still
and as a mountain of the hill the music in my heart Viber long after it was heard no more

#with ambient:
Sphinx thinks you said solitary reaper by William Wordsworth single in the field Yon solitary
Highland lass reaping and singing by herself stop here adjunctive alone she cuts and bites The 
Grain and sings a melancholy strain listen to the Vale profound is overflowing with sound no 
Nightingale did efficient more welcome notes to very bands of Travellers in some share Rihand 
among Arabian Sands a voice search thrilling never was heard in springtime from the Cuckoo bird 
Breaking The Silence of cys among the farthest hebrides open tell me what she seems that the plaintive 
numbers love frozen Happy Father of things and battles long ago or is it some more humble a 
familiar matter of today some natural sorrow loss of pain that has been done maybe again what 
are the theme the maiden sign that the first song could have nerve ending I Sourcing at home 
work and out the Sickle bending I listened motion medicine still and as a mountain of the hill 
the music in my heart Viber long after it was heard no more

In [3]:
import os
import speech_recognition as sr
def recognise(wavpath):
    fh = open("recognized.txt", "w+")
    try:
        # Recognize audio
        r = sr.Recognizer()
        with sr.AudioFile(wavpath) as source:
            r.adjust_for_ambient_noise(source)
            audio = r.record(source)  # read the entire audio file

    except Exception as ex:
        return str(ex)

    #os.remove(wavpath)

    command = ''

    # recognize speech using Sphinx
    try:
        command = r.recognize_google(audio)
        print("Sphinx or google thinks you said " + command)
        fh.write(command+". ") 
    except sr.UnknownValueError:
        print("Sphinx could not understand audio")
    except sr.RequestError as e:
        print("Sphinx error; {0}".format(e))
    return command 

if __name__ == "__main__":
    recognise(r'C:\Users\shery\Desktop\PROJECT\UI_Design\solitary_reaper.wav')
    


Sphinx or google thinks you said solitary reaper by William Wordsworth behold her single in the field Yon solitary Highland lass reaping and singing by herself stop here or gently pass Alone she cuts and bites The Grain and sings a melancholy strain listen to the Vale profound is overflowing with sound no Nightingale did efficient more welcome notes to very bands of Travellers in some Shady Honth among Arabian Sands a voice search thrilling never was heard in springtime from the Cuckoo bird Breaking The Silence of cys among the farthest hebrides even tell me what she seems that the plaintive numbers love frozen Happy Father of things and battles long ago or is it summer hanbali familiar matter of today some natural sorrow loss of pain that has been done maybe again what are the theme the maiden sign that the first song could have no vending i-sourcing at home work and how vi roll bending I listened motionless I'm still and as a mountain of the hill the music in my heart Viber long afte

In [8]:
#mp3 to wav conversion
from os import path
from pydub import AudioSegment

# files                                                                         
src = "s_r.mp3"
dst = "test.wav"

# convert wav to mp3         
AudioSegment.converter = "C:\ffmpeg\bin\ffmpeg.exe"
AudioSegment.ffmpeg = "C:\ffmpeg\bin\ffmpeg.exe"
AudioSegment.ffprobe ="C:\ffmpeg\bin\ffprobe.exe"
sound = AudioSegment.from_mp3(src)
sound.export(dst, format="wav")



FileNotFoundError: [WinError 2] The system cannot find the file specified

In [5]:
import os
import glob

#for file in lst:
# convert wav to mp3
os.system(r"ffmpeg -i C:\Users\shery\Desktop\PROJECT\UI_Design\s_r.mp3 -acodec pcm_u8 -ar 22050 C:\Users\shery\Desktop\PROJECT\UI_Design\s_r.wav")  
print("ok")

ok
