### 음성인식(Speech Recognition)
참고 페이지 : https://realpython.com/python-speech-recognition/#installing-speechrecognition

In [1]:
import speech_recognition as sr
sr.__version__

'3.8.1'

In [2]:
# Create the recognizer used by the following cells and wrap the sample
# recording in an AudioFile.
# NOTE(review): this is an absolute, machine-specific path; the cell only runs
# where this exact directory exists. Consider a configurable base directory.
r = sr.Recognizer()
harvard = sr.AudioFile('c:/Users/KimMinyoung/python-speech-recognition-master/python-speech-recognition-master/audio_files/harvard.wav')

In [3]:
# Open the audio file as a source and record from it; no offset or duration
# is passed here.
with harvard as source:
    audio = r.record(source)
# Display the type of the recorded object.
type(audio)

speech_recognition.AudioData

In [4]:
# Transcribe the recorded audio with the recognizer's recognize_google method.
r.recognize_google(audio)

'the stale smell of old beer lingers it takes heat to bring out the odor a cold dip restores health and zest a salt pickle taste fine with ham tacos al Pastore are my favorite a zestful food is be hot cross bun'

### Capturing Segments with offset and duration

In [5]:
# Limit the recording with duration=4 and transcribe only that segment.
with harvard as source:
    audio = r.record(source,duration = 4)
r.recognize_google(audio)

'the stale smell of old beer lingers'

In [6]:
# Two record() calls inside the same `with` block. The transcripts in the
# following cells show that the second call continues from where the first
# one stopped instead of starting over at the beginning of the file.
with harvard as source:
    audio1 = r.record(source, duration = 4)
    audio2 = r.record(source, duration = 4)

In [7]:
# Transcribe the first of the two consecutive segments.
r.recognize_google(audio1)

'the stale smell of old beer lingers'

In [8]:
# Transcribe the second of the two consecutive segments.
r.recognize_google(audio2)

'it takes heat to bring out the odor a cold dip'

In [9]:
# Use offset=4 to skip the start of the file and duration=3 to bound the
# length of the recorded segment, then transcribe that segment.
with harvard as source:
    audio = r.record(source, offset=4, duration=3)
r.recognize_google(audio)

'it takes heat to bring out the odor'

In [10]:
# Same idea with fractional offset/duration values. The transcript below shows
# the effect of cutting mid-phrase: it starts at "takes" and ends with a
# spurious extra word.
with harvard as source:
    audio = r.record(source, offset=  4.7, duration = 2.8)
r.recognize_google(audio)

'takes heat to bring out the odor Aiko'

### The Effect of Noise on Speech Recognition

In [11]:
# Load the noisy sample recording and transcribe it without any noise handling.
# NOTE(review): this is an absolute, machine-specific path; the cell only runs
# where this exact directory exists. Consider a configurable base directory.
jackhammer = sr.AudioFile('c:/Users/KimMinyoung/python-speech-recognition-master/python-speech-recognition-master/audio_files/jackhammer.wav')
with jackhammer as source:
    audio = r.record(source)
r.recognize_google(audio)

'the stale smell of old gear vendors'

In [12]:
# Calibrate the recognizer against the source's ambient noise (duration=0.5)
# before recording, then transcribe.
# NOTE(review): presumably the calibration consumes the start of the stream, so
# that portion is not part of `audio` - confirm against the library docs.
with jackhammer as source:
    r.adjust_for_ambient_noise(source, duration=0.5)
    audio = r.record(source)
r.recognize_google(audio)

'the snail smell of old gear vendors'

In [13]:
# With show_all=True the call returns the full result (a dict holding a list
# of alternative transcripts, see the output) rather than a single string.
r.recognize_google(audio, show_all=True)

{'alternative': [{'transcript': 'the stale smell of old gear vendors'},
  {'transcript': 'does still smell old gear vendors'},
  {'transcript': 'the smell smell of old gear vendors'},
  {'transcript': 'does still smell of old gear vendors'},
  {'transcript': 'the snail smell of old gear vendors'},
  {'transcript': 'the snail smelly old gear vendors'},
  {'transcript': 'does still smell olleh gear vendors'},
  {'transcript': 'does still smell bedgear vendors'}],
 'final': True}

## The Microphone Class

In [31]:
# The import and Recognizer() below repeat what the first cells already did.
import speech_recognition as sr
r = sr.Recognizer()
# List the names of the audio devices available on this machine; the device
# index used in the next cell refers to a position in this list.
sr.Microphone.list_microphone_names()

['Microsoft 사운드 매퍼 - Input',
 '마이크(Conexant HD Audio)',
 'Microsoft 사운드 매퍼 - Output',
 '스피커(Conexant HD Audio)',
 'Speakers (Conexant HD Audio output)',
 '마이크 (Conexant HD Audio capture)',
 '헤드폰 ()',
 '머리에 거는 수화기 (@System32\\drivers\\bthhfenum.sys,#2;%1 Hands-Free AG Audio%0\r\n;(풍뎅이))',
 '머리에 거는 수화기 (@System32\\drivers\\bthhfenum.sys,#2;%1 Hands-Free AG Audio%0\r\n;(풍뎅이))']

In [32]:
mic = sr.Microphone(device_index=1) # microphone: device_index=1

In [33]:
# Capture one phrase from the microphone; the prints mark the start and end
# of the listening window.
with mic as source:
    print("시작")
    audio = r.listen(source)
    print("끝")

# Transcribe the captured audio. Only the recognizer's own failures are caught,
# so unrelated errors (and KeyboardInterrupt) are no longer silently swallowed
# and an API failure is not misreported as unintelligible speech.
try:
    print("You said " + r.recognize_google(audio))
except sr.UnknownValueError:
    # the speech could not be understood
    print("Could not understand audio")
except sr.RequestError as err:
    # the recognition service could not be reached or returned an error
    print("Could not request results from the speech service; {}".format(err))

시작
끝
You said hello


In [34]:
# Transcribe the microphone recording again, this time showing the raw return value.
r.recognize_google(audio)

'hello'

In [35]:
# Calibrate for ambient noise first, then listen for a phrase from the
# microphone. Nothing is transcribed in this cell.
with mic as source:
    r.adjust_for_ambient_noise(source)
    audio = r.listen(source)

### "Guess the Word" Game

In [36]:
import random
import time

import speech_recognition as sr

In [37]:
def recognize_speech_from_mic(recognizer, microphone):
    """Listen on `microphone` once and transcribe what was said.

    Args:
        recognizer: an `sr.Recognizer` instance.
        microphone: an `sr.Microphone` instance.

    Returns:
        A dictionary with three keys:
        "success": False when the API request failed, True otherwise.
        "error": None when no error occurred; otherwise a message saying
            that the API was unavailable or the speech was not recognized.
        "transcription": the transcribed text, or None when nothing could
            be transcribed.

    Raises:
        TypeError: if either argument is not of the expected type.
    """
    # reject arguments of the wrong type before touching the microphone
    if not isinstance(recognizer, sr.Recognizer):
        raise TypeError("`recognizer` must be `Recognizer` instance")
    if not isinstance(microphone, sr.Microphone):
        raise TypeError("`microphone` must be `Microphone` instance")

    # calibrate against ambient noise, then capture audio from the microphone
    with microphone as mic_input:
        recognizer.adjust_for_ambient_noise(mic_input)
        captured = recognizer.listen(mic_input)

    # defaults describe the "nothing went wrong, nothing transcribed yet" state
    success, error, transcription = True, None, None

    try:
        transcription = recognizer.recognize_google(captured)
    except sr.RequestError:
        # the API could not be reached or did not respond
        success, error = False, "API unavailable"
    except sr.UnknownValueError:
        # the request went through but the speech was unintelligible
        error = "Unable to recognize speech"

    return {
        "success": success,
        "error": error,
        "transcription": transcription,
    }


if __name__ == "__main__":
    # "Guess the Word" game: pick a random word, then give the player
    # NUM_GUESSES spoken attempts to name it.

    # set the list of words, max number of guesses, and prompt limit
    WORDS = ["apple", "banana", "grape", "orange", "mango", "lemon"]
    NUM_GUESSES = 3
    PROMPT_LIMIT = 5

    # create recognizer and mic instances
    recognizer = sr.Recognizer()
    microphone = sr.Microphone()

    # get a random word from the list
    word = random.choice(WORDS)

    # format the instructions string
    instructions = (
        "I'm thinking of one of these words:\n"
        "{words}\n"
        "You have {n} tries to guess which one.\n"
    ).format(words=', '.join(WORDS), n=NUM_GUESSES)

    # show instructions and wait 3 seconds before starting the game
    print(instructions)
    time.sleep(3)

    for i in range(NUM_GUESSES):
        # get the guess from the user
        # if a transcription is returned, break out of the loop and
        #     continue
        # if no transcription returned and API request failed, break
        #     loop and continue
        # if API request succeeded but no transcription was returned,
        #     re-prompt the user to say their guess again. Do this up
        #     to PROMPT_LIMIT times
        for j in range(PROMPT_LIMIT):
            print('Guess {}. Speak!'.format(i+1))
            guess = recognize_speech_from_mic(recognizer, microphone)
            if guess["transcription"]:
                break
            if not guess["success"]:
                break
            print("I didn't catch that. What did you say?\n")

        # if there was an error, stop the game
        # (this also covers running out of re-prompts without a transcription,
        # because the last response then still carries its error message)
        if guess["error"]:
            print("ERROR: {}".format(guess["error"]))
            break

        # show the user the transcription
        print("You said: {}".format(guess["transcription"]))

        # determine if guess is correct and if any attempts remain
        guess_is_correct = guess["transcription"].lower() == word.lower()
        user_has_more_attempts = i < NUM_GUESSES - 1

        # determine if the user has won the game
        # if not, repeat the loop if user has more attempts
        # if no attempts left, the user loses the game
        if guess_is_correct:
            # the message has no placeholder, so no .format() call is needed
            print("Correct! You win!")
            break
        elif user_has_more_attempts:
            print("Incorrect. Try again.\n")
        else:
            print("Sorry, you lose!\nI was thinking of '{}'.".format(word))
            break

I'm thinking of one of these words:
apple, banana, grape, orange, mango, lemon
You have 3 tries to guess which one.

Guess 1. Speak!
You said: Apple
Incorrect. Try again.

Guess 2. Speak!
You said: Gray
Incorrect. Try again.

Guess 3. Speak!
You said: Orange
Sorry, you lose!
I was thinking of 'grape'.


In [None]:
# Stand-alone walk-through of the response-building step of
# recognize_speech_from_mic. It relies on `recognizer` and `audio` already
# existing from earlier cells.
response = {
    "success": True,
    "error": None,
    "transcription": None
}

try:
    response["transcription"] = recognizer.recognize_google(audio)
except sr.RequestError:
    # API was unreachable or unresponsive
    response["success"] = False
    response["error"] = "API unavailable"
except sr.UnknownValueError:
    # speech was unintelligible
    response["error"] = "Unable to recognize speech"

# `return` is only valid inside a function; at cell level, end with the bare
# name so the notebook displays the resulting dictionary.
response

In [None]:
# NOTE(review): `guessing_game` is not defined anywhere in this notebook -
# presumably a guessing_game.py file on the import path; confirm it exists.
from guessing_game import recognize_speech_from_mic
r= sr.Recognizer()
m=sr.Microphone()
# One-off call: capture a phrase from a Microphone created without a
# device_index and display the returned result.
recognize_speech_from_mic(r,m)

In [None]:
# Game settings: the candidate words, the number of guesses allowed, and the
# maximum number of prompts per guess.
WORDS = ['apple', 'banana', 'grape', 'orange', 'mango', 'lemon']
NUM_GUESSES = 3
PROMPT_LIMIT = 5

In [None]:
# Create the recognizer and microphone instances and pick the secret word
# at random from WORDS.
recognizer = sr.Recognizer()
microphone = sr.Microphone()
word = random.choice(WORDS)

In [None]:
# Prompt for a single guess, re-prompting up to PROMPT_LIMIT times while
# nothing is transcribed; stop early on a transcription or a failed request.
# NOTE(review): `i` (the guess index) is not assigned in this cell - it has to
# already exist in the kernel, e.g. left over from the game loop cell.
for j in range(PROMPT_LIMIT):
    print('Guess {}. Speak!'.format(i+1))
    guess = recognize_speech_from_mic(recognizer, microphone)
    if guess["transcription"]:
        break
    if not guess["success"]:
        break
    print("I didn't catch that. What did you say?\n")

In [None]:
# Report an error from the last guess, if any. In the full game this is the
# point where the guessing loop stops; there is no enclosing loop in this
# cell, so no `break` is used here (it would be a SyntaxError).
if guess['error']:
    print("ERROR: {}".format(guess["error"]))

In [None]:
# Evaluate one guess outside of the game loop. Relies on `guess`, `word`, `i`
# and NUM_GUESSES already existing from earlier cells.
guess_is_correct = guess["transcription"].lower() == word.lower()
user_has_more_attempts = i < NUM_GUESSES - 1

# In the full game the win and lose branches also end the guessing loop; this
# cell has no enclosing loop, so `break` (a SyntaxError here) is left out.
if guess_is_correct:
    # the message has no placeholder, so no .format() call is needed
    print('Correct! You win!')
elif user_has_more_attempts:
    print('Incorrect. Try again.\n')
else:
    print("Sorry, you lose!\nI was thinking of '{}'.".format(word))