In [1]:
import os
import time

import azure.cognitiveservices.speech as speechsdk

In [2]:
def check_result(result):
    """Print a human-readable summary of a speech recognition result.

    Depending on ``result.reason`` this prints the recognized text, the
    no-match details, or the cancellation reason (followed by the error
    details when the cancellation was caused by an error). Any other reason
    prints nothing.

    Args:
        result: an object exposing ``reason`` plus, depending on that reason,
            ``text``, ``no_match_details`` or ``cancellation_details``.

    Returns:
        None. The summary is written to standard output.
    """
    outcome = result.reason
    reasons = speechsdk.ResultReason

    # Successful recognition: show the transcript and stop.
    if outcome == reasons.RecognizedSpeech:
        print("Recognized: {}".format(result.text))
        return

    # Audio was processed but nothing matched.
    if outcome == reasons.NoMatch:
        print("No speech could be recognized: {}".format(result.no_match_details))
        return

    # Recognition was canceled: always report why, and add the error text
    # when the cancellation reason is an error.
    if outcome == reasons.Canceled:
        details = result.cancellation_details
        print("Speech Recognition canceled: {}".format(details.reason))
        if details.reason == speechsdk.CancellationReason.Error:
            print("Error details: {}".format(details.error_details))

## 1. Create an instance of a speech config
   - Requires credentials from your Azure Speech resource:
       - the subscription key
       - the service region (see https://aka.ms/speech/sdkregion)

In [3]:
# Credentials for the Azure Speech resource.
# Read them from the environment so the real key never has to be saved in the
# notebook; the original placeholder values remain as fallbacks, so the cell
# behaves exactly as before when the variables are not set.
speech_key = os.environ.get("SPEECH_KEY", 'speech_key_from_azure')
service_region = os.environ.get("SPEECH_REGION", "koreacentral")

# Service settings shared by every recognizer created later in the notebook.
speech_config = speechsdk.SpeechConfig(
    subscription=speech_key, region=service_region)

## 2. Create an audio configuration that points to an audio file.

In [4]:
# Audio file to transcribe. Set SPEECH_AUDIO_FILE to point at your own
# recording; the fallback is the path this notebook was originally run with.
audio_filename = os.environ.get(
    "SPEECH_AUDIO_FILE", "/Users/1110647/projects/azure_speech_test/audio2.wav")

# Fail here with a clear message instead of surfacing a less obvious error
# later, once the SDK tries to open the file.
if not os.path.isfile(audio_filename):
    raise FileNotFoundError("Audio file not found: {}".format(audio_filename))

# Audio source handed to the recognizer(s) created below.
audio_input = speechsdk.AudioConfig(filename=audio_filename)

## 3. Create a recognizer with the given settings

In [8]:
# Tie the service settings and the audio source together in one recognizer.
speech_recognizer = speechsdk.SpeechRecognizer(
    speech_config=speech_config,
    audio_config=audio_input,
)

## 4. STT for a single utterance
 - Recognition stops when silence is detected at the end of the utterance,
 - or after a maximum of 15 seconds of audio has been processed.

In [6]:
# One-shot recognition: request a single result from the recognizer and
# summarize it with check_result (recognized text, no-match, or cancellation).
print("Recognizing single utterance...")
result = speech_recognizer.recognize_once()
check_result(result)

Recognizing single utterance...
Recognized: Custom speech provides tools that allow you to visually inspect the recognition quality of a model by comparing audio data with the corresponding recognition result from the custom speech portal. You can playback uploaded audio and determine if the provided recognition result is correct this tool allows you to quickly inspect quality of Microsoft's baseline speech to text model or a trained custom model without having to transcribe any audio data.


## 5. STT for long-running, multi-utterance audio
  - You must subscribe to the recognizer's events to receive recognition results.

In [9]:
# State shared between this cell and the SDK callbacks below.
done = False              # set to True by stop_cb once the session stops or is canceled
result = False            # most recent recognized result; stays False if none arrives
recognized_results = []   # every result delivered by the `recognized` event, in order

# Define Callbacks
def stop_cb(evt):
    """Signal the wait loop to exit when a session-stopped or canceled event `evt` arrives."""
    global done
    print('CLOSING on {}'.format(evt))
    done = True

def return_result(evt):
    """Record the result carried by a `recognized` event `evt`.

    The result is appended to `recognized_results` and also kept in `result`
    (latest one wins), so multi-utterance audio no longer loses every
    utterance but the last.
    """
    global result
    print('RECOGNIZED on {}'.format(evt))
    result = evt.result
    recognized_results.append(evt.result)

# Build a fresh recognizer for this cell. Reusing the one that already served
# recognize_once() would continue from wherever that call stopped reading the
# file (NOTE(review): inferred from the notebook's re-run of the recognizer
# cell before this one -- confirm), and re-running this cell would otherwise
# stack duplicate callbacks on the same recognizer.
speech_recognizer = speechsdk.SpeechRecognizer(
    speech_config=speech_config, audio_config=audio_input)

# Register Callbacks
speech_recognizer.recognized.connect(return_result)
speech_recognizer.session_stopped.connect(stop_cb)
speech_recognizer.canceled.connect(stop_cb)

# Start continuous speech recognition
speech_recognizer.start_continuous_recognition()
try:
    # Wait until a callback reports that the session has finished.
    while not done:
        time.sleep(.5)
finally:
    # Always stop recognition, even if the wait is interrupted.
    speech_recognizer.stop_continuous_recognition()

# Print every recognized utterance; guard against the case where none arrived,
# which previously crashed by passing the placeholder `False` to check_result.
if recognized_results:
    for recognized in recognized_results:
        check_result(recognized)
else:
    print("No utterance was recognized; see the CLOSING event(s) above for the reason.")

RECOGNIZED on SpeechRecognitionEventArgs(session_id=64b0cf50e18e409ebdbb89f5f178d1e5, result=SpeechRecognitionResult(result_id=8cd0f9fef07f4e8db7bdee4b0f76aa80, text="Custom speech provides tools that allow you to visually inspect the recognition quality of a model by comparing audio data with the corresponding recognition result from the custom speech portal. You can playback uploaded audio and determine if the provided recognition result is correct this tool allows you to quickly inspect quality of Microsoft's baseline speech to text model or a trained custom model without having to transcribe any audio data.", reason=ResultReason.RecognizedSpeech))
CLOSING on SpeechRecognitionCanceledEventArgs(session_id=64b0cf50e18e409ebdbb89f5f178d1e5, result=SpeechRecognitionResult(result_id=b8a78e7849e24c219ee3f6eea0512e93, text="", reason=ResultReason.Canceled))
CLOSING on SessionEventArgs(session_id=64b0cf50e18e409ebdbb89f5f178d1e5)
Recognized: Custom speech provides tools that allow you to vi