In [None]:
from IPython.display import Audio

# Transcribing audio using the Cloud Speech to Text API.

This notebook shows how you can use the Google Cloud Speech to Text API to transcribe audio and the Google Cloud Natural Language API to categorize the text.

We'll use the Google Cloud Client Libraries to do this.

## Let's transcribe the State of the Union speech by President Barack Obama.

Here's an excerpt from that speech:

In [None]:
# Read the sample recording as raw bytes; these bytes are what gets
# handed to the transcription helper further down.
with open('data/speech.wav', 'rb') as wav_file:
    audio_content = wav_file.read()

# Leaving the Audio object as the cell's last expression displays it.
Audio(audio_content)

## Use the client library to transcribe this audio.

In [None]:
from google.cloud import speech

In [None]:
def transcribe(audio_content, sample_rate_hertz=24000, language_code='en-US'):
    """Send LINEAR16 audio to the Cloud Speech to Text API for recognition.

    Args:
        audio_content: Audio data passed as the ``content`` of the request
            (in this notebook, the bytes read from the WAV file).
        sample_rate_hertz: Sample rate declared in the recognition config.
            Defaults to 24000, the value this helper previously hard-coded.
        language_code: Language tag declared in the recognition config.
            Defaults to 'en-US'.

    Returns:
        Whatever ``SpeechClient.recognize`` returns for the request.
    """
    client = speech.SpeechClient()

    audio = speech.types.RecognitionAudio(content=audio_content)
    config = speech.types.RecognitionConfig(
        encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=sample_rate_hertz,
        language_code=language_code)

    # Pass the request fields by keyword so the call does not depend on the
    # client's positional parameter order.
    return client.recognize(config=config, audio=audio)

In [None]:
# Run the transcription helper on the audio bytes loaded earlier and
# display the returned response object as the cell output.
response = transcribe(audio_content)
response

### Print the transcript

The API returns the transcript in parts because it may have different confidences in each phrase.

In [None]:
# Take the first alternative of every result and concatenate the pieces,
# in order, with no separator between them.
transcript = ''.join(
    segment.alternatives[0].transcript
    for segment in response.results
)
transcript

## Classifying the text using the Natural Language API

The Natural Language API does text analysis and can pull out important entities and classify the overall document.

In [None]:
from google.cloud import language
# Client object used below to call the Natural Language API.
language_client = language.LanguageServiceClient()

In [None]:
# Wrap the transcript as a plain-text document for the Natural Language API.
document = language.types.Document(
    type=language.enums.Document.Type.PLAIN_TEXT,
    content=transcript,
)

# Ask for entity extraction and document classification in one request.
# Note: this rebinds `response`, which previously held the speech result.
response = language_client.annotate_text(
    document,
    features={'extract_entities': True, 'classify_text': True},
)
response