In [2]:
from IPython.display import Audio

# Transcribing audio using the Cloud Speech to Text API.

This notebook shows how you can use the Google Cloud Speech to Text API to transcribe audio and the Google Cloud Natural Language API to categorize the text.

We'll use the Google Cloud Client Libraries to do this.

## Let's transcribe the State of the Union speech by President Barack Obama.

Here's an excerpt from that speech:

In [3]:
# Load the raw bytes of the speech excerpt; a later cell passes them to transcribe().
with open('data/speech.wav', mode='rb') as wav_file:
    audio_content = wav_file.read()

# Last expression of the cell, so the Audio object is displayed as the cell output.
Audio(audio_content)

## Use the client library to transcribe this audio.

In [4]:
from google.cloud import speech

In [5]:
def transcribe(audio_content, sample_rate_hertz=24000, language_code='en-US'):
    """Transcribe raw audio bytes with the Cloud Speech to Text API.

    Args:
        audio_content: Bytes of the audio to recognize. The request declares
            LINEAR16 encoding, so the audio is expected to be in that format.
        sample_rate_hertz: Sample rate declared to the API. Defaults to 24000,
            the value this notebook uses for data/speech.wav.
        language_code: Language code sent to the API. Defaults to 'en-US'.

    Returns:
        The response object returned by ``SpeechClient.recognize``. Its
        ``results`` each carry ``alternatives`` with a ``transcript`` and a
        ``confidence``.
    """
    client = speech.SpeechClient()

    audio = speech.types.RecognitionAudio(content=audio_content)
    config = speech.types.RecognitionConfig(
        encoding=speech.enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=sample_rate_hertz,
        language_code=language_code)

    # Keyword arguments keep the call independent of the positional
    # parameter order of recognize().
    response = client.recognize(config=config, audio=audio)

    return response

In [6]:
# Send the audio bytes loaded earlier to the Speech API and display the raw response.
response = transcribe(audio_content=audio_content)
response

results {
  alternatives {
    transcript: "51 years ago John F Kennedy declared to this chamber"
    confidence: 0.9799562692642212
  }
}
results {
  alternatives {
    transcript: " that the constitution makes us not Rivals for power"
    confidence: 0.9764183759689331
  }
}
results {
  alternatives {
    transcript: " partners for progress"
    confidence: 0.9175500869750977
  }
}
results {
  alternatives {
    transcript: " it is my task he said to report the State of the Union"
    confidence: 0.9736822843551636
  }
}
results {
  alternatives {
    transcript: " to improve it is a task of us all"
    confidence: 0.8957697749137878
  }
}

### Print the transcript

The API returns the transcript in several parts, because its confidence can differ from phrase to phrase. Below, we join the first alternative of each part to get the full text.

In [7]:
# Stitch the first alternative of every result into one string. No separator is
# added; the pieces after the first already begin with a space in the output above.
transcript = ''.join(
    segment.alternatives[0].transcript
    for segment in response.results
)
transcript

'51 years ago John F Kennedy declared to this chamber that the constitution makes us not Rivals for power partners for progress it is my task he said to report the State of the Union to improve it is a task of us all'

## Classifying the text using the Natural Language API

The Natural Language API does text analysis and can pull out important entities and classify the overall document.

In [8]:
from google.cloud import language
# Client for the Natural Language API; the annotation request below goes through it.
language_client = language.LanguageServiceClient()

In [9]:
# Wrap the transcript as a plain-text document for the Natural Language API.
document = language.types.Document(
    type=language.enums.Document.Type.PLAIN_TEXT,
    content=transcript,
)

# A single request that asks for both entity extraction and text classification.
# NOTE: this rebinds `response`, which until now held the speech recognition result.
response = language_client.annotate_text(
    document=document,
    features=dict(extract_entities=True, classify_text=True),
)
response

entities {
  name: "task"
  type: OTHER
  salience: 0.23945260047912598
  mentions {
    text {
      content: "task"
      begin_offset: -1
    }
    type: COMMON
  }
  mentions {
    text {
      content: "task"
      begin_offset: -1
    }
    type: COMMON
  }
}
entities {
  name: "State of the Union"
  type: WORK_OF_ART
  metadata {
    key: "mid"
    value: "/m/097w2"
  }
  metadata {
    key: "wikipedia_url"
    value: "https://en.wikipedia.org/wiki/State_of_the_Union"
  }
  salience: 0.20812414586544037
  mentions {
    text {
      content: "State of the Union"
      begin_offset: -1
    }
    type: PROPER
  }
}
entities {
  name: "John F Kennedy"
  type: PERSON
  metadata {
    key: "mid"
    value: "/m/0d3k14"
  }
  metadata {
    key: "wikipedia_url"
    value: "https://en.wikipedia.org/wiki/John_F._Kennedy"
  }
  salience: 0.18119539320468903
  mentions {
    text {
      content: "John F Kennedy"
      begin_offset: -1
    }
    type: PROPER
  }
}
entities {
  name: "const