# Transcription with the Google Cloud Speech-to-Text API

## Preamble

In [3]:
import os
from pathlib import Path

import pandas

### Paths

**Set this path to the root directory of your local copy of the repository.**

In [4]:
# Root of the local repository checkout.
# Set the PODCAST_AI_LAB_DIR environment variable to point at your own copy;
# when it is unset, the original hardcoded location is used as the fallback so
# existing setups keep working unchanged.
project_dir = Path(
    os.environ.get(
        "PODCAST_AI_LAB_DIR",
        "/Users/cls/Documents/Work/Projects/SoundOfAI/podcast-ai-lab/",
    )
)
project_dir

PosixPath('/Users/cls/Documents/Work/Projects/SoundOfAI/podcast-ai-lab')

**Podcast Episodes**

In [14]:
# Directory with the podcast episodes, resolved relative to the repository root.
podcast_dir = project_dir.joinpath("data/podcasts/")
podcast_dir

PosixPath('/Users/cls/Documents/Work/Projects/SoundOfAI/podcast-ai-lab/data/podcasts')

## Workflow

### Google Cloud API

In [6]:
from google.cloud import speech
import os
import io
from pathlib import Path

**Create your own Google Cloud credentials file (a JSON key) and save it as `data/secrets/google-speech-to-text-api-key.json`. Keep this file out of version control.**

In [7]:
# Expose the location of the key file through GOOGLE_APPLICATION_CREDENTIALS.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = os.fspath(
    project_dir.joinpath('data/secrets/google-speech-to-text-api-key.json')
)


In [8]:
# Create the Speech-to-Text client used for the recognition request below.
# NOTE(review): presumably authenticates via the GOOGLE_APPLICATION_CREDENTIALS
# variable set in the previous cell — confirm.
speech_client = speech.SpeechClient()

**Example Podcast**

In [21]:
# Short sample clip used for the example transcription.
episode_path = podcast_dir.joinpath("sample", "lex_ai_sample_short.mp3")
episode_path

PosixPath('/Users/cls/Documents/Work/Projects/SoundOfAI/podcast-ai-lab/data/podcasts/sample/lex_ai_sample_short.mp3')

In [22]:
def load_speech_audio(path):
    """Read an audio file and wrap its raw bytes for the Speech API.

    Parameters
    ----------
    path : str or path-like
        Location of the audio file; it is opened in binary mode.

    Returns
    -------
    speech.RecognitionAudio
        Object constructed with the file's bytes passed as ``content``.
    """
    with open(path, "rb") as source:
        raw_bytes = source.read()
    # The file is already closed here; only the in-memory bytes are needed.
    return speech.RecognitionAudio(content=raw_bytes)

In [23]:
# Load the sample episode's bytes into a RecognitionAudio payload.
audio = load_speech_audio(episode_path)

In [24]:
# Recognition settings: automatic punctuation, one audio channel, US English.
speech_config = speech.RecognitionConfig(
    # NOTE(review): the encoding is left unspecified although the sample file is
    # an .mp3. The very short transcript recorded below may be a symptom of the
    # audio not being decoded as intended — TODO confirm, and set the encoding
    # (and sample rate) explicitly if the installed API version supports MP3.
    #encoding=speech.RecognitionConfig.AudioEncoding.MP3,
    enable_automatic_punctuation=True,
    audio_channel_count=1,
    language_code="en-US",
)


In [25]:
# Send the configuration and the audio payload in one request and keep the reply.
# NOTE(review): `recognize` is the synchronous endpoint, which is documented for
# short audio only — confirm the sample clip is within that limit.
response = speech_client.recognize(
    request=dict(config=speech_config, audio=audio)
)

In [26]:
# Show the raw API response (recognition results plus billing metadata).
response

results {
  alternatives {
    transcript: "I like,"
    confidence: 0.4736475646495819
  }
  result_end_time {
    seconds: 56
    nanos: 550000000
  }
  language_code: "en-us"
}
total_billed_time {
  seconds: 165
}

In [27]:
# Print the first alternative's transcript for every result in the response.
for result in response.results:
    top_alternative = result.alternatives[0]
    print("Transcript: {}".format(top_alternative.transcript))

Transcript: I like,


In [28]:
# Show only the list of recognition results from the response.
response.results

[alternatives {
  transcript: "I like,"
  confidence: 0.4736475646495819
}
result_end_time {
  seconds: 56
  nanos: 550000000
}
language_code: "en-us"
]