In [6]:
# Imports and .env variables

import json
import os
import time
import urllib
import urllib.request

import boto3
from dotenv import load_dotenv

# Pull variables from a local .env file into the process environment, then
# read the two AWS credential values. Either constant is None when its
# variable is not set.
load_dotenv()

_environment = os.environ
AWS_ACCESS_KEY_ID = _environment.get('aws_access_key')
AWS_SECRET_ACCESS_KEY = _environment.get('aws_secret_access_key')


In [10]:
# Job settings and the Transcribe client used by the cells below.

# Name under which the transcription job is registered.
job_name = 'transcribe_job_001'
# Location of the audio file to transcribe.
job_uri = 'https://s3.amazonaws.com/transcribe-bucket-001/media/Obama.mp3'

# Client for the Transcribe service, built with the credentials read from the
# environment and pinned to the us-west-1 region.
Transcribe = boto3.client(
    service_name='transcribe',
    region_name='us-west-1',
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
)


In [11]:
# Run Transcribe Job
#
# The job name is fixed, so re-running this cell after the job has already
# been created makes the service reject the request with ConflictException.
# In that case the existing job is looked up instead, which keeps the cell
# safe to re-run (Restart Kernel -> Run All).

try:
    start_response = Transcribe.start_transcription_job(
        TranscriptionJobName=job_name,
        Media={'MediaFileUri': job_uri},
        MediaFormat='mp3',
        LanguageCode='en-US',
    )
except Transcribe.exceptions.ConflictException:
    # A job with this name already exists: reuse it rather than failing.
    print(f"Job {job_name!r} already exists; reusing it.")
    start_response = Transcribe.get_transcription_job(TranscriptionJobName=job_name)

# Last expression of the cell: display the job description.
start_response

{'TranscriptionJob': {'TranscriptionJobName': 'transcribe_job_001',
  'TranscriptionJobStatus': 'IN_PROGRESS',
  'LanguageCode': 'en-US',
  'MediaFormat': 'mp3',
  'Media': {'MediaFileUri': 'https://s3.amazonaws.com/transcribe-bucket-001/media/Obama.mp3'},
  'StartTime': datetime.datetime(2023, 1, 23, 11, 34, 9, 204000, tzinfo=tzlocal()),
  'CreationTime': datetime.datetime(2023, 1, 23, 11, 34, 9, 178000, tzinfo=tzlocal())},
 'ResponseMetadata': {'RequestId': '8131cc4b-3ff7-43f9-9e39-3cdb83b0a3a7',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '8131cc4b-3ff7-43f9-9e39-3cdb83b0a3a7',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '299',
   'date': 'Mon, 23 Jan 2023 19:34:08 GMT'},
  'RetryAttempts': 0}}

In [14]:
# Check the Job Status
#
# Poll the service until the job reaches a terminal state (COMPLETED or
# FAILED). A failed job raises here, with the reason reported by the service,
# instead of letting later cells fail on a missing transcript. The wait is
# bounded so a stuck job cannot block the notebook forever.

POLL_INTERVAL_SECONDS = 2      # pause between two status requests
MAX_WAIT_SECONDS = 60 * 60     # give up after one hour

# Monotonic clock: unaffected by system clock adjustments while waiting.
poll_deadline = time.monotonic() + MAX_WAIT_SECONDS

while True:
    status = Transcribe.get_transcription_job(TranscriptionJobName=job_name)
    job_state = status['TranscriptionJob']['TranscriptionJobStatus']
    if job_state in ['COMPLETED', 'FAILED']:
        break
    if time.monotonic() >= poll_deadline:
        raise TimeoutError(
            f"Transcription job {job_name!r} still {job_state} "
            f"after {MAX_WAIT_SECONDS} seconds"
        )
    print("Not ready yet...")
    time.sleep(POLL_INTERVAL_SECONDS)

if job_state == 'FAILED':
    failure_reason = status['TranscriptionJob'].get('FailureReason', 'no reason reported')
    raise RuntimeError(f"Transcription job {job_name!r} failed: {failure_reason}")

print(status)


{'TranscriptionJob': {'TranscriptionJobName': 'transcribe_job_001', 'TranscriptionJobStatus': 'COMPLETED', 'LanguageCode': 'en-US', 'MediaSampleRateHertz': 44100, 'MediaFormat': 'mp3', 'Media': {'MediaFileUri': 'https://s3.amazonaws.com/transcribe-bucket-001/media/Obama.mp3'}, 'Transcript': {'TranscriptFileUri': 'https://s3.us-west-1.amazonaws.com/aws-transcribe-us-west-1-prod/299825267603/transcribe_job_001/9d161b47-b899-4434-bf64-5d69a74cbf1a/asrOutput.json?X-Amz-Security-Token=IQoJb3JpZ2luX2VjEHMaCXVzLXdlc3QtMSJHMEUCIF6XkqLH6cpG60TGBFn7xxu6eWLbGA%2BrcIDXzxTmJQjjAiEApkISXYWzrE8biySbZ%2BKGTPW84zSzJeUa9V5skJJiuJ8q1QQI3P%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FARADGgw5NzEzODk5ODIxNjgiDBTrYHPLMouQHhVjMCqpBNkICzLw%2FppbZDRlS6XoXLEyjVIeYRcK7l4d7Lx5oQckRAA3hbSzWMkZ%2F8CQSnY5hzUGV0Ryrz16k7yDe2ApmX%2BwN%2BkKz8GiKihIuYEEkKMrFBrRPYYFPHcoSdrRRWKsesR7vf3wD5PIqShBCWa1IYHVm01rfIR5JcZHDA4USVcw%2B8FvFvwZP5gppeiEO7c1zfS2hS8C1V4mVI%2FwhgnLls3T9FanWRhwgiynT79n51IYeXPIkcmn9hQQLft89U%2FK6PkWtdr08TBtSlImOb21h4V84y6f0

In [22]:
# Locate the .json output of the transcription job.
#
# The job description returned by the polling cell holds the transcript
# location under TranscriptionJob -> Transcript -> TranscriptFileUri.

transcript_info = status["TranscriptionJob"]["Transcript"]
transcription_url = transcript_info["TranscriptFileUri"]

# Opening this URL in a browser starts the download of the JSON output file.
transcription_url

'https://s3.us-west-1.amazonaws.com/aws-transcribe-us-west-1-prod/299825267603/transcribe_job_001/9d161b47-b899-4434-bf64-5d69a74cbf1a/asrOutput.json?X-Amz-Security-Token=IQoJb3JpZ2luX2VjEHMaCXVzLXdlc3QtMSJHMEUCIF6XkqLH6cpG60TGBFn7xxu6eWLbGA%2BrcIDXzxTmJQjjAiEApkISXYWzrE8biySbZ%2BKGTPW84zSzJeUa9V5skJJiuJ8q1QQI3P%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FARADGgw5NzEzODk5ODIxNjgiDBTrYHPLMouQHhVjMCqpBNkICzLw%2FppbZDRlS6XoXLEyjVIeYRcK7l4d7Lx5oQckRAA3hbSzWMkZ%2F8CQSnY5hzUGV0Ryrz16k7yDe2ApmX%2BwN%2BkKz8GiKihIuYEEkKMrFBrRPYYFPHcoSdrRRWKsesR7vf3wD5PIqShBCWa1IYHVm01rfIR5JcZHDA4USVcw%2B8FvFvwZP5gppeiEO7c1zfS2hS8C1V4mVI%2FwhgnLls3T9FanWRhwgiynT79n51IYeXPIkcmn9hQQLft89U%2FK6PkWtdr08TBtSlImOb21h4V84y6f0vN5puXV76QH58Fl9cV9LhDsixj66Z4UhwXohO%2B4tCy8YERdp4xYQh7Z9O2dzCxcotMbsd6%2BwPkyOIYEcsjvUVD35A85ZPRwDMA8feXhWv61Ly5nqLhg%2BaBMmAh8XSoPbEoo%2F79NuUI7u4WDc4WCdtKYYTD6EIB3OCvxeQkZ2uOPPZxPFTZ2Q5B0h6l7gTPi9DIHn%2FbVdgMVh9Lur4yxf3438MWaLKhTVmwlDpbjjfhntpGFiRUYZGWrj5lbZohrd44yE6q71T7fvSB3Hg155cwufNGlHlitEEBc%2BkTNXRhi80Y

In [26]:
# Get the transcription
#
# Reads the transcript text from the job's JSON output. When the output file
# is not already present locally, it is downloaded from `transcription_url`
# first, so the notebook no longer depends on a manual browser download.
# An existing local file is used as-is.

TRANSCRIPT_PATH = 'asrOutput.json'

if not os.path.exists(TRANSCRIPT_PATH):
    # Read the full response before opening the target file, so a failed
    # download does not leave an empty or partial file behind.
    with urllib.request.urlopen(transcription_url) as response:
        transcript_bytes = response.read()
    with open(TRANSCRIPT_PATH, 'wb') as out_file:
        out_file.write(transcript_bytes)

with open(TRANSCRIPT_PATH, 'r', encoding='utf-8') as f:
    data = json.load(f)

# The parsed JSON is only needed in memory from here on; the file is closed.
text = data['results']['transcripts'][0]['transcript']
print(text)

Hello Chicago. If there is anyone out there who still doubts that America is a place where all things are possible, who still wonders if the dream of our founders is alive in our time, who still questions the power of our democracy. Tonight is your answer. It's the answer told by lines that stretched around schools and churches in numbers. This nation has never seen by people who waited three hours and four hours, many for the first time in their lives because they believed that this time must be different, That their voices could be that difference. It's the answer spoken by young and old, rich and poor democrat and republican, black, white hispanic, asian, native american, gay, straight disabled and not disabled americans who sent a message to the world that we have never been just a collection of individuals or a collection of red states and blue states. We are and always will be the United States of America. It's the answer that led those who've been told for so long by so many to 