## UseCase 2 - Video Transcribe

**References**

1. [Transcribe - Python](https://docs.aws.amazon.com/code-library/latest/ug/python_3_transcribe_code_examples.html)

In [None]:
import configparser
import time
import boto3
import json
import logging
import urllib.request
import random
from botocore.exceptions import ClientError

### Initiate Services

In [None]:
# Expose the logging module itself under the name `logger`, so calls such as
# logger.error(...) go through the module-level (root logger) functions.
logger = logging

# Root logger: emit ERROR and above, formatted as "LEVEL - timestamp - message".
logger.basicConfig(
    level=logging.ERROR,
    format="%(levelname)s - %(asctime)s - %(message)s",
)

### Reading Config File

In [None]:
CONFIG_FILENAME = ".config.ini"

# Module-level parser that the lookup helpers in this cell read from.
config = configparser.ConfigParser()

# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so an empty result means no settings were loaded.
# Report that explicitly instead of continuing with an empty configuration.
if not config.read(CONFIG_FILENAME):
    logging.error(
        "Config file %r was not found or could not be read; "
        "no settings were loaded.",
        CONFIG_FILENAME,
    )

def get_value_by_section_and_key(section, key):
    """Return the value stored under ``key`` in ``section`` of the loaded config.

    The lookup is delegated to ``ConfigParser.get`` without a fallback, so a
    missing section or option raises the corresponding ``configparser`` error.
    """
    value = config.get(section, key)
    return value

def get_all_details_of_section(section) -> dict:
    """Return every option of ``section`` as a plain ``{name: value}`` dict."""
    return {option: value for option, value in config.items(section)}


# Options of the DEFAULT section as a plain dict, read once for reuse below.
default_cfgs = get_all_details_of_section("DEFAULT")

### Create boto3 client - Transcribe

In [None]:
# Missing or blank settings are normalised to None. Passing "" would hand boto3
# an explicit (blank) region and credential set; None lets it fall back to its
# default resolution chain (environment variables, shared config files, roles).
transcribe_client = boto3.client(
    "transcribe",
    region_name=default_cfgs.get("aws_default_region") or None,
    aws_access_key_id=default_cfgs.get("aws_access_key_id") or None,
    aws_secret_access_key=default_cfgs.get("aws_secret_access_key") or None,
    aws_session_token=default_cfgs.get("aws_session_token") or None,
)


### Transcribe File

In [None]:
def transcribe_file(
    job_name,
    file_uri,
    transcribe_client,
    *,
    media_format="mp4",
    language_code="en-US",
    max_tries=60,
    poll_interval=10,
):
    """Transcribe a media file and return the transcript text.

    Starts a transcription job, polls it until it reports ``COMPLETED`` or
    ``FAILED`` (or the polling budget is exhausted), then downloads the
    transcript document and extracts its text.

    Args:
        job_name: Name given to the transcription job.
        file_uri: Location of the media file, sent as ``MediaFileUri``.
        transcribe_client: Object exposing ``start_transcription_job`` and
            ``get_transcription_job`` (e.g. a boto3 Transcribe client).
        media_format: Value sent as ``MediaFormat``.
        language_code: Value sent as ``LanguageCode``.
        max_tries: Maximum number of status checks before giving up.
        poll_interval: Seconds to wait between two status checks.

    Returns:
        The transcript text, or ``""`` when the job failed or did not finish
        within ``max_tries`` status checks.
    """
    transcribe_client.start_transcription_job(
        TranscriptionJobName=job_name,
        Media={"MediaFileUri": file_uri},
        MediaFormat=media_format,
        LanguageCode=language_code,
    )

    # Initialised before the loop so it is always defined afterwards, even if
    # the loop body never runs.
    transcript_url = ""
    for attempt in range(max_tries):
        job = transcribe_client.get_transcription_job(TranscriptionJobName=job_name)
        details = job["TranscriptionJob"]
        job_status = details["TranscriptionJobStatus"]

        if job_status in ("COMPLETED", "FAILED"):
            print(f"Job {job_name} is {job_status}.")
            if job_status == "COMPLETED":
                transcript_url = details["Transcript"]["TranscriptFileUri"]
                print(f"Download the transcript from\n\t{transcript_url}.")
            else:
                # Surface the service's explanation when it provides one.
                reason = details.get("FailureReason")
                if reason:
                    print(f"Failure reason: {reason}")
            break

        print(f"Waiting for {job_name}. Current status is {job_status}.")
        # No point sleeping after the final check; we are about to give up.
        if attempt < max_tries - 1:
            time.sleep(poll_interval)
    else:
        # Reached only when the loop ended without hitting a terminal status.
        print(f"Gave up waiting for {job_name} after {max_tries} status checks.")

    if not transcript_url:
        return ""

    # The context manager closes the connection even if parsing fails.
    with urllib.request.urlopen(transcript_url) as response:
        data = json.load(response)

    # Extract the transcript text
    return data["results"]["transcripts"][0]["transcript"]

# S3 location of the recording to transcribe.
BUCKET_NAME = "tr-hackathon"
MEDIAFILE = "usecase-exp/nirmala-sitharaman-fm-parliament.mp4"
file_uri = f"s3://{BUCKET_NAME}/{MEDIAFILE}"

# Job name carries a random numeric suffix.
# NOTE(review): the suffix range is small, so names can repeat across runs —
# confirm whether the service requires unique job names.
job_id = f"Transcribe-job-{random.randint(9, 10054)}"

# Run the job and keep the resulting transcript text.
transcibed_text = transcribe_file(job_id, file_uri, transcribe_client)
