In [2]:
import boto3
import os
import time
import urllib
import json
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.llms.bedrock import Bedrock
from reportlab.pdfgen import canvas
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import S3FileLoader

def transcribe_and_create_pdf(language_code='en-US',
                               job_name_prefix='My-transcription', pdf_filename_prefix='Transcription_output'):
    transcribe_client = boto3.client('transcribe')

    def transcribe_file(job_name, file_uri, transcribe_client, media_format, language_code):
        transcribe_client.start_transcription_job(
            TranscriptionJobName=job_name,
            Media={'MediaFileUri': file_uri},
            MediaFormat=media_format,
            LanguageCode=language_code
        )
        max_tries = 60
        while max_tries > 0:
            max_tries -= 1
            job = transcribe_client.get_transcription_job(TranscriptionJobName=job_name)
            job_status = job['TranscriptionJob']['TranscriptionJobStatus']
            if job_status in ['COMPLETED', 'FAILED']:
                print(f"Job {job_name} is {job_status}.")
                if job_status == 'COMPLETED':
                    response = urllib.request.urlopen(job['TranscriptionJob']['Transcript']['TranscriptFileUri'])
                    data = json.loads(response.read())
                    text = data['results']['transcripts'][0]['transcript']
                break
            else:
                print(f"Waiting for {job_name}. Current status is {job_status}.")
            time.sleep(10)
        return text

    def process_media_format(media_format):
        timestamp = str(int(time.time()))
        job_name = f'{job_name_prefix}_{media_format}_{timestamp}'
        pdf_filename = f'{pdf_filename_prefix}_{media_format}_{timestamp}.pdf'

        file_uris = []
        while True:
            file_uri = input(f"Enter {media_format} file URI (enter 'done' when finished): ")
            if file_uri.lower() == 'done':
                break
            file_uris.append(file_uri)

        transcribed_text = ""
        for i, file_uri in enumerate(file_uris):
            text = transcribe_file(f"{job_name}_{i}", file_uri, transcribe_client, media_format, language_code)
            transcribed_text += f"Transcribed Text ({media_format.upper()}) - File {i + 1}:\n{text}\n\n"

            # Create PDF
            pdf = canvas.Canvas(f"{pdf_filename}")
            pdf.setFont("Helvetica", 12)
            pdf.drawString(10, 800, f"Transcribed Text ({media_format.upper()}) - File {i + 1}:")
            text_lines = text.split('\n')
            for j, line in enumerate(text_lines):
                pdf.drawString(10, 780 - j * 15, line)
            pdf.save()

        # Upload PDFs to S3
        s3_bucket_name = 'my-s3-doc-loader'
        s3_keys = [f"{pdf_filename}"]
        s3_client = boto3.client('s3')

        for i, s3_key in enumerate(s3_keys):
            with open(s3_key, 'rb') as pdf_file:
                s3_client.upload_fileobj(pdf_file, s3_bucket_name, s3_key)

        return transcribed_text, [f's3://{s3_bucket_name}/{key}' for key in s3_keys]

    # Process MP3
    transcribed_text_mp3, s3_uris_mp3 = process_media_format('mp3')

    # Process MP4
    transcribed_text_mp4, s3_uris_mp4 = process_media_format('mp4')

    return transcribed_text_mp3, s3_uris_mp3, transcribed_text_mp4, s3_uris_mp4

# Call the function and get the transcribed text and S3 URIs for both MP3 and MP4
transcribed_text_mp3, s3_uris_mp3, transcribed_text_mp4, s3_uris_mp4 = transcribe_and_create_pdf()

# Print or use transcribed_text and s3_uris as needed for both MP3 and MP4
print("Transcribed Text (MP3):\n", transcribed_text_mp3)
print("PDFs saved to (MP3):\n", "\n".join(s3_uris_mp3))
print("\n")
print("Transcribed Text (MP4):\n", transcribed_text_mp4)
print("PDFs saved to (MP4):\n", "\n".join(s3_uris_mp4))


Waiting for My-transcription_mp3_1700802237_0. Current status is IN_PROGRESS.
Waiting for My-transcription_mp3_1700802237_0. Current status is IN_PROGRESS.
Job My-transcription_mp3_1700802237_0 is COMPLETED.


ParamValidationError: Parameter validation failed:
Invalid length for parameter Media.MediaFileUri, value: 0, valid min length: 1

In [3]:
import boto3
import os
import time
import urllib
import json
from io import BytesIO
from reportlab.pdfgen import canvas

def transcribe_and_create_pdf(language_code='en-US',
                               job_name_prefix='My-transcription', pdf_filename_prefix='Transcription_output'):
    transcribe_client = boto3.client('transcribe')

    def transcribe_file(job_name, file_uri, transcribe_client, media_format, language_code):
        transcribe_client.start_transcription_job(
            TranscriptionJobName=job_name,
            Media={'MediaFileUri': file_uri},
            MediaFormat=media_format,
            LanguageCode=language_code
        )
        max_tries = 60
        while max_tries > 0:
            max_tries -= 1
            job = transcribe_client.get_transcription_job(TranscriptionJobName=job_name)
            job_status = job['TranscriptionJob']['TranscriptionJobStatus']
            if job_status in ['COMPLETED', 'FAILED']:
                print(f"Job {job_name} is {job_status}.")
                if job_status == 'COMPLETED':
                    response = urllib.request.urlopen(job['TranscriptionJob']['Transcript']['TranscriptFileUri'])
                    data = json.loads(response.read())
                    text = data['results']['transcripts'][0]['transcript']
                break
            else:
                print(f"Waiting for {job_name}. Current status is {job_status}.")
            time.sleep(10)
        return text

    def process_media_format(media_format):
        timestamp = str(int(time.time()))
        job_name = f'{job_name_prefix}_{media_format}_{timestamp}'

        file_uris = []
        while True:
            file_uri = input(f"Enter {media_format} file URI (enter 'done' when finished): ")
            if file_uri.lower() == 'done':
                break
            file_uris.append(file_uri)

        transcribed_text = ""
        pdf_uris = []
        for i, file_uri in enumerate(file_uris):
            text = transcribe_file(f"{job_name}_{i}", file_uri, transcribe_client, media_format, language_code)
            transcribed_text += f"Transcribed Text ({media_format.upper()}) - File {i + 1}:\n{text}\n\n"

            # Create PDF in memory
            pdf_buffer = BytesIO()
            pdf = canvas.Canvas(pdf_buffer)
            pdf.setFont("Helvetica", 12)
            pdf.drawString(10, 800, f"Transcribed Text ({media_format.upper()}) - File {i + 1}:")
            text_lines = text.split('\n')
            for j, line in enumerate(text_lines):
                pdf.drawString(10, 780 - j * 15, line)
            pdf.save()

            # Upload PDF to S3
            s3_bucket_name = 'my-s3-doc-loader'
            s3_key = f"{pdf_filename_prefix}_{media_format}_{timestamp}_{i}.pdf"
            s3_client = boto3.client('s3')
            pdf_buffer.seek(0)  # Move the buffer cursor to the beginning
            s3_client.upload_fileobj(pdf_buffer, s3_bucket_name, s3_key)

            pdf_uris.append(f's3://{s3_bucket_name}/{s3_key}')

        return transcribed_text, pdf_uris

    # Process MP3
    transcribed_text_mp3, s3_uris_mp3 = process_media_format('mp3')

    # Process MP4
    transcribed_text_mp4, s3_uris_mp4 = process_media_format('mp4')

    return transcribed_text_mp3, s3_uris_mp3, transcribed_text_mp4, s3_uris_mp4

# Call the function and get the transcribed text and S3 URIs for both MP3 and MP4
transcribed_text_mp3, s3_uris_mp3, transcribed_text_mp4, s3_uris_mp4 = transcribe_and_create_pdf()

# Print or use transcribed_text and s3_uris as needed for both MP3 and MP4
print("Transcribed Text (MP3):\n", transcribed_text_mp3)
print("PDFs saved to (MP3):\n", "\n".join(s3_uris_mp3))
print("\n")
print("Transcribed Text (MP4):\n", transcribed_text_mp4)
print("PDFs saved to (MP4):\n", "\n".join(s3_uris_mp4))


Waiting for My-transcription_mp3_1700803191_0. Current status is IN_PROGRESS.
Waiting for My-transcription_mp3_1700803191_0. Current status is IN_PROGRESS.
Job My-transcription_mp3_1700803191_0 is COMPLETED.
Waiting for My-transcription_mp4_1700803229_0. Current status is IN_PROGRESS.
Waiting for My-transcription_mp4_1700803229_0. Current status is IN_PROGRESS.
Job My-transcription_mp4_1700803229_0 is COMPLETED.
Transcribed Text (MP3):
 Transcribed Text (MP3) - File 1:
Cloud computing is the on demand delivery of it resources via the internet. With pay as you go pricing. Instead of buying, owning and maintaining physical data centers and servers, you can access technology services such as computing, power storage and databases on an as needed basis. From a cloud provider like Amazon web services organizations of every type size and industry are using the cloud for a wide variety of use cases such as data backup, disaster recovery, email, virtual desktops, software development and testi

In [4]:
import boto3
import os
import time
import urllib
import json
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.llms.bedrock import Bedrock
from reportlab.pdfgen import canvas
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import S3FileLoader

def transcribe_and_create_pdf(language_code='en-US',
                               job_name_prefix='My-transcription', pdf_filename_prefix='Transcription_output'):
    transcribe_client = boto3.client('transcribe')

    def transcribe_file(job_name, file_uri, transcribe_client, media_format, language_code):
        transcribe_client.start_transcription_job(
            TranscriptionJobName=job_name,
            Media={'MediaFileUri': file_uri},
            MediaFormat=media_format,
            LanguageCode=language_code
        )
        max_tries = 60
        while max_tries > 0:
            max_tries -= 1
            job = transcribe_client.get_transcription_job(TranscriptionJobName=job_name)
            job_status = job['TranscriptionJob']['TranscriptionJobStatus']
            if job_status in ['COMPLETED', 'FAILED']:
                print(f"Job {job_name} is {job_status}.")
                if job_status == 'COMPLETED':
                    response = urllib.request.urlopen(job['TranscriptionJob']['Transcript']['TranscriptFileUri'])
                    data = json.loads(response.read())
                    text = data['results']['transcripts'][0]['transcript']
                break
            else:
                print(f"Waiting for {job_name}. Current status is {job_status}.")
            time.sleep(10)
        return text

    def process_media_format(media_format):
        timestamp = str(int(time.time()))
        job_name = f'{job_name_prefix}_{media_format}_{timestamp}'
        pdf_filename = f'{pdf_filename_prefix}_{media_format}_{timestamp}.pdf'

        file_uris = []
        while True:
            file_uri = input(f"Enter {media_format} file URI (enter 'done' when finished): ")
            if file_uri.lower() == 'done':
                break
            file_uris.append(file_uri)

        transcribed_text = ""
        for i, file_uri in enumerate(file_uris):
            text = transcribe_file(f"{job_name}_{i}", file_uri, transcribe_client, media_format, language_code)
            transcribed_text += f"Transcribed Text ({media_format.upper()}) - File {i + 1}:\n{text}\n\n"

            # Create PDF
            pdf = canvas.Canvas(f"{pdf_filename}_{i}")
            pdf.setFont("Helvetica", 12)
            pdf.drawString(10, 800, f"Transcribed Text ({media_format.upper()}) - File {i + 1}:")
            text_lines = text.split('\n')
            for j, line in enumerate(text_lines):
                pdf.drawString(10, 780 - j * 15, line)
            pdf.save()

        # Upload PDFs to S3
        s3_bucket_name = 'my-s3-doc-loader'
        s3_keys = [f"{pdf_filename}_{i}" for i in range(len(file_uris))]
        s3_client = boto3.client('s3')

        for i, s3_key in enumerate(s3_keys):
            with open(s3_key, 'rb') as pdf_file:
                s3_client.upload_fileobj(pdf_file, s3_bucket_name, s3_key)

        return transcribed_text, [f's3://{s3_bucket_name}/{key}' for key in s3_keys]

    # Process MP3
    transcribed_text_mp3, s3_uris_mp3 = process_media_format('mp3')

    # Ask the user if they want to process MP4
    process_mp4 = input("Do you want to process MP4 files? (y/n): ").lower()
    if process_mp4 == 'y':
        transcribed_text_mp4, s3_uris_mp4 = process_media_format('mp4')
    else:
        transcribed_text_mp4, s3_uris_mp4 = "", []

    return transcribed_text_mp3, s3_uris_mp3, transcribed_text_mp4, s3_uris_mp4

# Call the function and get the transcribed text and S3 URIs for both MP3 and MP4
transcribed_text_mp3, s3_uris_mp3, transcribed_text_mp4, s3_uris_mp4 = transcribe_and_create_pdf()


Waiting for My-transcription_mp3_1700804700_0. Current status is IN_PROGRESS.
Waiting for My-transcription_mp3_1700804700_0. Current status is IN_PROGRESS.
Job My-transcription_mp3_1700804700_0 is COMPLETED.


In [6]:
import boto3
import os
import time
import urllib
import json
from reportlab.pdfgen import canvas

def transcribe_and_create_pdf(language_code='en-US',
                               job_name_prefix='My-transcription', pdf_filename_prefix='Transcription_output'):
    transcribe_client = boto3.client('transcribe')

    def transcribe_file(job_name, file_uri, transcribe_client, media_format, language_code):
        transcribe_client.start_transcription_job(
            TranscriptionJobName=job_name,
            Media={'MediaFileUri': file_uri},
            MediaFormat=media_format,
            LanguageCode=language_code
        )
        max_tries = 60
        while max_tries > 0:
            max_tries -= 1
            job = transcribe_client.get_transcription_job(TranscriptionJobName=job_name)
            job_status = job['TranscriptionJob']['TranscriptionJobStatus']
            if job_status in ['COMPLETED', 'FAILED']:
                print(f"Job {job_name} is {job_status}.")
                if job_status == 'COMPLETED':
                    response = urllib.request.urlopen(job['TranscriptionJob']['Transcript']['TranscriptFileUri'])
                    data = json.loads(response.read())
                    text = data['results']['transcripts'][0]['transcript']
                break
            else:
                print(f"Waiting for {job_name}. Current status is {job_status}.")
            time.sleep(10)
        return text

    def process_media_format(media_format):
        timestamp = str(int(time.time()))
        job_name = f'{job_name_prefix}_{media_format}_{timestamp}'
        pdf_filename = f'{pdf_filename_prefix}_{media_format}_{timestamp}.pdf'

        file_uris = []
        while True:
            file_uri = input(f"Enter {media_format} file URI (enter 'done' when finished): ")
            if file_uri.lower() == 'done':
                break
            file_uris.append(file_uri)

        transcribed_text = ""
        for i, file_uri in enumerate(file_uris):
            text = transcribe_file(f"{job_name}_{i}", file_uri, transcribe_client, media_format, language_code)
            transcribed_text += f"Transcribed Text ({media_format.upper()}) - File {i + 1}:\n{text}\n\n"

            # Create PDF
            pdf = canvas.Canvas(f"{pdf_filename}.pdf")  # Change this line
            pdf.setFont("Helvetica", 12)
            pdf.drawString(10, 800, f"Transcribed Text ({media_format.upper()}) - File {i + 1}:")
            text_lines = text.split('\n')
            for j, line in enumerate(text_lines):
                pdf.drawString(10, 780 - j * 15, line)
            pdf.save()

        # Upload PDFs to S3
        s3_bucket_name = 'my-s3-doc-loader'
        s3_keys = [f"{pdf_filename}"]  # Change this line
        s3_client = boto3.client('s3')

        for i, s3_key in enumerate(s3_keys):
            with open(s3_key, 'rb') as pdf_file:
                s3_client.upload_fileobj(pdf_file, s3_bucket_name, s3_key)

        return transcribed_text, [f's3://{s3_bucket_name}/{key}' for key in s3_keys]

    # Process MP3
    transcribed_text_mp3, s3_uris_mp3 = process_media_format('mp3')

    # Ask the user if they want to process MP4
    process_mp4 = input("Do you want to process MP4 files? (y/n): ").lower()
    if process_mp4 == 'y':
        transcribed_text_mp4, s3_uris_mp4 = process_media_format('mp4')
    else:
        transcribed_text_mp4, s3_uris_mp4 = "", []

    return transcribed_text_mp3, s3_uris_mp3, transcribed_text_mp4, s3_uris_mp4

# Call the function and get the transcribed text and S3 URIs for both MP3 and MP4
transcribed_text_mp3, s3_uris_mp3, transcribed_text_mp4, s3_uris_mp4 = transcribe_and_create_pdf()


Waiting for My-transcription_mp3_1700805315_0. Current status is IN_PROGRESS.
Waiting for My-transcription_mp3_1700805315_0. Current status is IN_PROGRESS.
Waiting for My-transcription_mp3_1700805315_0. Current status is IN_PROGRESS.
Job My-transcription_mp3_1700805315_0 is COMPLETED.


FileNotFoundError: [Errno 2] No such file or directory: 'Transcription_output_mp3_1700805315.pdf'

In [12]:
import boto3
import os
import time
import urllib
import json
from io import BytesIO
from reportlab.pdfgen import canvas

def transcribe_and_create_pdf(language_code='en-US',
                               job_name_prefix='My-transcription', pdf_filename_prefix='Transcription_output'):
    transcribe_client = boto3.client('transcribe')

    def transcribe_file(job_name, file_uri, transcribe_client, media_format, language_code):
        transcribe_client.start_transcription_job(
            TranscriptionJobName=job_name,
            Media={'MediaFileUri': file_uri},
            MediaFormat=media_format,
            LanguageCode=language_code
        )
        max_tries = 60
        while max_tries > 0:
            max_tries -= 1
            job = transcribe_client.get_transcription_job(TranscriptionJobName=job_name)
            job_status = job['TranscriptionJob']['TranscriptionJobStatus']
            if job_status in ['COMPLETED', 'FAILED']:
                print(f"Job {job_name} is {job_status}.")
                if job_status == 'COMPLETED':
                    response = urllib.request.urlopen(job['TranscriptionJob']['Transcript']['TranscriptFileUri'])
                    data = json.loads(response.read())
                    text = data['results']['transcripts'][0]['transcript']
                break
            else:
                print(f"Waiting for {job_name}. Current status is {job_status}.")
            time.sleep(10)
        return text

    def process_media_format(media_format):
        timestamp = str(int(time.time()))
        job_name = f'{job_name_prefix}_{media_format}_{timestamp}'
        pdf_filename = f'{pdf_filename_prefix}_{media_format}_{timestamp}.pdf'

        file_uris = []
        while True:
            file_uri = input(f"Enter {media_format} file URI (enter 'done' when finished): ")
            if file_uri.lower() == 'done':
                break
            file_uris.append(file_uri)

        transcribed_text = ""
        for i, file_uri in enumerate(file_uris):
            text = transcribe_file(f"{job_name}_{i}", file_uri, transcribe_client, media_format, language_code)
            transcribed_text += f"Transcribed Text ({media_format.upper()}) - File {i + 1}:\n{text}\n\n"

            # Create PDF in memory
            pdf_buffer = BytesIO()
            pdf = canvas.Canvas(pdf_buffer)
            pdf.setFont("Helvetica", 12)
            pdf.drawString(10, 800, f"Transcribed Text ({media_format.upper()}) - File {i + 1}:")
            text_lines = text.split('\n')
            for j, line in enumerate(text_lines):
                pdf.drawString(10, 780 - j * 15, line)
            pdf.save()

            # Upload PDFs to S3
            s3_bucket_name = 'my-s3-doc-loader'
            s3_key = f"{pdf_filename}"

            # Reset the buffer position to the beginning
            pdf_buffer.seek(0)
            
            # Upload the PDF directly from the in-memory buffer
            s3_client = boto3.client('s3')
            s3_client.upload_fileobj(pdf_buffer, s3_bucket_name, s3_key)

        return transcribed_text, [f's3://{s3_bucket_name}/{s3_key}']

    # Process MP3
    transcribed_text_mp3, s3_uris_mp3 = process_media_format('mp3')

    # Ask the user if they want to process MP4
    process_mp4 = input("Do you want to process MP4 files? (y/n): ").lower()
    if process_mp4 == 'y':
        transcribed_text_mp4, s3_uris_mp4 = process_media_format('mp4')
    else:
        transcribed_text_mp4, s3_uris_mp4 = "", []

    return transcribed_text_mp3, s3_uris_mp3, transcribed_text_mp4, s3_uris_mp4

# Call the function and get the transcribed text and S3 URIs for both MP3 and MP4
transcribed_text_mp3, s3_uris_mp3, transcribed_text_mp4, s3_uris_mp4 = transcribe_and_create_pdf()


UnboundLocalError: cannot access local variable 's3_bucket_name' where it is not associated with a value

In [14]:
import boto3
import os
import time
import urllib
import json
from io import BytesIO
from reportlab.pdfgen import canvas

def transcribe_and_create_pdf(language_code='en-US',
                               job_name_prefix='My-transcription', pdf_filename_prefix='Transcription_output'):
    transcribe_client = boto3.client('transcribe')

    def transcribe_file(job_name, file_uri, transcribe_client, media_format, language_code):
        transcribe_client.start_transcription_job(
            TranscriptionJobName=job_name,
            Media={'MediaFileUri': file_uri},
            MediaFormat=media_format,
            LanguageCode=language_code
        )
        max_tries = 60
        while max_tries > 0:
            max_tries -= 1
            job = transcribe_client.get_transcription_job(TranscriptionJobName=job_name)
            job_status = job['TranscriptionJob']['TranscriptionJobStatus']
            if job_status in ['COMPLETED', 'FAILED']:
                print(f"Job {job_name} is {job_status}.")
                if job_status == 'COMPLETED':
                    response = urllib.request.urlopen(job['TranscriptionJob']['Transcript']['TranscriptFileUri'])
                    data = json.loads(response.read())
                    text = data['results']['transcripts'][0]['transcript']
                break
            else:
                print(f"Waiting for {job_name}. Current status is {job_status}.")
            time.sleep(10)
        return text

    def process_media_format(media_format):
        timestamp = str(int(time.time()))
        job_name = f'{job_name_prefix}_{media_format}_{timestamp}'
        pdf_filename = f'{pdf_filename_prefix}_{media_format}_{timestamp}.pdf'

        file_uris = []
        while True:
            file_uri = input(f"Enter {media_format} file URI (enter 'done' when finished, enter 'skip' to skip): ")
            if file_uri.lower() == 'done':
                break
            elif file_uri.lower() == 'skip':
                continue
            file_uris.append(file_uri)

        if not file_uris:
            return "", []

        transcribed_text = ""
        for i, file_uri in enumerate(file_uris):
            text = transcribe_file(f"{job_name}_{i}", file_uri, transcribe_client, media_format, language_code)
            transcribed_text += f"Transcribed Text ({media_format.upper()}) - File {i + 1}:\n{text}\n\n"

            # Create PDF in memory
            pdf_buffer = BytesIO()
            pdf = canvas.Canvas(pdf_buffer)
            pdf.setFont("Helvetica", 12)
            pdf.drawString(10, 800, f"Transcribed Text ({media_format.upper()}) - File {i + 1}:")
            text_lines = text.split('\n')
            for j, line in enumerate(text_lines):
                pdf.drawString(10, 780 - j * 15, line)
            pdf.save()

            # Upload PDFs to S3
            s3_bucket_name = 'my-s3-doc-loader'
            s3_key = f"{pdf_filename}"

            # Reset the buffer position to the beginning
            pdf_buffer.seek(0)
            
            # Upload the PDF directly from the in-memory buffer
            s3_client = boto3.client('s3')
            s3_client.upload_fileobj(pdf_buffer, s3_bucket_name, s3_key)

        return transcribed_text, [f's3://{s3_bucket_name}/{s3_key}']

    # Ask the user if they want to process MP3
    process_mp3 = input("Do you want to process MP3 files? (y/n): ").lower()
    if process_mp3 == 'y':
        transcribed_text_mp3, s3_uris_mp3 = process_media_format('mp3')
    else:
        transcribed_text_mp3, s3_uris_mp3 = "", []

    # Ask the user if they want to process MP4
    process_mp4 = input("Do you want to process MP4 files? (y/n): ").lower()
    if process_mp4 == 'y':
        transcribed_text_mp4, s3_uris_mp4 = process_media_format('mp4')
    else:
        transcribed_text_mp4, s3_uris_mp4 = "", []

    return transcribed_text_mp3, s3_uris_mp3, transcribed_text_mp4, s3_uris_mp4

# Call the function and get the transcribed text and S3 URIs for both MP3 and MP4
transcribed_text_mp3, s3_uris_mp3, transcribed_text_mp4, s3_uris_mp4 = transcribe_and_create_pdf()


Waiting for My-transcription_mp4_1700806688_0. Current status is IN_PROGRESS.
Waiting for My-transcription_mp4_1700806688_0. Current status is IN_PROGRESS.
Job My-transcription_mp4_1700806688_0 is COMPLETED.
