<a href="https://colab.research.google.com/github/seanlee10/notebooks/gen-ai-playground/blob/main/youtube_summarizer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1. Install dependencies

In [1]:
%pip install -qU yt-dlp boto3 deepgram-sdk langchain-aws langchain-core langchain-groq langchain-nvidia-ai-endpoints langchain-anthropic

Note: you may need to restart the kernel to use updated packages.


# 2. Download YouTube video as MP3

In [None]:
import re
import os
import time
import boto3
import requests
from yt_dlp import YoutubeDL
from google.colab import userdata

# Placeholders: fill in the AWS region and the S3 bucket before running.
AWS_REGION = "<<YOUR_REGION>>"
BUCKET_NAME = "<<YOUR_BUCKET_NAME>>"

# AWS credentials are looked up through Colab's `userdata` helper.
AWS_ACCESS_KEY_ID = userdata.get('aws_access_key_id')
AWS_SECRET_ACCESS_KEY = userdata.get('aws_secret_access_key')

# A single boto3 session; the service clients used later are created from it.
_session_options = {
    'aws_access_key_id': AWS_ACCESS_KEY_ID,
    'aws_secret_access_key': AWS_SECRET_ACCESS_KEY,
    'region_name': AWS_REGION,
}
session = boto3.Session(**_session_options)

# Ask interactively for the video to process.
video_url = input('Enter the video url: ')

def to_snake_case(string):
    """Convert free-form text into a lowercase, underscore-separated name.

    Each run of non-word characters becomes a single underscore, an
    underscore is inserted where a lowercase letter or digit is directly
    followed by an uppercase letter, and the result is lowercased.
    Leading and trailing underscores are not stripped.
    """
    underscored = re.sub(r'[^\w]+', '_', string)
    return re.sub(r'([a-z0-9])([A-Z])', r'\1_\2', underscored).lower()

def download_audio(link):
    """Download the audio of a video and store it locally as ``download.mp3``.

    The best available audio stream is fetched and then transcoded to MP3 by
    yt-dlp's ``FFmpegExtractAudio`` post-processor (ffmpeg must be installed).

    Args:
        link: URL of the video to download.

    Returns:
        The video title converted with ``to_snake_case``.
    """
    options = {
        'format': 'bestaudio',
        # Name the raw download after its real extension; the post-processor
        # below converts it, and the converted file is written as download.mp3.
        'outtmpl': 'download.%(ext)s',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
        }],
    }

    with YoutubeDL(options) as video:
        # extract_info(download=True) already performs the download, so a
        # separate video.download() call would only repeat the work.
        info_dict = video.extract_info(link, download=True)

    video_title = to_snake_case(info_dict['title'])
    print(video_title)
    print("Successfully Downloaded - see local folder on Google Colab")

    return video_title

video_title = download_audio(video_url)

# 3. Upload the audio to S3 and create an Amazon Transcribe job

In [None]:
# Service clients, both built from the shared boto3 session.
s3, transcribe = (session.client(service) for service in ('s3', 'transcribe'))

# The local download is stored in S3 under the snake-cased video title.
s3_file = f"{video_title}.mp3"
local_file = 'download.mp3'

def upload_to_s3(local_file, BUCKET_NAME, s3_file):
    """Upload a local file to an S3 bucket.

    Args:
        local_file: Path of the file to upload.
        BUCKET_NAME: Name of the destination bucket.
        s3_file: Object key to store the file under.

    Returns:
        True when the upload succeeded, False when the local file is missing
        or S3 reported that the upload failed.
    """
    s3 = session.client('s3')

    try:
        s3.upload_file(local_file, BUCKET_NAME, s3_file)
    except FileNotFoundError:
        print(f"The file {local_file} was not found")
        return False
    except boto3.exceptions.S3UploadFailedError as error:
        # S3-side failures (e.g. missing bucket, denied access) are reported
        # through the same True/False contract as a missing local file.
        print(f"Upload of {local_file} to {BUCKET_NAME}/{s3_file} failed: {error}")
        return False

    print(f"Upload Successful: {local_file} uploaded to {BUCKET_NAME}/{s3_file}")
    return True

uploaded = upload_to_s3(local_file, BUCKET_NAME, s3_file)

# Report the outcome of the upload.
print("File upload completed" if uploaded else "File upload failed")

In [None]:
def create_transcribe_job(job_name, audio_file_uri, language_code='en-US',
                          media_format='mp3', max_speakers=3):
    """Start an Amazon Transcribe job with speaker labels enabled.

    Args:
        job_name: Name to give the transcription job.
        audio_file_uri: URI of the media file to transcribe.
        language_code: Language code of the audio (default ``'en-US'``).
        media_format: Format of the media file (default ``'mp3'``).
        max_speakers: Value passed as ``MaxSpeakerLabels`` (default 3).

    Returns:
        The response returned by ``start_transcription_job``.
    """
    return transcribe.start_transcription_job(
        TranscriptionJobName=job_name,
        LanguageCode=language_code,
        MediaFormat=media_format,
        Media={
            'MediaFileUri': audio_file_uri
        },
        Settings={
            'ShowSpeakerLabels': True,
            'MaxSpeakerLabels': max_speakers
        }
    )

# Transcribe rejects a job name that already exists, so a timestamp suffix
# keeps re-runs for the same video from colliding with an earlier job.
job_name = f"{video_title}_{int(time.time())}"
audio_file_uri = f"s3://{BUCKET_NAME}/{s3_file}"

response = create_transcribe_job(job_name, audio_file_uri)
print(f"Transcription job created: {response['TranscriptionJob']['TranscriptionJobName']}")

# 4. Wait for Transcription Job to finish

In [None]:
job_status = response['TranscriptionJob']['TranscriptionJobStatus']

# Poll until the job reaches a terminal state. FAILED has to end the loop as
# well, otherwise a failed job would be polled forever.
while job_status not in ("COMPLETED", "FAILED"):
    # Wait before each poll so no extra sleep happens once the job is done.
    time.sleep(10)

    response = transcribe.get_transcription_job(
        TranscriptionJobName=job_name
    )
    job_status = response['TranscriptionJob']['TranscriptionJobStatus']

if job_status == "FAILED":
    failure_reason = response['TranscriptionJob'].get('FailureReason', 'no reason reported')
    raise RuntimeError(f"Transcription job {job_name} failed: {failure_reason}")

# 5. Retrieve the transcript and summarize it with an LLM

In [None]:

def get_transcription_result(job_name):
    """Fetch the transcript text of a completed Amazon Transcribe job.

    Args:
        job_name: Name of the transcription job to read.

    Returns:
        The ``transcript`` text of the first entry under
        ``results.transcripts`` in the downloaded transcript file.

    Raises:
        RuntimeError: If the job is not in the COMPLETED state, or the
            transcript file could not be downloaded. Raising (instead of
            returning an error message) keeps a failure from being mistaken
            for transcript text by later cells.
    """
    job = transcribe.get_transcription_job(
        TranscriptionJobName=job_name
    )
    job_details = job['TranscriptionJob']

    job_status = job_details['TranscriptionJobStatus']
    if job_status != 'COMPLETED':
        raise RuntimeError(
            f"Transcription job {job_name} is not complete (status: {job_status})"
        )

    # The URI is deliberately not printed: it can be a temporary signed link,
    # which should not end up stored in notebook output.
    transcript_uri = job_details['Transcript']['TranscriptFileUri']

    # Download the transcription file; the timeout avoids hanging indefinitely.
    response = requests.get(transcript_uri, timeout=30)
    if response.status_code != 200:
        raise RuntimeError(
            f"Failed to download transcript. Status code: {response.status_code}"
        )

    transcript = response.json()

    # Extract the transcription text
    return transcript['results']['transcripts'][0]['transcript']

# Fetch the transcript text for the job; the summarization cell reads it from
# `transcription`.
transcription = get_transcription_result(job_name)

# Uncomment this block to also save the transcript to a local text file.
# with open(f"{video_title}.txt", "w") as text_file:
#     text_file.write(transcription)

In [None]:
from langchain_aws import ChatBedrock
from langchain_groq import ChatGroq
from langchain_nvidia_ai_endpoints import ChatNVIDIA
from langchain_anthropic import ChatAnthropic
from langchain_core.prompts import ChatPromptTemplate

# BEDROCK_CLIENT = session.client("bedrock-runtime", 'ap-northeast-2')
# llm = ChatBedrock(
#     model_id="anthropic.claude-3-5-sonnet-20240620-v1:0",
#     model_kwargs=dict(temperature=0),
#     client=BEDROCK_CLIENT,
# )

# os.environ["GROQ_API_KEY"] = userdata.get('groq')
# llm = ChatGroq(
#     model="llama-3.1-70b-versatile",
#     temperature=0,
#     max_tokens=None,
# )

# os.environ["NVIDIA_API_KEY"] = userdata.get('nvidia')
# llm = ChatNVIDIA(
#     model="nvidia/llama-3.1-nemotron-70b-instruct"
# )

# Expose the key stored under 'anthropic' in Colab userdata as ANTHROPIC_API_KEY.
os.environ["ANTHROPIC_API_KEY"] = userdata.get('anthropic')

llm = ChatAnthropic(model="claude-3-5-sonnet-latest", temperature=0)

template = """
I'd like to write a blog post with the insights from the provided transcript. list all important points with supporting quotes.

<transcript>
{TRANSCRIPT}
</transcript>
"""

prompt = ChatPromptTemplate.from_template(template)


def _message_content(message):
    """Return the `content` attribute of the model's reply."""
    return message.content


# Pipeline: fill the prompt, call the model, keep only the reply content.
chain = prompt | llm | _message_content

summary = chain.invoke({"TRANSCRIPT": transcription})
print(summary)
