### Install packages

In [None]:
# Install the third-party packages this notebook relies on.
# NOTE(review): versions are unpinned — consider pinning them, since the
# langchain / llama_hub import paths used in later cells may differ between
# releases (confirm against the versions you install).
%pip install langchain youtube-transcript-api llama_index llama_hub

### Insert the YouTube URL

In [None]:
from llama_hub.youtube_transcript import is_youtube_video

# Video whose transcript will be extracted and summarised by the cells below.
youtube_url = "https://www.youtube.com/watch?v=DcWqzZ3I2cY"

# Sanity-check the URL. The call is left as the cell's final expression so the
# notebook displays its result (presumably True for a recognised YouTube link).
is_youtube_video(youtube_url)

### Extract the YouTube transcript

In [None]:
from llama_hub.youtube_transcript import YoutubeTranscriptReader

# Fetch the transcript for the single video URL chosen above; `load_data`
# takes a list of links and the result is indexed like a sequence below.
loader = YoutubeTranscriptReader()
video_links = [youtube_url]
documents = loader.load_data(ytlinks=video_links)

# Only one link was supplied, so the transcript of interest is the first
# document's text.
yt_transcript = documents[0].text
print(yt_transcript)

In [None]:
# Persist the transcript documents (id, metadata and text) to transcript.json
# so later cells can reload them from disk.

import json
from dataclasses import asdict

# One plain dict per loaded document, holding just the three fields we keep.
extracted_data = [
    dict(id=doc.id_, metadata=doc.metadata, text=doc.text)
    for doc in documents
]

# ensure_ascii=False leaves non-ASCII characters unescaped in the output,
# which is why the file is opened explicitly as UTF-8.
with open("transcript.json", mode="w", encoding="utf-8") as f:
    json.dump(extracted_data, f, ensure_ascii=False, indent=4)

### Load the open-source LLM

In [None]:
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chat_models import ChatOllama

# Callback manager wrapping a stdout streaming handler (presumably echoes the
# model's output to the cell as it is generated — confirm in LangChain docs).
stdout_streaming_callbacks = CallbackManager([StreamingStdOutCallbackHandler()])

# Chat model served through Ollama. Change `model` to use a different
# open-source model as required.
chat_model = ChatOllama(
    model="mistral",
    callback_manager=stdout_streaming_callbacks,
    verbose=True,
)


### Load the transcript from the saved JSON file and construct the prompt

In [None]:
from langchain.schema import HumanMessage, SystemMessage

import json

# Load the transcript.json file.
# The file is written as UTF-8 with ensure_ascii=False, so it must be read
# back as UTF-8 as well. Relying on the platform default encoding (e.g.
# cp1252 on Windows) can garble or fail on non-ASCII transcript characters.
with open('transcript.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Extract the text from the first item in the data (one entry per video).
text_content = data[0]['text']

# Replace every newline with a space so the transcript is a single line.
text_content_stripped = text_content.replace('\n', ' ')

# The transcript itself is sent as the human message.
human_message = HumanMessage(content=text_content_stripped)

# The system message carries the summarisation instruction. It is a plain
# string: there are no placeholders, so no f-string prefix is needed.
system_prompt = """Generate a concise summary of the transcript provided below."""

system_message = SystemMessage(content=system_prompt)

# Instruction first, then the transcript it applies to.
messages = [system_message, human_message]

# Left as the cell's final expression so the notebook displays the messages.
messages

### Call the model to summarize the transcript

In [None]:
# Send the system + human messages to the chat model configured earlier.
# The returned message object's `.content` attribute is read in the next cell.
summary = chat_model(messages)

In [None]:
import re

# Extract the content from the AIMessage and put a blank line after each
# sentence. Only periods followed by whitespace (or at the very end of the
# text) are treated as sentence ends, so decimals and dotted names such as
# "3.5" or "example.com" are no longer split across lines.
summary_content = re.sub(r'\.(?=\s|$)', '.\n\n', summary.content)

# Save the content to a txt file. The encoding is explicit because model
# output may contain non-ASCII characters that the platform default encoding
# cannot represent, which would raise UnicodeEncodeError on write.
with open('summary.txt', 'w', encoding='utf-8') as file:
    file.write(summary_content)