In [1]:
import os
from langchain_openai import ChatOpenAI
from langchain_community.document_loaders import YoutubeLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain.chains import LLMChain
from dotenv import load_dotenv, find_dotenv
import textwrap

# Locate the nearest .env file and load its variables into the process
# environment; the boolean result is intentionally discarded via `_`.
# NOTE(review): presumably this supplies OPENAI_API_KEY for the OpenAI clients below — confirm.
_ = load_dotenv(find_dotenv()) # read local .env file



In [2]:
# Module-level embedding client, created once and read as a global by
# create_vector_db_from_youtube_video_url when it builds the FAISS index.
embeddings = OpenAIEmbeddings()

def create_vector_db_from_youtube_video_url(video_url):
    """Build a FAISS vector store from the transcript of a YouTube video.

    The transcript is loaded with ``YoutubeLoader``, split into chunks of
    2000 characters with a 100-character overlap, and each chunk is embedded
    with the module-level ``embeddings`` object.

    Args:
        video_url: URL of the YouTube video whose transcript is indexed.

    Returns:
        The FAISS store produced by ``FAISS.from_documents``.
    """
    # Fetch the transcript documents for the given video.
    transcript_docs = YoutubeLoader.from_youtube_url(video_url).load()

    # Break the transcript into overlapping chunks before embedding.
    splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=100)
    chunks = splitter.split_documents(transcript_docs)

    # Embed every chunk and store the resulting vectors in a FAISS index.
    return FAISS.from_documents(chunks, embeddings)

  warn_deprecated(


In [3]:
def get_response_from_query(db, query, k=4):
    """Answer a question about a video using its indexed transcript.

    Args:
        db: Vector store exposing ``similarity_search(query, k=...)``, as
            returned by ``create_vector_db_from_youtube_video_url``.
        query: The user's question.
        k: Number of transcript chunks to retrieve as context.

    Returns:
        A ``(response, docs)`` tuple: the model's answer as a single line of
        text, and the retrieved documents that were used as context.
    """
    docs = db.similarity_search(query, k=k)
    docs_page_content = " ".join([d.page_content for d in docs])

    chat = ChatOpenAI(model_name="gpt-3.5-turbo-16k", temperature=0.2)

    # Template to use for the system message prompt
    template = """
        You are a helpful assistant that that can answer questions about youtube videos 
        based on the video's transcript: {docs}
        
        Only use the factual information from the transcript to answer the question.
        
        If you feel like you don't have enough information to answer the question, say "I don't know".
        
        """

    system_message_prompt = SystemMessagePromptTemplate.from_template(template)

    # Human question prompt
    human_template = "Answer the following question: {question}"
    human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

    chat_prompt = ChatPromptTemplate.from_messages(
        [system_message_prompt, human_message_prompt]
    )

    chain = LLMChain(llm=chat, prompt=chat_prompt)

    response = chain.run(question=query, docs=docs_page_content)
    # Flatten the answer onto one line. Simply deleting "\n" would fuse the
    # last word of one line with the first word of the next ("end.Next"), so
    # collapse every run of whitespace into a single space instead.
    response = " ".join(response.split())
    return response, docs

In [4]:
# Example usage: index one video's transcript, ask a generic question about
# it, and print the answer wrapped to 50 columns.
# Note: `db`, `response` and `docs` become notebook globals here.
video_url = "https://www.youtube.com/watch?v=th4j9JxWGko"
db = create_vector_db_from_youtube_video_url(video_url)

query = "what is this video about?"
response, docs = get_response_from_query(db, query)
print(textwrap.fill(response, width=50))

  warn_deprecated(


This video is about the speaker's new data science
workflow using AI tools like Chat GPT and GitHub
co-pilot. They explain how these tools have
changed their workflow and how they use them to
complete data science projects efficiently. They
also demonstrate their workflow using a Kaggle
competition dataset.


In [5]:
# Example usage with a second video: same steps as the previous example.
# Note: this rebinds the notebook globals `video_url`, `db`, `response`
# and `docs`, replacing the values from the earlier example cell.
video_url = "https://www.youtube.com/watch?v=HFYv-rk4v9Y"
db = create_vector_db_from_youtube_video_url(video_url)

query = "what is this video about?"
response, docs = get_response_from_query(db, query)
print(textwrap.fill(response, width=50))

This video is about Japan's economic experiments,
including negative interest rates and yield curve
control, and the recent decision by the Bank of
Japan to end these policies. It discusses the
impact of these changes on the Japanese economy
and everyday lives in the country.


In [15]:
def combined_function(video_url, query="what is this video about?"):
    """Index a YouTube video's transcript and answer one question about it.

    Args:
        video_url: URL of the YouTube video to process.
        query: Question to ask about the video. Defaults to a generic
            "what is this video about?" prompt, which was previously
            hard-coded in the body.

    Returns:
        The answer text produced by ``get_response_from_query``.
    """
    db = create_vector_db_from_youtube_video_url(video_url)

    # The retrieved source documents are not needed here, only the answer.
    response, _ = get_response_from_query(db, query)
    return response

In [21]:
import panel as pn  # GUI

# Text box in which the user enters the video URL
text_input = pn.widgets.TextInput(
    name='Input Box:',
    placeholder='Type Youtube URL here...',
)

# Button that triggers processing of the entered URL
submit_button = pn.widgets.Button(name='Submit')

# Read-only text area that displays the result; starts out empty
received_text = pn.widgets.StaticText(value='')

# Define a function to handle button click event
def submit_callback(event):
    """Summarise the video whose URL is in ``text_input`` and display it.

    Args:
        event: Click event passed in by the button; not used.
    """
    video_url = text_input.value.strip()

    # Nothing to process: tell the user instead of calling the loader
    # with an empty URL.
    if not video_url:
        process_input('Please enter a Youtube URL.')
        return

    # This call is synchronous and blocks until the summary is ready.
    # Capture its return value: without the assignment, the name `response`
    # would resolve to a stale notebook global from an earlier cell (or
    # raise NameError on a fresh kernel).
    response = combined_function(video_url)

    process_input(response)

# Define a function to process the input
def process_input(value):
    """Display ``value`` in the ``received_text`` widget.

    Args:
        value: Content to assign to the widget's ``value`` attribute.
    """
    # Update the value of the StaticText widget
    received_text.value = value
    

# Stack the title, URL box, submit button and result text vertically
panel_layout = pn.Column(
    '### Youtube Video Summariser',
    text_input,
    submit_button,
    received_text,
)

# Run submit_callback whenever the button is clicked
submit_button.on_click(submit_callback)

# Serve the layout (launches a local server, as shown in the cell output)
panel_layout.show()

Launching server at http://localhost:54521


<panel.io.server.Server at 0x11b7474c0>