In [1]:
from langchain.document_loaders import YoutubeLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.vectorstores import FAISS
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from dotenv import find_dotenv, load_dotenv
import textwrap

In [2]:
load_dotenv(find_dotenv())
embeddings = OpenAIEmbeddings()

In [3]:
def create_db_from_youtube_video_url(video_url):
  # Load transcript from Youtube url
  loader = YoutubeLoader.from_youtube_url(video_url)
  transcript = loader.load()
  
  # Split text into chunks
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
  docs = text_splitter.split_documents(transcript)
  
  # Store text embeddings into vector database
  db = FAISS.from_documents(docs, embeddings)
  return db

In [4]:
def get_response_from_query(query, db, k=4):
  
  # Query vector database
  query = 'how to create a chatbot from a website'
  docs = db.similarity_search(query, k=4)
  
  # Join docs to make sentence
  docs_page_content = ''.join([d.page_content for d in docs])
  
  # Create model
  chat = ChatOpenAI(model='gpt-3.5-turbo', temperature=0.2)
  
  # template to use as system prompt
  template = """
    You are a helpful assistant that can answer questions about a youtube video based on the transcript: {docs}
    
    Only use factual information from the transcript to answer the questions.
    
    If you feel like you don't have enough information to answer the question, say "I don't know"
    
    Your answer should be verbose and detailed.
    """

  system_message_prompt = SystemMessagePromptTemplate.from_template(template)

  # Human prompt
  human_template = 'Answer the following: {question}'
  human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)

  # Chat prompt
  chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt])

  chain = LLMChain(llm=chat, prompt=chat_prompt, )
  response = chain.run(question=query, docs=docs_page_content)
  response = response.replace('\n', '')
  return response, docs

In [5]:
video_url = 'https://www.youtube.com/watch?v=RBnuhhmD21U&t=784s'
db = create_db_from_youtube_video_url(video_url)

In [6]:
query = 'How can i train langchain on my website'
response, docs = get_response_from_query(query, db)
print(textwrap.fill(response, width=80))

To create a chatbot from a website, you can follow these steps:1. Understand the
website structure: Start by analyzing the website and understanding its
structure. Identify the different pages or URLs that make up the website. This
can be done by using the site map of the website, which lists all the pages and
their corresponding addresses.2. Divide web pages into smaller documents: Once
you have identified the web pages, you need to divide them into smaller
documents. This is necessary because chatbots typically work with smaller chunks
of text. You can divide the web pages into paragraphs, sections, or any other
logical divisions that make sense for your chatbot's purpose.3. Choose a
language model: To train your chatbot, you will need a large language model.
There are various options available, such as OpenAI's language models or open-
source models like GPT-4 or LAMA Index. Select a language model that suits your
requirements and capabilities.4. Embedding the documents: Next, you n