📚 Check out the [**Talk to YouTube Videos with Haystack Pipelines**](https://haystack.deepset.ai/blog/talk-to-youtube-videos-with-haystack-pipelines) article for a detailed walkthrough of this example.

## Install the Dependencies

In [None]:
# pytube is used further down to download the audio of the YouTube videos.
!pip install pytube
# Haystack (farm-haystack) with the extras needed for Weaviate, inference,
# file conversion and preprocessing.
!pip install farm-haystack[weaviate,inference,file-conversion,preprocessing]

## (If Needed) Set Your API Token for the Desired Model Provider

In [None]:
from getpass import getpass

# Prompt for the key interactively (getpass does not echo the input).
# The same key is passed to the Whisper transcriber and the PromptNode later on.
api_key = getpass("Enter OpenAI API key:")

## The Indexing Pipeline

In [None]:
import weaviate
from weaviate.embedded import EmbeddedOptions

# Start a Weaviate client backed by an embedded instance with default options.
client = weaviate.Client(embedded_options=EmbeddedOptions())

In [None]:
from haystack.document_stores import WeaviateDocumentStore

# Connect the Haystack document store to Weaviate on port 6666.
# NOTE(review): presumably 6666 is the port the embedded Weaviate instance
# started above listens on by default — confirm against the installed
# weaviate-client version.
document_store = WeaviateDocumentStore(port=6666)

In [None]:
from pytube import YouTube

def youtube2audio(url: str) -> str:
    """Download the audio of a YouTube video and return the local file path.

    The 160kbps stream is preferred. When the video does not offer one, the
    highest-bitrate audio-only stream is used instead, rather than failing
    with an ``AttributeError`` on ``None``.

    Args:
        url: URL of the YouTube video.

    Returns:
        Path of the downloaded file, as returned by pytube's
        ``Stream.download()``.

    Raises:
        ValueError: If the video exposes no audio stream at all.
    """
    streams = YouTube(url).streams
    audio = streams.filter(abr='160kbps').last()
    if audio is None:
        # Not every video offers a 160kbps stream; fall back to the best
        # audio-only stream that is available.
        audio = streams.filter(only_audio=True).order_by('abr').last()
    if audio is None:
        raise ValueError(f"No audio stream available for {url}")
    return audio.download()


In [None]:
from haystack.nodes import EmbeddingRetriever, PreProcessor
from haystack.nodes.audio import WhisperTranscriber
from haystack.pipelines import Pipeline

# Build the three processing components used for indexing.
preprocessor = PreProcessor()
embedder = EmbeddingRetriever(
    document_store=document_store,
    embedding_model="sentence-transformers/multi-qa-mpnet-base-dot-v1",
)
whisper = WhisperTranscriber(api_key=api_key)

# Wire the nodes: File -> Whisper -> Preprocessor -> Embedder -> DocumentStore
indexing_pipeline = Pipeline()
indexing_pipeline.add_node(
    component=whisper,
    name="Whisper",
    inputs=["File"],
)
indexing_pipeline.add_node(
    component=preprocessor,
    name="Preprocessor",
    inputs=["Whisper"],
)
indexing_pipeline.add_node(
    component=embedder,
    name="Embedder",
    inputs=["Preprocessor"],
)
indexing_pipeline.add_node(
    component=document_store,
    name="DocumentStore",
    inputs=["Embedder"],
)

### Run the Indexing Pipeline

In [None]:
# Videos whose audio is downloaded and pushed through the indexing pipeline.
videos = [
    "https://www.youtube.com/watch?v=h5id4erwD4s",
    "https://www.youtube.com/watch?v=iFUeV3aYynI",
]

# Download each video's audio and index it, one file per pipeline run.
for video in videos:
    file_path = youtube2audio(video)
    indexing_pipeline.run(file_paths=[file_path])


## The RAG Pipeline

In [None]:
from haystack.nodes import PromptNode, PromptTemplate, AnswerParser

# Prompt template that hands the retrieved transcript documents and the user
# query to the model; the AnswerParser is attached as the template's output parser.
video_qa_prompt = PromptTemplate(
    prompt=(
        "You will be provided some transcripts from the AI Engineer livestream. Please answer the query based on what is said in the livestream.\n"
        "Video Transcripts: {join(documents)}\n"
        "Query: {query}\n"
        "Answer:"
    ),
    output_parser=AnswerParser(),
)

# GPT-4 prompt node that uses the template above by default.
prompt_node = PromptNode(
    model_name_or_path="gpt-4",
    api_key=api_key,
    default_prompt_template=video_qa_prompt,
)

In [None]:
# Query -> Retriever (the embedder from the indexing pipeline) -> PromptNode
video_rag_pipeline = Pipeline()
video_rag_pipeline.add_node(
    component=embedder,
    name="Retriever",
    inputs=["Query"],
)
video_rag_pipeline.add_node(
    component=prompt_node,
    name="PromptNode",
    inputs=["Retriever"],
)

### Run the RAG Pipeline

In [None]:
# Ask a question and print the text of the first answer in the result.
result = video_rag_pipeline.run(query="Why do we do chunking?")
print(result["answers"][0].answer)