In [25]:
from youtube_transcript_api import YouTubeTranscriptApi

video_id = "UJCVt2rNOgs"
text_language = "en"


def format_timestamp(seconds):
    """Render a non-negative offset in seconds as a zero-padded HH:MM:SS string.

    Fractional seconds are truncated (not rounded), e.g. 3725.9 -> "01:02:05".

    Args:
        seconds: Offset from the start of the video, as an int or float.

    Returns:
        The offset formatted as "HH:MM:SS".
    """
    whole_seconds = int(seconds)
    minutes, secs = divmod(whole_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}"


transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
transcript = transcript_list.find_generated_transcript([text_language]).fetch()

# Convert each entry's "start" offset to a standard HH:MM:SS format and build
# one text blob with a line per transcript entry, tagged with its start time.
# The entry is updated in place so `transcript` keeps the formatted value, as
# before. No local named `min` is used, so the builtin `min` stays usable in
# later cells.
lines = []
for entry in transcript:
    entry["start"] = format_timestamp(entry["start"])
    lines.append(f'\n{entry["text"]} - (start:{entry["start"]})')

# Join once instead of repeatedly concatenating strings inside the loop.
text = "".join(lines)

In [30]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import ConversationalRetrievalChain,RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.memory import ConversationBufferMemory
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.prompts import PromptTemplate
from dotenv import load_dotenv
from langchain.vectorstores import FAISS


# Load variables from a .env file before the OpenAI clients are constructed.
# NOTE(review): presumably this supplies the OpenAI API key -- confirm.
load_dotenv()

# --- Prompt -----------------------------------------------------------------
# The retrieved transcript chunks are injected as {context}; the user's query
# is injected as {question}.
template = """Answer the question in your own words from the 
context given to you. The context given to you is the transcript of a video.
If questions are asked where there is no relevant context available, please answer from 
what you know.

Context: {context}

H: {question}
Assistant:"""

prompt = PromptTemplate(
    template=template,
    input_variables=["context", "question"],
)

# --- Chunking and indexing --------------------------------------------------
# Split the transcript text on newlines into chunks of up to 1000 characters
# with a 200-character overlap, then embed the chunks into a FAISS index.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len,
)
chunks = text_splitter.split_text(text)

embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_texts(chunks, embeddings)

# --- Retrieval QA chain -----------------------------------------------------
llm = OpenAI()

# NOTE: conversation memory (ConversationBufferMemory with
# memory_key="chat_history", return_messages=True) was sketched here but is
# currently disabled; the chain below is built without it.
qa = RetrievalQA.from_chain_type(
    llm,
    retriever=vectorstore.as_retriever(),
    chain_type_kwargs={"prompt": prompt},
)

# --- Ask a question ---------------------------------------------------------
question1 = "Could you divide the video into sequential parts? Tell me the time they starts and explain each part."
result1 = qa({"query": question1})
print(result1)

{'query': 'Could you divide the video into sequential parts? Tell me the time they starts and explain each part.', 'result': " Sure, the video can be divided into the following parts:\n- 00:00:59 - 00:01:03: The speaker talks about feeling surreal with the state-of-the-art M2 and R1 chips inside the product and its ability to deliver images quickly.\n- 00:01:03 - 00:01:10: The speaker mentions the features of the product, such as built-in cameras, microphones, and seamless finger navigation.\n- 00:01:10 - 00:01:34: The speaker expresses their surprise and excitement about the product's intuitiveness and ease of use, specifically for tasks like using FaceTime and browsing Safari.\n- 00:01:34 - 00:01:52: The speaker discusses the popularity and anticipation for the product, as well as its limited availability at launch.\n- 00:01:52 - 00:02:38: The speaker talks about their personal desire to use the product as a large monitor for watching movies or videos.\n- 00:02:38 - 00:02:58: The spe