In [16]:
from youtube_transcript_api import YouTubeTranscriptApi
from langchain_classic.text_splitter import RecursiveCharacterTextSplitter
from langchain_classic.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace, HuggingFaceEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate

In [17]:
# Model configuration
# Embedder used for both the indexed transcript chunks and the incoming query.
embedding_model = HuggingFaceEmbeddings(
    model="sentence-transformers/all-MiniLM-L6-v2",
)

# Settings for the hosted text-generation endpoint, kept together so they are
# easy to tweak in one place.
endpoint_options = {
    "repo_id": "google/gemma-2-2b-it",
    "task": "text-generation",
    "temperature": 0.2,
}
llm = HuggingFaceEndpoint(**endpoint_options)

# Chat-style wrapper around the endpoint; this is what the generation chain calls.
model = ChatHuggingFace(llm=llm)

### 1. INDEXING

In [18]:
# ============================== Step 1: Document Loading ==============================
video_id = "Gfr50f6ZBvo"

api = YouTubeTranscriptApi()
# List the transcripts available for this video, pick one matching the
# language codes below (presumably tried in the order given -- confirm against
# the youtube-transcript-api docs), and download it.
transcript_list = api.list(video_id=video_id)
transcript = transcript_list.find_transcript(language_codes=["en", "hi"]).fetch()

# Flatten the transcript snippets into a single string. It is deliberately NOT
# called `result`: the retrieval cell uses that name for the retrieved
# documents, and sharing it would let a re-run of this cell silently change
# what the generation step receives.
transcript_text = " ".join(chunk.text for chunk in transcript)

# ============================== Step 2: Text Splitting ==============================
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)
# Split the transcript into overlapping chunks wrapped as Document objects.
chunks = splitter.create_documents(texts=[transcript_text])

# Fail with a clear message instead of an obscure error from the vector store
# when the transcript turned out to be empty.
if not chunks:
    raise ValueError(f"Transcript for video {video_id!r} produced no text chunks to index")

# ============================== Step 3: Store it into a vector store ==============================
# Embed every chunk and build the in-memory FAISS index used for retrieval.
vector_store = FAISS.from_documents(
    documents=chunks,
    embedding=embedding_model
)

###

### 2. RETRIEVAL

In [25]:
# Build a similarity retriever over the FAISS index.
# The number of documents to return must be passed through `search_kwargs`;
# the previous `kwargs={"k": 4}` is not an option `as_retriever` documents, so
# the requested k never reached the search.
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 4}
)
query = "give me summary of this video"

# `result` holds the retrieved Document objects; the generation cell reads it.
result = retriever.invoke(query)

# Show each retrieved chunk so the retrieval quality can be inspected by eye.
for i, res in enumerate(result, start=1):
    print(f"Result {i}")
    print(res.page_content)

Result 1
deeper maybe simpler explanation yes of things right than the standard model of physics which we know doesn't work but we still keep adding to so um and and that's how i think the beginning of an explanation would look and it would start encompassing many of the mysteries that we have wondered about for thousands of years like you know consciousness uh life and gravity all of these things yeah giving us a glimpses of explanations for those things yeah well um damas dear one of the special human beings in this giant puzzle of ours and it's a huge honor that you would take a pause from the bigger puzzle to solve this small puzzle of a conversation with me today it's truly an honor and a pleasure thank you thank you i really enjoyed it thanks lex thanks for listening to this conversation with demas establish to support this podcast please check out our sponsors in the description and now let me leave you with some words from edskar dykstra computer science is no more about comput

### 3. AUGMENTATION

In [26]:
# Prompt text for grounded question answering. `{result}` receives the
# retrieved context and `{query}` the user's question.
qa_template = (
    "\n"
    'Answer the question using only the context given below, if the context is not available return "I don\'t know"\n'
    "context: {result}\n"
    "question: {query}\n"
)

prompt = PromptTemplate(
    input_variables=["result", "query"],
    template=qa_template,
)

### 4. GENERATION

In [27]:
parser = StrOutputParser()

# prompt -> chat model -> plain string
chain = prompt | model | parser

# Feed the model only the text of the retrieved chunks. Passing the list of
# Document objects directly would interpolate its Python repr (including
# metadata and quoting) into the prompt instead of clean context.
context_text = "\n\n".join(doc.page_content for doc in result)

final_result = chain.invoke({"result": context_text, "query": query})
print(final_result)

The video discusses the beginning of explanations for fundamental mysteries like consciousness, life, and gravity. It suggests that a more fundamental explanation of physics, perhaps simpler than the current standard model, might be the starting point. The speaker compares it to how astronomy relies on telescopes to understand the universe. 

