In [1]:
import os
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import faiss
from langchain_core.prompts import PromptTemplate

from Chains.practice import parallel_chain

In [7]:
video_id = "fNk_zzaMoSs"  # Video id taken from the YouTube URL

# Reset before fetching: if the fetch fails, `transcript` must not be left
# undefined or still holding text from an earlier run of this cell, otherwise
# later cells could silently index the wrong video.
transcript = None
try:
    # `languages` is a priority list of language codes; ["en"] restricts the
    # lookup to English captions.
    instance = YouTubeTranscriptApi()
    transcript_list = instance.fetch(video_id=video_id, languages=["en"])

    # Flatten the timed snippets into one plain-text string
    transcript = " ".join(snippet.text for snippet in transcript_list.snippets)
    print(transcript)
except TranscriptsDisabled:
    # Captions are switched off for this video; `transcript` stays None.
    print("No captions available for this video.")

[Translated by Grant Sanderson. Submit corrections at criblate.com] The fundamental, root-of-it-all building block for linear algebra is the vector. So it's worth making sure that we're all on the same page about what exactly a vector is. You see, broadly speaking, there are three distinct but related ideas about vectors, which I'll call the physics student perspective, the computer science student perspective, and the mathematician's perspective. The physics student perspective is that vectors are arrows pointing in space. What defines a given vector is its length and the direction it's pointing, but as long as those two facts are the same, you can move it all around, and it's still the same vector. Vectors that live in the flat plane are two-dimensional, and those sitting in broader space that you and I live in are three-dimensional. The computer science perspective is that vectors are ordered lists of numbers. For example, let's say you were doing some analytics about house prices, 

In [8]:
# Split the transcript into overlapping pieces: chunk_size=1024 caps each
# piece, and chunk_overlap=256 lets neighbouring pieces share some text.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1024,
    chunk_overlap=256,
)
# create_documents takes a list of texts; the transcript is the only entry.
chunks = splitter.create_documents([transcript])

In [9]:
# Sanity check: how many chunks the splitter produced.
len(chunks)

13

In [11]:
# Display the Document objects created from the transcript.
chunks

[Document(metadata={}, page_content="[Translated by Grant Sanderson. Submit corrections at criblate.com] The fundamental, root-of-it-all building block for linear algebra is the vector. So it's worth making sure that we're all on the same page about what exactly a vector is. You see, broadly speaking, there are three distinct but related ideas about vectors, which I'll call the physics student perspective, the computer science student perspective, and the mathematician's perspective. The physics student perspective is that vectors are arrows pointing in space. What defines a given vector is its length and the direction it's pointing, but as long as those two facts are the same, you can move it all around, and it's still the same vector. Vectors that live in the flat plane are two-dimensional, and those sitting in broader space that you and I live in are three-dimensional. The computer science perspective is that vectors are ordered lists of numbers. For example, let's say you were doin

In [12]:
from dotenv import load_dotenv

# Load variables from a .env file into the environment before any OpenAI
# client is constructed (presumably this supplies OPENAI_API_KEY — confirm).
load_dotenv()

# Embed every chunk and build a FAISS vector store from the results.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
vector_store = faiss.FAISS.from_documents(
    documents=chunks,
    embedding=embeddings,
)

In [13]:
# Show the mapping from FAISS index position to docstore document ID.
vector_store.index_to_docstore_id

{0: '1196262b-2c12-4f46-a928-da7eb3309175',
 1: '7c3fa3a8-3d1a-4e8e-a34e-ea89eb980826',
 2: 'a97275e1-307a-4eb5-abc6-20c3861cb750',
 3: 'e99eb775-02a8-41df-a411-36fc4bbe87e8',
 4: '052ed5e2-3535-45db-859b-7a75b2a24d19',
 5: 'a387d544-9422-47ee-8b0f-499147ffdee1',
 6: '6cd4329b-0585-40cc-84da-ab80153a44ec',
 7: '2b8a4750-5701-4c9d-aa60-35b902e50d1d',
 8: '782fac62-9736-4143-92b8-a5ba62bfb2fe',
 9: 'abe9f0e2-73f3-4043-b812-045d71461fb7',
 10: '012463c7-c881-4b13-a4c0-7d426656e7ff',
 11: 'd63659ba-72bc-4e47-bae8-f93dee7b37aa',
 12: '83938c85-720d-4fa9-b080-72116b6885f4'}

In [14]:
# Fetch the first indexed chunk by its docstore ID. The ID is read from the
# store instead of being pasted in: no explicit ids were passed when the store
# was built, so they are auto-generated and change every time it is rebuilt.
# A hard-coded ID would stop matching after Restart & Run All.
vector_store.get_by_ids([vector_store.index_to_docstore_id[0]])

[Document(id='1196262b-2c12-4f46-a928-da7eb3309175', metadata={}, page_content="[Translated by Grant Sanderson. Submit corrections at criblate.com] The fundamental, root-of-it-all building block for linear algebra is the vector. So it's worth making sure that we're all on the same page about what exactly a vector is. You see, broadly speaking, there are three distinct but related ideas about vectors, which I'll call the physics student perspective, the computer science student perspective, and the mathematician's perspective. The physics student perspective is that vectors are arrows pointing in space. What defines a given vector is its length and the direction it's pointing, but as long as those two facts are the same, you can move it all around, and it's still the same vector. Vectors that live in the flat plane are two-dimensional, and those sitting in broader space that you and I live in are three-dimensional. The computer science perspective is that vectors are ordered lists of nu

In [15]:
# Wrap the store as a retriever: similarity search, 4 results per query (k=4).
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 4},
)

In [16]:
# Show the retriever's repr (tags, backing vector store, search kwargs).
retriever

VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x00000220A8C8E120>, search_kwargs={'k': 4})

In [17]:
# Smoke-test retrieval with a sample query and display the returned Documents.
retriever.invoke("What are vectors?")

[Document(id='1196262b-2c12-4f46-a928-da7eb3309175', metadata={}, page_content="[Translated by Grant Sanderson. Submit corrections at criblate.com] The fundamental, root-of-it-all building block for linear algebra is the vector. So it's worth making sure that we're all on the same page about what exactly a vector is. You see, broadly speaking, there are three distinct but related ideas about vectors, which I'll call the physics student perspective, the computer science student perspective, and the mathematician's perspective. The physics student perspective is that vectors are arrows pointing in space. What defines a given vector is its length and the direction it's pointing, but as long as those two facts are the same, you can move it all around, and it's still the same vector. Vectors that live in the flat plane are two-dimensional, and those sitting in broader space that you and I live in are three-dimensional. The computer science perspective is that vectors are ordered lists of nu

In [18]:
# Chat model used to generate the final answer.
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0.2,
)

In [19]:
# Grounded-QA prompt: tells the model to answer only from the supplied
# transcript context. The template text (including its leading indentation)
# is kept exactly as authored because every character is sent to the model.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="""
    You are a helpful assistant.
    Answer ONLY from the provided transcript context.
    if the context is insufficient, just say don't know.

    {context}
    Question: {question}
    """,
)

In [20]:
# The question to answer; used for retrieval here and for the prompt below.
question = "What are vectors?"
# Fetch the chunks most similar to the question (the retriever is set to k=4).
retrieved_docs = retriever.invoke(question)

In [21]:
# Concatenate the text of each retrieved chunk, separated by a blank line.
context_text = "\n\n".join([hit.page_content for hit in retrieved_docs])

In [22]:
# Fill the template's {context} and {question} placeholders.
final_prompt = prompt.invoke(
    {
        "context": context_text,
        "question": question,
    }
)

In [23]:
# Inspect the fully rendered prompt before sending it to the model.
final_prompt

StringPromptValue(text="\n    You are a helpful assistant.\n    Answer ONLY from the provided transcript context.\n    if the context is insufficient, just say don't know.\n\n    [Translated by Grant Sanderson. Submit corrections at criblate.com] The fundamental, root-of-it-all building block for linear algebra is the vector. So it's worth making sure that we're all on the same page about what exactly a vector is. You see, broadly speaking, there are three distinct but related ideas about vectors, which I'll call the physics student perspective, the computer science student perspective, and the mathematician's perspective. The physics student perspective is that vectors are arrows pointing in space. What defines a given vector is its length and the direction it's pointing, but as long as those two facts are the same, you can move it all around, and it's still the same vector. Vectors that live in the flat plane are two-dimensional, and those sitting in broader space that you and I live

In [25]:
# Send the rendered prompt to the chat model.
answer = llm.invoke(final_prompt)
# Print only the text content of the model's reply.
print(answer.content)

Vectors are mathematical objects that can be understood from three distinct perspectives: 

1. **Physics Student Perspective**: Vectors are arrows pointing in space, defined by their length and direction. They can be two-dimensional (in a flat plane) or three-dimensional (in broader space).

2. **Computer Science Perspective**: Vectors are ordered lists of numbers. For example, in analytics, a vector could represent features of a house, such as square footage and price, modeled as a pair of numbers.

3. **Mathematician's Perspective**: Vectors are generalized objects that can be anything where there is a sensible notion of adding two vectors and multiplying a vector by a number. 

In a geometric context, vectors are often represented as arrows in a coordinate system, with their tail at the origin and their tip indicating the vector's direction and magnitude. In two dimensions, they are represented by a pair of numbers, while in three dimensions, they are represented by an ordered tripl

## Building a Chain

In [26]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser

In [27]:
def format_docs(retrieved_docs):
    """Join the text of retrieved documents into a single context string.

    Args:
        retrieved_docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` values in input order, separated by one blank
        line; an empty string when no documents are given.
    """
    separator = "\n\n"
    return separator.join([doc.page_content for doc in retrieved_docs])

In [28]:
# Fan the incoming question out to two branches and collect them in a dict:
#   context  -> retrieve matching chunks, then join their text
#   question -> the input passed through unchanged
# NOTE(review): this rebinds `parallel_chain`, which the first cell also
# imports from Chains.practice — that import looks unused; confirm.
parallel_chain = RunnableParallel(
    context=retriever | RunnableLambda(format_docs),
    question=RunnablePassthrough(),
)

In [29]:
# Check the intermediate result: a dict holding the joined context and the question.
parallel_chain.invoke("What are vectors?")

{'context': "[Translated by Grant Sanderson. Submit corrections at criblate.com] The fundamental, root-of-it-all building block for linear algebra is the vector. So it's worth making sure that we're all on the same page about what exactly a vector is. You see, broadly speaking, there are three distinct but related ideas about vectors, which I'll call the physics student perspective, the computer science student perspective, and the mathematician's perspective. The physics student perspective is that vectors are arrows pointing in space. What defines a given vector is its length and the direction it's pointing, but as long as those two facts are the same, you can move it all around, and it's still the same vector. Vectors that live in the flat plane are two-dimensional, and those sitting in broader space that you and I live in are three-dimensional. The computer science perspective is that vectors are ordered lists of numbers. For example, let's say you were doing some analytics about h

In [30]:
# Output parser used as the last step of the chain defined below.
parser = StrOutputParser()

In [32]:
# End-to-end pipeline: build inputs -> render prompt -> call model -> parse output.
main_chain = (
    parallel_chain
    | prompt
    | llm
    | parser
)

In [33]:
# Run the full chain on a sample question and display the answer.
main_chain.invoke("What are vectors?")

"Vectors are fundamental building blocks in linear algebra, broadly defined in three perspectives: \n\n1. **Physics Student Perspective**: Vectors are arrows pointing in space, defined by their length and direction. They can be two-dimensional (in a flat plane) or three-dimensional (in broader space).\n\n2. **Computer Science Perspective**: Vectors are ordered lists of numbers. For example, in analytics, a vector could represent features of a house, like square footage and price, modeled as a pair of numbers.\n\n3. **Mathematician's Perspective**: Vectors can be generalized to anything where there is a sensible notion of adding two vectors and multiplying a vector by a number.\n\nIn a geometric context, vectors are often visualized as arrows in a coordinate system, with their tail at the origin and their tip indicating direction and magnitude. In two dimensions, they are represented by ordered pairs of numbers, while in three dimensions, they are represented by ordered triplets."