In [1]:
!pip install -q python-dotenv

In [2]:
import os
from dotenv import load_dotenv

load_dotenv()

False

In [3]:
from google.colab import userdata
OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')

# This is the YouTube video we're going to use.
YOUTUBE_VIDEO = "https://www.youtube.com/watch?v=cdiD-9MMpb0"

## Setting up the model
Define the LLM that will be part of the workflow

In [4]:
!pip install -q langchain openai langchain_openai

In [5]:
from langchain_openai.chat_models import ChatOpenAI
model = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model="gpt-3.5-turbo")

### Testing the model

In [34]:
model.invoke("Which NBA team won the championship in 2021?")

AIMessage(content='The Milwaukee Bucks won the NBA championship in 2021.', response_metadata={'token_usage': {'completion_tokens': 12, 'prompt_tokens': 18, 'total_tokens': 30}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-f0c1f90e-2672-4ef1-ba19-724caf83485b-0')

### Transforming it in `STR`

In [35]:
from langchain_core.output_parsers import StrOutputParser
parser = StrOutputParser()

chain = model | parser
chain.invoke("Which NBA team won the championship in 2021?")

'The Milwaukee Bucks won the NBA championship in 2021.'

## Introducing `prompt` templates

In [36]:
from langchain.prompts import ChatPromptTemplate
template = """
Answer the question based on the context below. If you can't
answer the question, reply "I don't know".

Context: {context}
Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)
prompt.format(context="The Great Wall of China is one of the Seven Wonders of the Medieval World", question="What is one of the Seven Wonders of the Medieval World?")

'Human: \nAnswer the question based on the context below. If you can\'t\nanswer the question, reply "I don\'t know".\n\nContext: The Great Wall of China is one of the Seven Wonders of the Medieval World\nQuestion: What is one of the Seven Wonders of the Medieval World?\n'

In [37]:
chain = prompt | model | parser
chain.invoke({
    "question": "What is one of the Seven Wonders of the Medieval World?",
    "context": "The Great Wall of China is one of the Seven Wonders of the Medieval World"
})

'The Great Wall of China'

## Combining `Chains`
We can combine different chains to create more complex workflows. For example, let's create a second chain that translates the answer from the first chain into a different language.

Let's start by creating a new prompt template for the translation chain.

In [10]:
# Create a `translation` chain
translation_prompt = ChatPromptTemplate.from_template(
    "Translate the {answer} to the {language}"
)

In [39]:
from operator import itemgetter

translation_chain = (
    {"answer": chain, "language": itemgetter("language")} | translation_prompt | model | parser
)

translation_chain.invoke(
    {
        "context": "The Great Wall of China is one of the Seven Wonders of the Medieval World",
        "question": "What is one of the Seven Wonders of the Medieval World?",
        "language": "Chinese",
    }
)

'中国长城 (Zhōngguó Chángchéng)'

## Transcribing the YouTube Video
The context we want to send the model comes from a YouTube video. Let's download the video and transcribe it using OpenAI's Whisper - https://openai.com/research/whisper

In [12]:
!pip install -q whisper pytube openai-whisper

In [13]:
import tempfile
import whisper
from pytube import YouTube

# create an `if` only if we haven't created the transcription file yet.
if not os.path.exists("transcription.txt"):
    youtube = YouTube(YOUTUBE_VIDEO)
    audio = youtube.streams.filter(only_audio=True).first()

    # load the `base` model
    whisper_model = whisper.load_model("base")
    with tempfile.TemporaryDirectory() as tmpdir:
        file = audio.download(output_path=tmpdir)
        transcription = whisper_model.transcribe(file, fp16=False)["text"].strip()
        with open("transcription.txt", "w") as file:
            file.write(transcription)

- Let's read the transcription and display the first few characters to ensure everything works as expected.

In [14]:
with open("transcription.txt", "r") as file:
    transcription = file.read()

transcription[:100]

"I think it's possible that physics has exploits and we should be trying to find them. arranging some"

## Using the entire transcription as context
If we try to invoke the chain using the transcription as context, the model will return an error because the context is too long.

Large Language Models support limitted context sizes. The video we are using is too long for the model to handle, so we need to find a different solution.

In [15]:
try:
  chain.invoke({
      "context": transcription,
      "question": "Is reading papers a good idea?"
  })
except Exception as e:
  print(e)

Error code: 400 - {'error': {'message': "This model's maximum context length is 16385 tokens. However, your messages resulted in 47047 tokens. Please reduce the length of the messages.", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}


## Splitting the transcription
Since we can't use the entire transcription as the context for the model, a potential solution is to split the transcription into smaller chunks. We can then invoke the model using only the relevant chunks to answer a particular question.

- Let's start loading the transcription in `memory`

In [16]:
!pip install -q langchain_community

In [17]:
from langchain_community.document_loaders import TextLoader

loader = TextLoader("transcription.txt")
text_documents = loader.load()

There are many different ways to split a document - https://python.langchain.com/docs/modules/data_connection/document_transformers/.

In [18]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
)
documents = text_splitter.split_documents(text_documents)

## Finding the relevant chunks
Given a particular question, we must find the relevant chunks from the transcription to send to the model. Here is where the idea of `embeddings` comes in - https://dashboard.cohere.com/playground/embed.

In [40]:
from langchain_openai.embeddings import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
embedded_query = embeddings.embed_query("What is one of the Seven Wonders of the Medieval World?")

print(f"Embedding lenght: {len(embedded_query)}")
print(embedded_query[:10])

Embedding lenght: 1536
[0.014555458821796902, -0.010473769094106803, 0.007913086514982138, -0.0074895154227005364, -0.006648790050637212, -0.008824407069092396, -0.020305763805968755, -0.03126728143058164, 0.002188452971761366, -0.030651178023626584]


In [41]:
sentence_1 = embeddings.embed_query("John's favorite sport is basketball")
sentence_2 = embeddings.embed_query("Emma's favorite food is sushi")


We can now compute the similarity between the query and each of the two sentences. The closer the embeddings are, the more similar the sentences will be.

We can use Cosine Similarity to calculate the similarity between the query and each of the sentences - https://en.wikipedia.org/wiki/Cosine_similarity.

In [42]:
from sklearn.metrics.pairwise import cosine_similarity

query_sentence_1 = cosine_similarity([embedded_query], [sentence_1])[0][0]
query_sentence_2 = cosine_similarity([embedded_query], [sentence_2])[0][0]

query_sentence_1, query_sentence_2

(0.7232313849506155, 0.6990840014188955)

## Setting up a Vector Store
We need an efficient way to store document chunks, their embeddings, and perform similarity searches at scale. To do this, we'll use a vector store. A vector store is a database of embeddings that specializes in fast similarity searches.

To understand how it works, first let's create the `vector-store` in memory.

In [25]:
!pip install --upgrade --quiet  "docarray"

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/270.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━[0m [32m225.3/270.2 kB[0m [31m6.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m270.2/270.2 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [43]:
from langchain_community.vectorstores import DocArrayInMemorySearch

vectorstore1 = DocArrayInMemorySearch.from_texts(
    [
        "Anna's brother is Michael",
        "David and Sarah are twins",
        "Lisa prefers red apples",
        "Mark's father is a doctor",
        "Sophia rides a bicycle",
        "Emily owns a bookstore",
    ],
    embedding=embeddings,
)

In [44]:
vectorstore1.similarity_search_with_score("Who likes bicycle", k=3)

[(Document(page_content='Sophia rides a bicycle'), 0.8466580439383807),
 (Document(page_content='Lisa prefers red apples'), 0.7709716918314516),
 (Document(page_content='Emily owns a bookstore'), 0.7585758891421106)]

## Connecting the vector store to the chain
We can use the vector store to find the most relevant chunks from the transcription to send to the model.

We need to configure a Retriever. The retriever will run a similarity search in the vector store and return the most similar documents back to the next step in the chain - https://python.langchain.com/docs/modules/data_connection/retrievers/.

In [47]:
retriever_1 = vectorstore1.as_retriever()
retriever_1.invoke("What fruits does Lisa prefer?")

[Document(page_content='Lisa prefers red apples'),
 Document(page_content='Emily owns a bookstore'),
 Document(page_content="Anna's brother is Michael"),
 Document(page_content='Sophia rides a bicycle')]

Our prompt expects two parameters, `"context"` and `"question"`. We can use the retriever to find the chunks we'll use as the context to answer the question.

We can create a map with the two inputs by using the `RunnableParallel` and `RunnablePassthrough` classes. This will allow us to pass the context and question to the prompt as a map with the keys "context" and "question."

In [48]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
setup = RunnableParallel(context=retriever_1, question=RunnablePassthrough())
setup.invoke("What fruits does Lisa prefer?")

{'context': [Document(page_content='Lisa prefers red apples'),
  Document(page_content='Emily owns a bookstore'),
  Document(page_content="Anna's brother is Michael"),
  Document(page_content='Sophia rides a bicycle')],
 'question': 'What fruits does Lisa prefer?'}

In [49]:
chain = setup | prompt | model | parser
chain.invoke("What fruits does Lisa prefer?")

"I don't know."

## Loading transcription into the vector store
We initialized the vector store with a few random strings. Let's create a new vector store using the chunks from the video transcription.

In [31]:
vectorstore_2 = DocArrayInMemorySearch.from_documents(
    documents,
    embeddings
)

In [33]:
chain = (
    {"context": vectorstore_2.as_retriever(), "question": RunnablePassthrough()}
    | prompt
    | model
    | parser
)
chain.invoke("What is the essential message from the video?")

'The essential message from the video is about the importance of collecting ground truth data for training neural networks, different mechanisms for collecting training data such as human annotation and simulation, and the significance of incremental product improvement and generating revenue along the way.'