In [60]:
import textwrap

def print_wrapped(text: str, width: int = 128):
    wrapped_content = textwrap.fill(text=text, width=width)
    print(wrapped_content)

# Set up SQLite

It's useful to save some metadata about the video for future use.

In [61]:
from peewee import SqliteDatabase, Model, CharField, BooleanField, IntegerField, DateTimeField
sql_db = SqliteDatabase('data/videos.sqlite3')

In [62]:
class Video(Model):
    yt_video_id = CharField(unique=True)
    title = CharField()
    language = CharField(null=True)
    channel = CharField(null=True)
    saved_on = DateTimeField(null=True)
    preprocessed = BooleanField(null=True)
    chunk_size = IntegerField(null=True)
    transcript_token_num = IntegerField(null=True)

    class Meta:
        database = sql_db

In [63]:
sql_db.connect()
sql_db.create_tables([Video])

In [64]:
from datetime import datetime

print(datetime.now())

2024-06-08 13:52:58.690511


In [None]:
sql_db.close()

# Fetch transcript

In [65]:
from modules.youtube import fetch_youtube_transcript, extract_youtube_video_id
from modules.helpers import save_response_as_file
from modules.helpers import num_tokens_from_string

video_url = "https://youtu.be/Jz8Gs4UHTO8?si=y3Fwkehso7pJ0RO1"
video_id = extract_youtube_video_id(video_url)
transcript = fetch_youtube_transcript(video_url)

In [66]:
from modules.youtube import get_video_metadata

meta = get_video_metadata(video_url)
video_title = meta['name']
print(video_title)
save_response_as_file("transcripts", video_title, transcript)

Big Misconceptions about Bare Metal, Virtual Machines, and Containers


In [69]:
video = Video.create(
    yt_video_id = video_id,
    title = video_title,
    channel = meta['channel']
)

IntegrityError: UNIQUE constraint failed: video.yt_video_id

In [70]:
Video.update({Video.saved_on: datetime.now()}).where(Video.yt_video_id == video_id).execute()

1

# Split yet unprocessed transcript into chunks

A relatively small chunk size is used, as the model tends to ignore the middle part of the transcript, if it's too long. Probably because of "Lost in the middle".

- https://arxiv.org/abs/2307.03172

In [74]:
CHUNK_SIZE_FOR_UNPROCESSED_TRANSCRIPT = 932

In [82]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE_FOR_UNPROCESSED_TRANSCRIPT,
    chunk_overlap=32,
    length_function=num_tokens_from_string,
    is_separator_regex=False,
)

# Split the transcript into chunks 
transcript_excerpts = text_splitter.create_documents([transcript])
print(f"Split unprocessed transcript into {len(transcript_excerpts)} chunks.")

for t in transcript_excerpts:
    print(t.page_content)
    print("---------")

Split unprocessed transcript into 2 chunks.
Hi. Welcome to another system design video.
What are the differences between bare metal,
virtual machines, and containers?
When deploying a modern application stack,
how do we decide which one to use?
In this video, we’ll take a closer look at each of these.
Let’s dive right in.
  The granddaddy of these is bare metal. 
A bare metal server is a physical computer
that is a single tenant only.
Once upon a time, all servers were bare metal.
Bare metal gives us complete control 
over the hardware resources
and the software stack to run.
For software applications that require the absolute
highest performance from the hardware,
bare metal could be a good way to go.
Bare metal servers are physically isolated.
The isolation provides two benefits:
First, it is not affected by the noisy neighbor problem.
This problem occurs when one tenant's performance
is impacted because of the activities
of another tenant sharing the same hardware.
Second, the isola

In [76]:
num_tokens_transcript = num_tokens_from_string(transcript, encoding_name="cl100k_base")
print_wrapped(f"The unprocessed transcript has {num_tokens_transcript} tokens.")

The unprocessed transcript has 1212 tokens.


# Initialize LLM and prompts

## Option 1: OpenAI (GPT-3.5-turbo)

In [153]:
from os import getenv
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv

load_dotenv()

llm = ChatOpenAI(
    api_key=getenv("OPENAI_API_KEY"),
    temperature=0.3,
    model="gpt-3.5-turbo",
    max_tokens=2048
)

In [78]:
from langchain_core.prompts.chat import SystemMessage, HumanMessagePromptTemplate

user_prompt = HumanMessagePromptTemplate.from_template(
    """Here is part {number}, delimited by ---

    ---
    {transcript_excerpt}
    ---
    """
)

In [79]:
system_prompt = "You are giong to receive excerpts from an automatically generated video transcript. Your task is to convert every excerpt into structured text. Ensure that the content of the excerpts remains unchanged. Add appropriate punctuation, correct any grammatical errors, remove filler words and divide the text into logical paragraphs, separating them with a single new line. The final output should be in plain text and only include the modified transcript excerpt without any prelude."
print("Token number in system prompt: " + str(num_tokens_from_string(system_prompt)))

Token number in system prompt: 85


# Process transcript

In [83]:
batch_messages = []
for num, excerpt in enumerate(transcript_excerpts):
    batch_messages.append([
        SystemMessage(content=system_prompt),
        user_prompt.format(number=num, transcript_excerpt=excerpt.page_content)
    ])
response = llm.generate(batch_messages)

In [170]:
result =  "\n\n".join(gen[0].text for gen in response.generations)

AttributeError: 'list' object has no attribute 'generations'

In [87]:
num_tokens_response = num_tokens_from_string(result, encoding_name="cl100k_base")
print(f"The initial transcript has {num_tokens_transcript} tokens.")
print(f"The response has {num_tokens_response} tokens.")

The initial transcript has 1212 tokens.
The response has 1180 tokens.


In [89]:
save_response_as_file(dir_name="transcripts_processed", filename=video_title, file_content=result)

In [90]:
Video.update(
    {
        Video.preprocessed: True,
        Video.transcript_token_num: num_tokens_transcript
    }).where(Video.yt_video_id == video_id).execute()

1

# Split the processed transcript

In [91]:
CHUNK_SIZE_FOR_PROCESSED_TRANSCRIPT = 1024

In [92]:
Video.update(
    {
        Video.chunk_size: CHUNK_SIZE_FOR_PROCESSED_TRANSCRIPT
    }).where(Video.yt_video_id == video_id).execute()

1

In [93]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=0)
chunks = splitter.create_documents([result])
for chunk in chunks:
    print_wrapped(chunk.page_content)
    print("----------------")

Hi. Welcome to another system design video.  What are the differences between bare metal, virtual machines, and containers? When
deploying a modern application stack, how do we decide which one to use? In this video, we’ll take a closer look at each of
these. Let’s dive right in.
----------------
The granddaddy of these is bare metal. A bare metal server is a physical computer that is a single tenant only. Once upon a
time, all servers were bare metal. Bare metal gives us complete control over the hardware resources and the software stack to
run. For software applications that require the absolute highest performance from the hardware, bare metal could be a good way
to go. Bare metal servers are physically isolated. The isolation provides two benefits: First, it is not affected by the noisy
neighbor problem. This problem occurs when one tenant's performance is impacted because of the activities of another tenant
sharing the same hardware. Second, the isolation provides the highest leve

# Create a vector DB

## Option 1: OpenAI embeddings

In [94]:
from langchain_openai import OpenAIEmbeddings

# https://platform.openai.com/docs/models/embeddings
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

In [110]:
import chromadb
from langchain_chroma import Chroma
from chromadb.config import Settings

chroma_settings = Settings(allow_reset=True)
chroma_client = chromadb.HttpClient(settings=chroma_settings)

db = Chroma(
    client=chroma_client, collection_name=f"{video_id}_{CHUNK_SIZE_FOR_PROCESSED_TRANSCRIPT}", embedding_function=embeddings
)

In [111]:
import uuid

collection = chroma_client.get_or_create_collection(name=f"{video_id}_{CHUNK_SIZE_FOR_PROCESSED_TRANSCRIPT}")
if collection.count() <= 0:
    for d in chunks:
        response = embeddings.embed_query(d.page_content)
        collection.add(
            ids=[str(uuid.uuid1())],
            embeddings=[response],
            documents=[d.page_content],
            #metadatas=[d.metadata]
        )

# Test generation - answer a question

In [163]:
question = "What are the benefits of caontainers compared to virtual machines?"

In [164]:
retriever = db.as_retriever(search_kwargs={"k": 3})
relevant_docs = retriever.invoke(input=question)

In [165]:
for doc in relevant_docs:
    print_wrapped(doc.page_content)
    print("----------------------")

The host operating system. Instead of virtualizing the hardware with a hypervisor, we virtualize the operating system itself
with a piece of special software called the container engine. On top of the container engine runs many containers. Each of these
is its own application environment isolated from each other. The container engine provides even faster resource provisioning.
All the resources needed to run the application are packaged together, so that the applications can run anywhere. Containers are
scalable and portable. They are lightweight and require less hardware resources to run than virtual machines. A bare metal
server can host significantly more containers than virtual machines. Since each container runs as a native process of the host
operating system, they are much faster to start, too. All these make containers even easier to deploy and maintain at scale.
----------------------
What are the benefits of virtual machines? Virtual machines are cheaper to run. Many of them 

In [166]:
from langchain_core.prompts import PromptTemplate

rag_prompt = PromptTemplate.from_template("""Context: {context}
                                          
Answer the question based on the context provided above. Keep your answer ground in the facts of the context.
If the context does not contain the facts to answer the question, apologize and say that you don't know the answer.
                                          
Here is the question: {question}

""")

In [167]:
def format_docs_for_context(docs):
    return "\n\n---\n\n".join(doc.page_content for doc in docs)

In [168]:
from langchain_core.output_parsers import StrOutputParser

rag_chain = rag_prompt | llm | StrOutputParser()

answer = rag_chain.invoke({"question": question, "context": format_docs_for_context(relevant_docs)})

In [169]:
print_wrapped(answer)

The benefits of containers compared to virtual machines include faster resource provisioning, scalability, portability,
lightweight nature requiring less hardware resources, and the ability to host significantly more containers on a bare metal
server. Containers also start much faster as they run as native processes of the host operating system, making them easier to
deploy and maintain at scale.
