In [None]:
from langgraph.graph import START, END, StateGraph
from langgraph.types import Send
from typing import TypedDict
import subprocess
import textwrap
from langchain.chat_models import init_chat_model
from typing_extensions import Annotated
import operator


import os
from openai import OpenAI
from dotenv import load_dotenv

load_dotenv()
llm = init_chat_model("openai:gpt-4o-mini")

api_key = os.getenv("OPENAI_API_KEY")


class State(TypedDict):
    video_file: str
    audio_file: str
    transcription: str
    summaries: Annotated[list[str], operator.add]

In [None]:
def extract_audio(state: State):
    output_file = state["video_file"].replace("mp4", "mp3")
    command = [
        "ffmpeg",
        "-i",
        state["video_file"],
        "-filter:a",
        "atempo=2.0",
        output_file,
        "-y",
    ]
    subprocess.run(command)
    return {
        "audio_file": output_file,
    }


def transcribe_audio(state: State):
    client = OpenAI(api_key=api_key)
    with open(state["audio_file"], "rb") as audio_file:
        transcription = client.audio.transcriptions.create(
            model="whisper-1",
            response_format="text",
            file=audio_file,
            language="en",
            prompt="",
        )
        return {"transcription": transcription}


def dispatch_summarizer(state: State):
    transcribtion = state["transcription"]
    chunks = []

    for idx, chunk in enumerate(textwrap.wrap(transcribtion, 500)):
        chunks.append({"id": idx + 1, "chunk": chunk})

    return [Send("summarize_chunk", chunk) for chunk in chunks]


def summarize_chunk(chunk):
    chunk_id = chunk["id"]
    chunk = chunk["chunk"]

    response = llm.invoke(
        f"""
        다음의 텍스트를 요약하세요.
        
        Text: {chunk}
        """
    )
    summary = f"[Chunk {chunk_id} Summary]: {response.content}"
    return {"summaries": [summary]}

In [8]:
graph_builder = StateGraph(State)

graph_builder.add_node("extract_audio", extract_audio)
graph_builder.add_node("transcribe_audio", transcribe_audio)
graph_builder.add_node("summarize_chunk", summarize_chunk)

graph_builder.add_edge(START, "extract_audio")
graph_builder.add_edge("extract_audio", "transcribe_audio")
graph_builder.add_conditional_edges(
    "transcribe_audio", dispatch_summarizer, ["summarize_chunk"]
)
graph_builder.add_edge("summarize_chunk", END)

graph = graph_builder.compile()


In [9]:
graph.invoke(
    {"video_file": "video.mp4"},
)

Summarizing chunk 1 chunk: Running a game requires RAM, which is where the CP...
Summarizing chunk 2 chunk: easy. Creating a single word file today and writin...
Summarizing chunk 3 chunk: form of troopers. You would create 151 Pokemon fir...
Summarizing chunk 4 chunk: memory. That wouldn't work, so back then, we used ...
Summarizing chunk 6 chunk: multiplication function yourself. For example, if ...
Summarizing chunk 5 chunk: this, we needed 151 bars before, but now we can im...
Summarizing chunk 7 chunk: need to prepare. Create a variable to store the nu...
Summarizing chunk 10 chunk: like this. So, if you want to multiply by 2, you j...
Summarizing chunk 8 chunk: Then, add 10 to the result from the variable. And ...
Summarizing chunk 9 chunk: completed in just 8 loops, so the GPU is much bett...
Summarizing chunk 11 chunk: that Pokemon can be inflicted with. For example, i...
Summarizing chunk 12 chunk: I want to check if my Pokemon is currently paralyz...
Summarizing chunk 14 chun

{'video_file': 'video.mp4',
 'audio_file': 'video.mp3',
 'transcription': "Running a game requires RAM, which is where the CPU needs to store the game's data for calculation. And of course, graphic assets need to be loaded, so modern games can consume a huge amount of memory. However, there was a time when such sloppy execution wasn't acceptable. There was a masterpiece called Pokemon Red, but the Game Boy it ran on was a paltry device. The CPU performance was so poor that a mistake in coding could cause an explosion. With only 8 kilobits of RAM, uploading data wasn't easy. Creating a single word file today and writing just one sentence would probably exceed 8 kilobits. So, developers at the time had to employ incredible optimization skills when coding. The Pokemon game had a feature called Pokedex, which remembered and counted the Pokemon you already seen. So in the first generation, there were 151 Pokemon, and whether or not you encountered them all had to be recorded in memory. How 

In [4]:
import textwrap

transcription = "Running a game requires RAM, which is where the CPU needs to store the game's data for calculation. And of course, graphic assets need to be loaded, so modern games can consume a huge amount of memory. However, there was a time when such sloppy execution wasn't acceptable. There was a masterpiece called Pokemon Red, but the Game Boy it ran on was a paltry device. The CPU performance was so poor that a mistake in coding could cause an explosion. With only 8 kilobits of RAM, uploading data wasn't easy. Creating a single word file today and writing just one sentence would probably exceed 8 kilobits. So, developers at the time had to employ incredible optimization skills when coding. The Pokemon game had a feature called Pokedex, which remembered and counted the Pokemon you already seen. So in the first generation, there were 151 Pokemon, and whether or not you encountered them all had to be recorded in memory. How would you do this? If it were today, you would probably save them in the form of troopers. You would create 151 Pokemon first, and then, if you saw Pokemon number 1, change the first digit to troopers, and if you saw Pokemon number 3, change the third digit to troopers. I think the general method would be like this, and troopers usually take up 1 bot of storage, so if you save them like this, you would need a total of 151 bots of memory. This would implement the function, but, uh, just to implement one function, you would end up using 2% of the entire Game Boy memory. That wouldn't work, so back then, we used a much more efficient method, splitting 1 byte, the smallest unit of data, into 8 smaller pieces. This is because 1 byte has 8 hidden spaces that can store numbers. Each space is called a bit. Only 0s and 1s can be stored here. So, after splitting it into 8, we store whether or not we've encountered Pokemon 1 to 8 here. If we encounter a sea above, we fill in 1 here, and if we encounter a grass above, we fill in 1 here again, and so on. If we do this, we needed 151 bars before, but now we can implement the function with about 19 bars, which is about 1. There were many cases where data was split and compressed in that way to make the Game Boy feel good. Nowadays, computers have multiplication and division components built into the CPU. But back then, the CPU didn't have those components, so shockingly, you couldn't code for multiplication and division. So, if you wanted to multiply, you had to write a long code to implement the multiplication function yourself. For example, if you wanted to multiply 10 by 5, you would do the addition multiple times, like in elementary school math class. But this would have performance issues. If you wanted to multiply by a large number like 50 or 100, you would have to run the loop 50 or 100 times. That's right, if you did that, the Game Boy CPU would explode. So, I created a very novel multiplication method and used it directly. For example, if you want to multiply 10 by 5, you first need to prepare. Create a variable to store the number to be multiplied, 10. Then, convert the number to be multiplied, 5, into binary like this. That's all the preparation. Now, you have this binary number, right? Let's subtract bits 1 by 1 from the rightmost 1. Now, the subtracted bit is 1. Then, we add the value in the variable to the result. Now, if the subtracted bit is 0, we just pass. And each time we subtract a bit, we multiply the variable by 2. If we subtract a bit like this, it's 1. Then, add 10 to the result from the variable. And since the turn is over, multiply the variable by 2. If you subtract the next bit, it's 0. Then, pass. Okay, next, multiply the variable by 2. Okay, the next bit is 1. Okay, then add the variable to the result. Okay, then multiply it by 2. And you keep doing this. Anyway, this way, you can get the 10 asterisk 5 result like this. But the important thing is that even if the number to be multiplied is not 5 but a larger number, the multiplication is completed in just 8 loops, so the GPU is much better. But there's one strange thing. Doesn't it seem a bit strange that we're multiplying by 2 now? We said we were implementing multiplication, but aren't we already using multiplication? Doesn't that seem strange? Actually, multiplying by 2 can be handled using a representation. So you just shift the bits one space to the left, and that's it. For example, this is the number 6. This is the number 12. If you convert these to binary, it comes out like this. So, if you want to multiply by 2, you just shift it one space to the left. Do you understand? Right? Conversely, if you want to divide by 2, you just shift it one space to the right. And shifting bits is very fast for the CPU. That's why you often see square numbers used in coding, and these are all numbers with history and tradition. In the past, it was good for performance and easy to code to use values that were easy to divide and multiply by 2. There are about 5 status effects that Pokemon can be inflicted with. For example, if my Pokemon is inflicted with a status ailment, I need to store this information in memory, but these aren't each stored in a single byte. Instead, I roughly split one byte in 8, and then if the Pokemon is paralyzed, I set the bit here to 1. If it's poisoned, I set the bit here to 1. Of course, this is done to save memory, but there's another huge advantage to this. I can perform comparison operations at the speed of light. For example, what if I want to check if my Pokemon is currently paralyzed or poisoned? I could probably use multiple if conditional statements like this, right? But there's no reason to write such long code, because if I design the data structure like I did before, I can use the n operator to check it in one go. Now, n is an operator that compares bits. For example, if you use it on these two numbers, it's an operator that compares the bits in the same position like this, and only leaves a 1 if both are 1. So, if you want to check for paralysis or poisoning, you create a number like this with 1 in the paralysis and poison bit positions. Then, you can do an end operation with your current Pokemon status. Then, since it outputs a 1 only if both numbers in the same position are 1, if there's a 1 somewhere, you can assume it's paralyzed or poisoned, right? Then, with just one operation, you can check for paralysis or poisoning. You can see traces of efficient operations using bit operations like this. Anyway, there was a time in the past when you coded while fiddling with the RAM and CPU like this, but these days, all kinds of CPU parts are naturally included like this, and there's enough memory, the CPU cache is big, and the compilers are now good. So, even if you write code that is crappy, the code will be automatically optimized, so there's no reason to do that these days. But I think this is a bit too much.\n"

for idx, chunk in enumerate(textwrap.wrap(transcription, 500)):
    print(f"Chunk {idx + 1}:\n{chunk}\n")

Chunk 1:
Running a game requires RAM, which is where the CPU needs to store the game's data for calculation. And of course, graphic assets need to be loaded, so modern games can consume a huge amount of memory. However, there was a time when such sloppy execution wasn't acceptable. There was a masterpiece called Pokemon Red, but the Game Boy it ran on was a paltry device. The CPU performance was so poor that a mistake in coding could cause an explosion. With only 8 kilobits of RAM, uploading data wasn't

Chunk 2:
easy. Creating a single word file today and writing just one sentence would probably exceed 8 kilobits. So, developers at the time had to employ incredible optimization skills when coding. The Pokemon game had a feature called Pokedex, which remembered and counted the Pokemon you already seen. So in the first generation, there were 151 Pokemon, and whether or not you encountered them all had to be recorded in memory. How would you do this? If it were today, you would probably 