In [61]:
import os
from tqdm import tqdm
import anthropic
from bs4 import BeautifulSoup
import base64
from urllib.parse import urljoin
import cv2
from PIL import Image

client = anthropic.Anthropic()

In [62]:
INPUT_DIR = "page_content"
EXAMPLE_DIR = "examples"
SUMMARY_DIR = "summary"
MEDIA_OUTPUT_DIR = "downloaded_media"

SYSTEM = """
You are an agent that is an expert at Manim, a Python library that can be compiled to create video tutorials for educational materials. Your task is to take in a textbook chapter covering some material and create an educational tutorial using Manim, detailing technical parts of the textbook to make it intuitive. Feel free to create a voice over with the manim voiceover feature and explore the documentation. However, make sure that your final output is the manim code only and nothing more (so once you have the code **DO NOT OUTPUT ANYTHING ELSE OTHER THAN THE CODE**)! The idea is to have different sections covering the topic, with visualizations of mathematical concepts and explanatory text as needed. Ensure the absolute highest accuracy possible by using the documentation in order to prrevent any mistakes.

First, understand the topic at hand. If there are multiple, focus on the first one and then iterate through the rest. Then, understand how the textbook conveys material and equations. If there is a lot of material to cover, break it up into sections, processing each section at a time. Make it such that a beginner new to this field understands it.

Ensure that your Manim tutorial goes in depth to each of these topics, creating a detailed video of at least a minute explaining the topic, having an example, and ending in a summary. Compile all the events in a term called FullTutorial. Additionally, if you need to include equations, ensure that you write it in LaTeX that can be compiled accurately. It is essential that you write safe LaTeX that only uses valid characters and formatting such that there are no issues with it. Use other materials like graphs or plots as well. When writing the steps of each topic using the Manim Community library in Python, ensuring that your code works correctly. Output a valid solution that can be run, producing a correct video without any errors whatsoever. Ensure that the text all fits in the screen and does not overlap with one another. I will provide several example input outputs to you as well initially.
"""

In [63]:
# Examples
SYSTEM += "\n\n===\nBelow are a few examples\n\n"

for file_name in os.listdir(EXAMPLE_DIR):
    path = os.path.join(EXAMPLE_DIR, file_name)
    
    with open(path, "rb") as f:
        if "in" in file_name:
            SYSTEM += "Input:\n"
        else:
            SYSTEM += "Output:\n"
        
        SYSTEM += str(f.read()) + "\n\n"

In [None]:
# Documentation
SYSTEM += "\n===\nYou will be provided the full documentation of the Manim. You can find the full set of page links and associated summary of the given page of documentation below. To view the actual details of some given documentation, feel free to call the function aivailable to you. Don't be afraid to do tthis as accuracy is your highest priority!\n\n"

for file_name in tqdm(os.listdir(INPUT_DIR)):
    path_html = os.path.join(INPUT_DIR, file_name)
    path_summ = os.path.join(SUMMARY_DIR, file_name.replace("html", "txt"))

    with open(path_html, "rb") as f:
        url = f.readline().decode('utf-8').strip()
    
    with open(path_summ, "rb") as f:
        SYSTEM += f"{url}\n"

100%|██████████| 539/539 [00:00<00:00, 2847.39it/s]


In [65]:
len(SYSTEM) / 4

136358.0

In [None]:
def link_to_file_name(link):
    return link[39:].replace('/', '.')

def extract_text_and_media(path):
    with open(path, "rb") as f:
        data = f.read()
    soup = BeautifulSoup(data, "html.parser")
    media_tags = soup.find_all(["img", "video"])
    marker = "<<<MEDIA>>>"
    media_list = []

    # Get images and replace all of them with marker
    for tag in media_tags:
        if tag.name == "img":
            src = tag.get("src") or tag.get("data‑src")
            media_list.append(("image", src))
        else:
            src = tag.get("src")
            if not src and tag.find("source"):
                src = tag.find("source").get("src")
            media_list.append(("video", src))
        tag.replace_with(marker)

    # Get all the texts and split based on marker
    full_text = soup.get_text()
    parts = [piece.strip() for piece in full_text.split(marker)]

    # Interleave
    result = []
    for i, media in enumerate(media_list):
        if parts[i]:
            result.append(("text", parts[i]))
        result.append(media)
    if len(parts) > len(media_list) and parts[-1]:
        result.append(("text", parts[-1]))

    return result

def construct_message(url, content):
    message_content = []

    for t, info in content:
        if t == "image":
            full_url = urljoin(url, info)
            path = os.path.join(MEDIA_OUTPUT_DIR, link_to_file_name(full_url))

            # No svg
            if path[-3:] == "svg":
                continue

            with open(path, "rb") as f:
                image_data = f.read()

            message_content.append({
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": "image/png",
                    "data": base64.standard_b64encode(image_data).decode("utf-8"),
                }
            })

        elif t == "video":
            # Extract middle frame
            full_url = urljoin(url, info)
            path = os.path.join(MEDIA_OUTPUT_DIR, link_to_file_name(full_url))
            cap = cv2.VideoCapture(path)
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            middle_frame_idx = frame_count // 2

            cap.set(cv2.CAP_PROP_POS_FRAMES, middle_frame_idx)
            _, frame = cap.read()
            cap.release()
            _, buffer = cv2.imencode(".png", frame)
            image_data = buffer.tobytes()

            message_content.append({
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": "image/png",
                    "data": base64.standard_b64encode(image_data).decode("utf-8"),
                }
            })
        else:
            message_content.append({ "type": "text", "text": info })
    
    return { "role": "user", "content": message_content }

def send_query(url):
    path = os.path.join(INPUT_DIR, link_to_file_name(url))

    if not os.path.exists(path):
        return "Provided url {url} does not exist. Please provide an existitng one"
    
    content = extract_text_and_media(path)
    return str(construct_message(url, content))

In [None]:
user_query = ""

with open("prompt.txt", "rb") as f:
    user_query = str(f.read())

messages = [{
    "role": "user",
    "content": user_query + "\n\nWhen you have the final working code, make sure to output it and nothing more. So when you are ready with the final code, ONLY output it -- don't include text like 'After compiling data', 'Now, based on this research', etc. -- just include the code and nothing more. Feel free to try to include a lot of visuals to make the code look nice. Furthermore, make sure to reference the documentation via the aivailable function in order to maximize accuracy! Don't go overboard with looking absolutely everything up -- just do what you think will help you produce the best results. Finally, DO NOT use any special unicode characters that might even remotely have a chance of causing issues with some latex interpreters!"
}]

while True:
    message = client.messages.create(
        model="claude-3-7-sonnet-20250219",
        max_tokens=15000,
        temperature=1,
        system=SYSTEM,
        tools = [
            {
                "name": "get_specific_documentatino_info",
                "description": "Gets specific documentation info from provided url",
                "input_schema": {
                    "type": "object",
                    "properties": {
                        "url": {
                            "type": "string",
                            "description": "URL whose documentation info you want returned."
                        },
                    },
                    "required": ["url"]
                }
            }
        ],
        messages=messages
    )

    has_tool_call = False
    
    for info in message.content:
        if info.type == "tool_use":
            print("[query]\n" + str(info.input["url"]) + "\n")
            result = send_query(info.input["url"])
            has_tool_call = True

            messages.append({
                "role": "assistant",
                "content": [info],
            })
            messages.append({
                "role": "user",
                "content": [{
                    "type": "tool_result",
                    "tool_use_id": info.id,
                    "content": result,
                    "cache_control": {"type": "ephemeral"}
                }]
            })
        
        else:
            print("[content]\n" + str(info) + "\n")
            messages.append({
                "role": "assistant",
                "content": [info],
            })
    
    if not has_tool_call:
        break

[content]
TextBlock(citations=None, text="I'll create a Manim animation to explain and visualize the vector semantics and embedding equations. Let me first check some relevant documentation to ensure I use Manim's features effectively.", type='text')

[query]
https://docs.manim.community/en/stable/reference/manim.mobject.text.tex_mobject.MathTex.html



NameError: name 'io' is not defined

In [60]:
print(messages[-1]["content"][0].text)

Now, based on all this research and the documentation, I'll create a comprehensive educational video on vector semantics and embeddings using Manim.

```python
from manim import *
import numpy as np

class VectorSemanticsAndEmbeddings(Scene):
    def construct(self):
        # Title Scene
        self.title_scene()
        
        # Introduction to Vector Semantics
        self.intro_section()
        
        # Dot Product
        self.dot_product_section()
        
        # Cosine Similarity
        self.cosine_similarity_section()
        
        # Term Frequency (TF)
        self.tf_section()
        
        # Inverse Document Frequency (IDF)
        self.idf_section()
        
        # TF-IDF
        self.tfidf_section()
        
        # PMI and PPMI
        self.pmi_section()
        
        # Skip-Gram Model
        self.skipgram_section()
        
        # Conclusion
        self.conclusion_section()

    def title_scene(self):
        title = Text("Vector Semantics an