In [18]:
import os
from tqdm import tqdm
import anthropic
from bs4 import BeautifulSoup
import re
from manim import * 

client = anthropic.Anthropic()

In [19]:
INPUT_DIR = "page_content"
EXAMPLE_DIR = "examples"
SUMMARY_DIR = "summary"
MEDIA_OUTPUT_DIR = "downloaded_media"
VOICEOVER_FILE_NAME = "voiceover_doc.txt"

SYSTEM = """
You are an agent that is an expert at Manim, a Python library that can be compiled to create video tutorials for educational materials. Your task is to take in a textbook chapter covering some material and create an educational tutorial using Manim, detailing technical parts of the textbook to make it intuitive. Feel free to create a voice over with the manim voiceover feature and explore the documentation. Ensure the absolute highest accuracy possible by using the documentation in order to prevent any mistakes.

First, understand the topic at hand. If there are multiple, focus on the first one and then iterate through the rest. Then, understand how the textbook conveys material and equations. If there is a lot of material to cover, break it up into sections, processing each section at a time. Make it such that a beginner new to this field understands it.

Ensure that your Manim tutorial goes in depth to each of these topics, creating a detailed video of at least a minute explaining the topic, having an example, and ending in a summary. Compile all the events in a term called FullTutorial. Additionally, if you need to include equations, ensure that you write it in LaTeX that can be compiled accurately. It is essential that you write safe LaTeX that only uses valid characters and formatting such that there are no issues with it. Use other materials like graphs or plots as well. When writing the steps of each topic using the Manim Community library in Python, ensuring that your code works correctly. Output a valid solution that can be run, producing a correct video without any errors whatsoever. Ensure that the text all fits in the screen and does not overlap with one another. I will provide several example input outputs to you as well initially.
"""

In [20]:
# Examples
SYSTEM += "\n\n===\nBelow are a few examples\n\n"

for file_name in os.listdir(EXAMPLE_DIR):
    path = os.path.join(EXAMPLE_DIR, file_name)
    
    with open(path, "rb") as f:
        if "in" in file_name:
            SYSTEM += "Input:\n"
        else:
            SYSTEM += "Output:\n"
        
        SYSTEM += str(f.read()) + "\n\n"

In [21]:
# Documentation
SYSTEM += "\n===\nYou will be provided the full documentation of the Manim. You can find the full set of page links and associated summary of the given page of documentation below. To view the actual details of some given documentation, feel free to call the function aivailable to you. Don't be afraid to do tthis as accuracy is your highest priority!\n\n"

for file_name in tqdm(os.listdir(INPUT_DIR)):
    path_html = os.path.join(INPUT_DIR, file_name)
    path_summ = os.path.join(SUMMARY_DIR, file_name.replace("html", "txt"))

    with open(path_html, "rb") as f:
        url = f.readline().decode('utf-8').strip()
    
    with open(path_summ, "rb") as f:
        SYSTEM += f"{url}\n{f.read()}\n\n"

# Voiceover (seperate from rest of documentation so include it fully here)
SYSTEM += "\n\nThe documentation for voiceover is seperate so will be fully included below (no need to look specific voiceover stuff up that's included here)\n\n"

with open(VOICEOVER_FILE_NAME, "rb") as f:
    SYSTEM += str(f.read())


100%|██████████| 539/539 [00:00<00:00, 1812.97it/s]


In [22]:
len(SYSTEM) / 4

109371.0

In [23]:
def link_to_file_name(link):
    return link[39:].replace('/', '.')

def extract_text_and_media(path):
    with open(path, "rb") as f:
        data = f.read()
    soup = BeautifulSoup(data, "html.parser")
    media_tags = soup.find_all(["img", "video"])
    marker = "<<<MEDIA>>>"
    media_list = []

    # Get images and replace all of them with marker
    for tag in media_tags:
        if tag.name == "img":
            src = tag.get("src") or tag.get("data‑src")
            media_list.append(("image", src))
        else:
            src = tag.get("src")
            if not src and tag.find("source"):
                src = tag.find("source").get("src")
            media_list.append(("video", src))
        tag.replace_with(marker)

    # Get all the texts and split based on marker
    full_text = soup.get_text()
    parts = [piece.strip() for piece in full_text.split(marker)]

    # Interleave
    result = []
    for i, media in enumerate(media_list):
        if parts[i]:
            result.append(("text", parts[i]))
        result.append(media)
    if len(parts) > len(media_list) and parts[-1]:
        result.append(("text", parts[-1]))

    return result

def construct_message_without_media(content):
    message_content = []
    for t, info in content:
        if t == "image" or t == "video":
            continue
        else:
            message_content.append({ "type": "text", "text": info })

    return { "role": "user", "content": message_content }

def send_query(url):
    path = os.path.join(INPUT_DIR, link_to_file_name(url))

    if not os.path.exists(path):
        return "Provided url {url} does not exist. Please provide an existitng one"
    
    content = extract_text_and_media(path)
    return str(construct_message_without_media(content))

In [None]:
user_query = ""
caches = 0

with open("prompt.txt", "rb") as f:
    user_query = str(f.read())

messages = [{
    "role": "user",
    "content": f"""{user_query}

Instructions:
1) Create manim code for a video that facilitates understanding -- try to name the single overarching class "FullTutorial".
2) Please use voiceover if you deem that it will be helpful.
3) Feel free to include a lot of visuals to make the animation look nice. In fact, a shorter video with a bunch of cool visuals is definitely preferred over a longer one with just text.
4) Make sure to reference the documentation (when you are even slightly confused) via the available function in order to maximize accuracy. Avoiding errors is a MUST here!

Here are some extremely important things that you must make sure to follow:
1) AVOID LATEX ERRORS AT ALL COSTS (don't do anything even slightly risky) <-- we need the code to actually run
    a) DO NOT use ANY special unicode or greek characters that might even remotely have a chance of causing issues with some latex interpreters
    b) Be very careful about special characters like &
    c) Be very careful about line breaks and stuff
2) Make sure that you fade all text/images at the appropriate time so nothing lingers for longer than its supposed to. For example, we don't want to see some title or body from several scenes ago persist throughout the whole video. EVERYTHING should be temporary. Triple check this and ensure that NOTHING at all persists. This is one of the most important things.
3) Make sure that there are no run time errors and the code plays. If your task at hand is advanced, you may find it helpful to break it up and ensure each part is correct.
4) Avoid list index errors at all costs. Double and triple check your indexing everytime you access an index. Make sure that the array isn't empty and its not out of bounds. This part is cruicial.
5) Make sure everything stays in frame -- we don't want things like the title or some image being cut out!
6) Make sure that there are no common python syntactical / runtime errors!
7) Make sure that audio is properly integrated with the mp4
"""
}]

def extract_python_block(text):
    pattern = r"```python\n(.*?)\n```"
    match = re.search(pattern, text, re.DOTALL)
    if match:
        return match.group(1)
    return None


while True:
    # 1) Keep using tool calling until code is generated
    while True:
        message = client.messages.create(
            model="claude-3-7-sonnet-20250219",
            max_tokens=19999,
            temperature=1,
            system=SYSTEM,
            tools = [
                {
                    "name": "get_specific_documentatino_info",
                    "description": "Gets specific documentation info from provided url",
                    "input_schema": {
                        "type": "object",
                        "properties": {
                            "url": {
                                "type": "string",
                                "description": "URL whose documentation info you want returned."
                            },
                        },
                        "required": ["url"]
                    }
                }
            ],
            messages=messages
        )

        has_tool_call = False
        
        for info in message.content:
            if info.type == "tool_use":
                print("[query]\n" + str(info.input["url"]) + "\n")
                result = send_query(info.input["url"])
                has_tool_call = True

                messages.append({
                    "role": "assistant",
                    "content": [info],
                })
                messages.append({
                    "role": "user",
                    "content": [{
                        "type": "tool_result",
                        "tool_use_id": info.id,
                        "content": result,
                        # "citations": {"enabled": True}
                    }]
                })
                # Update and see if we can cache
                caches += 1

                if caches <= 4:
                    messages[-1]["content"][0]["cache_control"] = {"type": "ephemeral"}
            
            else:
                print("[content]\n" + str(info) + "\n")
                messages.append({
                    "role": "assistant",
                    "content": [info],
                })
        
        if not has_tool_call:
            break
    
    # 2) Check the code
    try:
        print("[running code]\n")

        def run_manim(code: str, quality="low_quality"):
            ns = {}
            exec(code, ns)

            # Grab the class we expect
            scene_cls = ns.get("FullTutorial")
            if scene_cls is None:
                raise ValueError("The provided code did not define a class 'FullTutorial'.")

            with tempconfig({"quality": quality}):
                scene_cls().render()

        run_manim(extract_python_block(messages[-1]["content"][0].text))

    except Exception as e:
        print("[error]\n" + str(e) + "\n")
        messages.append({ "role": "user", "content": f"Code gave the following error:\n{str(e)}" })
        continue

    break


[content]
TextBlock(citations=None, text="I'll create a Manim animation with voiceover that explains gradient descent based on the given document. I'll focus on making it visually engaging while ensuring it runs without errors.\n\nLet me check the Manim documentation for voiceover integration first to ensure accuracy:", type='text')

[query]
https://docs.manim.community/en/stable/guides/add_voiceovers.html



[content]
TextBlock(citations=None, text='Now I\'ll create a focused, visually engaging Manim animation about gradient descent with voiceover support. I\'ll use the VoiceoverScene from manim_voiceover and structure the animation to cover the key points from the document.\n\n```python\nfrom manim import *\nfrom manim_voiceover import VoiceoverScene\nfrom manim_voiceover.services.gtts import GTTSService\nimport numpy as np\n\nclass FullTutorial(VoiceoverScene):\n    def construct(self):\n        # Set up the speech service (using Google TTS for reliability)\n        self.set_speech_service(GTTSService())\n        \n        # SECTION 1: INTRODUCTION\n        self.intro_section()\n        \n        # SECTION 2: 1D GRADIENT DESCENT\n        self.one_d_gradient_descent()\n        \n        # SECTION 3: MULTI-DIMENSIONAL GRADIENT DESCENT\n        self.multi_d_gradient_descent()\n        \n        # SECTION 4: APPLICATION TO REGRESSION\n        self.regression_application()\n        \n        

                                                                                              

                                                                                                              

                                                                                                                     

                                                                                                                              

                                                                                 

                                                                               

                                                                

                                                                                   

                                                                                                                                                                                                                

                                                                                                  

                                                                                                           

                                                                                                                                                                                                         

                                                                                     

                                                                                                                                                                                                               

                                                                                               

                                                                                 

                                                                          

                                                                                                    

                                                                                                     

                                                                                                                 

                                                                                                                           

                                                                                                                                                                                                                                                  

                                                                                                                                                            

                                                                                                                                                                                                                                                         

[error]
'FullTutorial' object has no attribute 'set_camera_orientation'



[content]
TextBlock(citations=None, text="I'll fix that error. The `set_camera_orientation` method is available in `ThreeDScene` but not in regular scenes. Since we're using `VoiceoverScene` (which inherits from `Scene`), we need to make our scene inherit from both `VoiceoverScene` and `ThreeDScene` to get 3D capabilities. Let me correct this:", type='text')

[query]
https://docs.manim.community/en/stable/reference/manim.scene.three_d_scene.ThreeDScene.html



[content]
TextBlock(citations=None, text='Now I\'ll fix the script by using multiple inheritance to combine the features of `VoiceoverScene` and `ThreeDScene`. Here\'s the updated code:\n\n```python\nfrom manim import *\nfrom manim_voiceover import VoiceoverScene\nfrom manim_voiceover.services.gtts import GTTSService\nimport numpy as np\n\nclass FullTutorial(VoiceoverScene, ThreeDScene):\n    def construct(self):\n        # Set up the speech service (using Google TTS for reliability)\n        self.set_speech_service(GTTSService())\n        \n        # SECTION 1: INTRODUCTION\n        self.intro_section()\n        \n        # SECTION 2: 1D GRADIENT DESCENT\n        self.one_d_gradient_descent()\n        \n        # SECTION 3: MULTI-DIMENSIONAL GRADIENT DESCENT\n        self.multi_d_gradient_descent()\n        \n        # SECTION 4: APPLICATION TO REGRESSION\n        self.regression_application()\n        \n        # SECTION 5: STOCHASTIC GRADIENT DESCENT\n        self.stochastic_gradien

                                                                                              

                                                                                                              

                                                                                                                     

                                                                                                                             

                                                                                 

                                                                               

                                                                

                                                                                   

                                                                                                                                                                                                                

                                                                                                  

                                                                                                           

                                                                                                                                                                                                       

[error]
list index out of range

