In [22]:
from dotenv import load_dotenv
from openai import OpenAI
from elevenlabs.client import AsyncElevenLabs
import asyncio
import json
import os
import argparse
import concurrent.futures

from manga_extraction import extract_all_pages_as_images, save_important_pages, split_volume_into_parts, save_all_pages
from vision_analysis import analyze_images_with_gpt4_vision, detect_important_pages, VISION_PRICE_PER_TOKEN 
from prompts import DRAMATIC_PROMPT, BASIC_PROMPT, BASIC_PROMPT_WITH_CONTEXT,  BASIC_INSTRUCTIONS, KEY_PAGE_IDENTIFICATION_INSTRUCTIONS
from citation_processing import extract_text_and_citations, extract_script
from movie_director import make_movie
from panel_extractor.panel_extractor import PanelExtractor
load_dotenv()  # Load environment variables from .env file

True

In [23]:
# Run configuration: which series and which volume to recap.
# Both values are interpolated into the input/output paths used by the cells below.
manga = "naruto"
volume_number = 10

In [24]:
# API clients. OpenAI() is constructed without arguments; the ElevenLabs key is
# read from the ELEVENLABS_API_KEY environment variable (load_dotenv() ran earlier).
client = OpenAI()
narration_client = AsyncElevenLabs(
    api_key=os.getenv("ELEVENLABS_API_KEY"),
)

# Render every page of the volume PDF. The helper's result is indexed with
# "scaled" here and with "full" in a later cell.
print("Extracting all pages from the volume...")
volume_pdf_path = f"{manga}/v{volume_number}/v{volume_number}.pdf"
volume_scaled_and_unscaled = extract_all_pages_as_images(volume_pdf_path)
volume = volume_scaled_and_unscaled["scaled"]
print("Total pages in volume:", len(volume))

Extracting all pages from the volume...
Total pages in volume: 177


In [25]:
# Save the "full" page set (the unscaled one, going by the variable name) to disk.
# The return value is passed to PanelExtractor.extract() in the next cell —
# presumably the output directory path; confirm against save_all_pages.
# NOTE(review): `dir` shadows the Python builtin of the same name. It is kept
# because the panel-extraction cell reads this variable.
dir = save_all_pages(volume_scaled_and_unscaled["full"], manga, volume_number)

In [27]:
# Run panel extraction over the pages saved by the previous cell.
# NOTE(review): min_pct_panel / max_pct_panel look like size bounds expressed as a
# percentage of the page — confirm against PanelExtractor.
panel_extractor = PanelExtractor(
    keep_text=True,
    min_pct_panel=2,
    max_pct_panel=90,
)
panel_extractor.extract(dir)

Load text detector ... Done!
Loading images ... Done!


Removing images with paper texture: 100%|██████████| 177/177 [00:01<00:00, 93.38it/s]
extracting panels: 154it [00:02, 70.58it/s]


In [31]:
# Load the two reference PDFs (profile first, then chapter) and keep only their
# "scaled" page images; these are passed to every detect_important_pages call.
profile_reference, chapter_reference = (
    extract_all_pages_as_images(reference_pdf)["scaled"]
    for reference_pdf in (
        f"{manga}/profile-reference.pdf",
        f"{manga}/chapter-reference.pdf",
    )
)

# Accumulators filled while the batch results are collected below.
profile_pages, chapter_pages = [], []
important_page_tokens = 0

# Number of consecutive volume pages sent in one detection request.
batch_size = 20

print("Identifying important pages in the volume...")
# Thin wrapper around detect_important_pages so each batch can be submitted to an executor.
def process_batch(start_idx, pages):
    """Run important-page detection on one batch and tag the result with its offset.

    Args:
        start_idx: Index in ``volume`` of the first page in ``pages``; returned
            unchanged so the caller can map batch-relative results back.
        pages: The slice of volume pages to analyse in this request.

    Returns:
        A ``(start_idx, response)`` tuple, where ``response`` is whatever
        ``detect_important_pages`` returned.
    """
    # NOTE(review): KEY_PAGE_IDENTIFICATION_INSTRUCTIONS is passed for both trailing
    # arguments — confirm against detect_important_pages that this is intended.
    detection = detect_important_pages(
        profile_reference,
        chapter_reference,
        pages,
        client,
        KEY_PAGE_IDENTIFICATION_INSTRUCTIONS,
        KEY_PAGE_IDENTIFICATION_INSTRUCTIONS,
    )
    return start_idx, detection

# Fan the batches out to worker threads; each batch is an independent process_batch call.
with concurrent.futures.ThreadPoolExecutor() as executor:
    futures = [
        executor.submit(process_batch, i, volume[i:i+batch_size])
        for i in range(0, len(volume), batch_size)
    ]

    # Results are consumed in completion order, not page order; the page lists
    # are sorted once everything has been collected.
    for future in concurrent.futures.as_completed(futures):
        start_idx, response = future.result()
        end_index = start_idx + batch_size - 1
        print(f"Processing pages {start_idx} to {min(end_index, len(volume)-1)}")

        ip = response["parsed_response"]
        print(json.dumps(ip, indent=2))
        for page in ip:
            # image_index is relative to the batch, so offset it by the batch start
            # to get an index into `volume`.
            page_index = page["image_index"] + start_idx

            # The index comes from model output. These values are later used to
            # index `volume` and to split it into parts, so drop anything that does
            # not point at a real page: an out-of-range value would raise IndexError
            # and a negative one would silently select the wrong page.
            if not 0 <= page_index < len(volume):
                print(f"Skipping out-of-range page index {page_index} reported for batch starting at {start_idx}")
                continue

            if page["type"] == "profile":
                profile_pages.append(page_index)
            elif page["type"] == "chapter":
                chapter_pages.append(page_index)

        important_page_tokens += response["total_tokens"]

profile_pages.sort()
chapter_pages.sort()

print("Total tokens to extract profiles and chapters:", important_page_tokens)
print("\n__________\n")
print("Profile pages:", profile_pages)
print("Chapter pages:", chapter_pages)

Identifying important pages in the volume...
Using GPT as a backup to format JSON object...
Processing pages 60 to 79
[
  {
    "image_index": 5,
    "type": "chapter"
  }
]
Using GPT as a backup to format JSON object...
Using GPT as a backup to format JSON object...
Using GPT as a backup to format JSON object...
Processing pages 160 to 176
[
  {
    "image_index": 15,
    "type": "chapter"
  }
]
Processing pages 80 to 99
[
  {
    "image_index": 3,
    "type": "chapter"
  }
]
Processing pages 100 to 119
[
  {
    "image_index": 1,
    "type": "chapter"
  },
  {
    "image_index": 19,
    "type": "chapter"
  }
]
Using GPT as a backup to format JSON object...
Processing pages 40 to 59
[
  {
    "image_index": 5,
    "type": "chapter"
  }
]
Using GPT as a backup to format JSON object...
Using GPT as a backup to format JSON object...
Processing pages 140 to 159
[
  {
    "image_index": 18,
    "type": "chapter"
  }
]
Using GPT as a backup to format JSON object...
Processing pages 120 to 1

In [34]:
# Echo the page count as a quick sanity check, then write the detected
# profile/chapter pages to disk so they can be reviewed by hand.
print(len(volume))
print("\n__________\n")
print("Saving important pages to disk for QA...")
save_important_pages(
    volume,
    profile_pages,
    chapter_pages,
    manga,
    volume_number,
)

177

__________

Saving important pages to disk for QA...


In [35]:


# Separator printed between model responses so they are easy to tell apart in the output.
SECTION_BREAK = "\n\n\n_____________\n\n\n"

character_profiles = [volume[page_index] for page_index in profile_pages]
jobs = split_volume_into_parts(volume, chapter_pages, 3)

# First part: summarised with the plain prompt, since there is no earlier recap yet.
first_job = jobs[0]
response = analyze_images_with_gpt4_vision(
    character_profiles, first_job, client, BASIC_PROMPT, BASIC_INSTRUCTIONS
)
summary = response.choices[0].message.content
recap = summary
tokens = response.usage.total_tokens
movie_script = extract_text_and_citations(summary, first_job)

print(SECTION_BREAK)
print(summary)

# Remaining parts: the running recap is prepended to the prompt so each
# request sees what was summarised before it.
for job in jobs[1:]:
    prompt_with_context = recap + "\n-----\n" + BASIC_PROMPT_WITH_CONTEXT
    response = analyze_images_with_gpt4_vision(
        character_profiles, job, client, prompt_with_context, BASIC_INSTRUCTIONS
    )
    summary = response.choices[0].message.content
    recap = recap + "\n\n" + summary
    tokens += response.usage.total_tokens
    print(SECTION_BREAK)
    print(summary)
    movie_script = movie_script + extract_text_and_citations(summary, job)

# Three separators in a row mark the end of the per-part summaries.
print("\n".join([SECTION_BREAK] * 3))

narration_script = extract_script(movie_script)
print(narration_script)
print("\n___________\n")

# Cost report: GPT usage is priced per token, narration per character of the script.
ELEVENLABS_PRICE_PER_CHARACTER = 0.0003
as_dollars = "${:,.4f}".format

print("Tokens for extracting profiles and chapters:", important_page_tokens, " | ", as_dollars(VISION_PRICE_PER_TOKEN * important_page_tokens))
print("Tokens for summarization:", tokens,  " | ", as_dollars(VISION_PRICE_PER_TOKEN * tokens))

gpt_cost = VISION_PRICE_PER_TOKEN * (tokens+important_page_tokens)
print("Total GPT tokens:", important_page_tokens + tokens,  " | ", as_dollars(gpt_cost))

narration_cost = ELEVENLABS_PRICE_PER_CHARACTER * (len(narration_script))
print("Total elevenlabs characters:", len(narration_script), " | ", as_dollars(narration_cost))
print("GRAND TOTAL COST"," | ", as_dollars(gpt_cost + narration_cost))

7->65
66->138
139->end (177)



_____________



Amidst the fervor of the Chunin Exams, a clash of ideologies and techniques unfolds as Rock Lee, a prodigy of sheer effort, faces off against Gaara, a natural talent nurtured by the sand. The young ninja Lee, unfazed by Gaara's intimidating presence, launches himself into battle with his characteristic vigor. His initial salvo is thwarted by Gaara's sand, a defense seemingly unconquerable. Lee's mentor, Might Guy, watches with a solemn intensity, aware of Lee's limitations and strengths[^3][^4][^5].

Lee's unorthodox training and reliance on physical prowess impress onlookers[^8][^9]; he battles without the aid of ninjutsu or genjutsu, confounding the usual expectations of a ninja's skill set[^10]. With his spirit unyielding, Lee reveals his first trump card under Might Guy's approval—a set of weighted ankle wraps. Casting them aside, his true speed is unleashed—a blur too swift for the naked eye[^14][^15][^16]. The arena shakes, and Lee

In [36]:
# Turn the cited script into the final recap video, using the ElevenLabs client
# for narration (the output below shows a narration pass followed by editing).
# The top-level `await` relies on the notebook/IPython running the cell inside
# an event loop; it is not valid in a plain Python script.
await make_movie(movie_script, manga, volume_number, narration_client)

Narrating movie script...
got bytes: 67291
got bytes: 88189
got bytes: 112013
got bytes: 202710
got bytes: 84427
got bytes: 104071
got bytes: 76486
got bytes: 130821
got bytes: 111177
got bytes: 136672
got bytes: 126223
got bytes: 519941
got bytes: 91115
got bytes: 178050
got bytes: 101564
got bytes: 68127
got bytes: 70635
got bytes: 312633
got bytes: 114102
got bytes: 56424
got bytes: 90697
got bytes: 68963
got bytes: 119536
got bytes: 56424
got bytes: 122462
got bytes: 85263
got bytes: 84009
got bytes: 128313
got bytes: 82337
got bytes: 165511
got bytes: 191425
got bytes: 95712
got bytes: 232385
got bytes: 132911
got bytes: 213995
got bytes: 204382
got bytes: 287973
got bytes: 334367
got bytes: 354847
got bytes: 301348
got bytes: 247849
got bytes: 131239
got bytes: 80666
got bytes: 318484
got bytes: 285048
Editing movie together...
Number of images in segment 0: 1
Audio duration: 32.5
Number of images in segment 1: 3
Audio duration: 4.21
Number of images in segment 2: 2
Audio duratio

Detecting text:   1%|          | 1/154 [4:25:59<07:27,  2.92s/it]

Audio duration: 17.82
Number of images in segment 44: 1
Audio duration: 5.04
Moviepy - Building video naruto/v10/recap.mp4.
MoviePy - Writing audio in naruto/v10/temp-audio.m4a


Detecting text:   1%|          | 1/154 [4:26:13<07:27,  2.92s/it]

MoviePy - Done.
Moviepy - Writing video naruto/v10/recap.mp4



Detecting text:   1%|          | 1/154 [4:26:34<07:27,  2.92s/it]

Moviepy - Done !
Moviepy - video ready naruto/v10/recap.mp4
Movie created successfully!


True