# Imports

In [1]:
!pip install -q -U pandas google-generativeai

In [2]:
!pip install elevenlabs



In [3]:
!pip install diffusers



In [4]:
!pip install transformers accelerate



In [5]:
!pip install moviepy



In [6]:
import pandas as pd
from google.colab import files
import google.generativeai as genai
import json
from google.colab import userdata
from elevenlabs.client import ElevenLabs
import torch
from diffusers import AutoPipelineForText2Image
import sys
from moviepy.editor import ImageClip, AudioFileClip, concatenate_videoclips, concatenate_audioclips
import moviepy.video.fx.all as vfx
import os
import time
from google.api_core import exceptions

# Load datasets

In [7]:
def load_and_process_data():
    """Uploads ONE OR MORE CSV files and processes them."""
    print("Please upload all your .csv files:")
    uploaded = files.upload() # This can now accept multiple files

    if not uploaded:
        print("No files uploaded. Exiting.")
        return None

    all_dataframes = [] # A list to hold each dataframe

    # Loop through all uploaded files
    for filename, content in uploaded.items():
        print(f"Reading file: {filename}...")
        try:
            from io import BytesIO
            df_temp = pd.read_csv(BytesIO(content))

            if 'decade' not in df_temp.columns or 'summary' not in df_temp.columns:
                print(f"  WARNING: File '{filename}' is missing 'decade' or 'summary'. Skipping this file.")
                continue

            all_dataframes.append(df_temp)

        except Exception as e:
            print(f"  Error reading {filename}: {e}")

    if not all_dataframes:
        print("No valid dataframes were loaded. Exiting.")
        return None

    # Merge all individual dataframes into one master dataframe
    df_master = pd.concat(all_dataframes, ignore_index=True)

    decade_data = df_master.groupby('decade')['summary'].apply(' '.join)

    print("\n--- All Decades Found and Processed ---")
    print(decade_data)
    return decade_data

# Load All Models

In [8]:
def get_elevenlabs_voices(client):
    """
    Fetches all premade voices from ElevenLabs and formats them
    as a string for the AI Director.
    """
    print("Fetching available ElevenLabs voices...")
    try:
        voices_string_list = []
        all_voices = client.voices.get_all().voices

        for voice in all_voices:
            if voice.category == 'premade':
                # Format the labels (e.g., {'age': 'middle_aged', 'accent': 'american'})
                # This gives the AI context on what the voice sounds like
                labels = ", ".join(f"{k}: {v}" for k, v in voice.labels.items())
                voices_string_list.append(
                    f'Name: {voice.name} (ID: {voice.voice_id}) - Description: {labels}'
                )

        print(f"Found {len(voices_string_list)} premade voices.")
        return "\n".join(voices_string_list)

    except Exception as e:
        print(f"Error fetching ElevenLabs voices: {e}")
        return None # Handle this error

In [9]:
def initialize_models():
    """Loads API keys and AI models (SDXL-only)."""
    print("Loading API keys...")
    try:
        # genai.configure(api_key=userdata.get('GEMINI_API_KEY'))
        genai.configure(api_key=userdata.get('GEMINI_API_KEY2'))
        elevenlabs_client = ElevenLabs(api_key=userdata.get('ELEVENLAB_KEY'))
        # if not using colab secrets
        # GEMINI_API_KEY = " " #<- input your key here
        # ELEVENLAB_KEY = " " #<- input your key here
    except Exception as e:
        print(f"Error loading API keys from Secrets: {e}")
        sys.exit()

    print("Loading AI Director model (Gemini 2.5 Pro)...")
    director_model = genai.GenerativeModel('gemini-2.5-pro')

    print("Loading AI Artist model (SDXL)... This may take a few minutes.")
    torch.cuda.empty_cache()
    try:
        artist_pipe = AutoPipelineForText2Image.from_pretrained(
            "stabilityai/stable-diffusion-xl-base-1.0",
            dtype=torch.float16,
            variant="fp16",
            use_safetensors=True
        ).to("cuda")
        # An A100 can handle this without offload, making it faster
        # artist_pipe.enable_model_cpu_offload()

    except Exception as e:
        print(f"Error loading SDXL model: {e}")
        sys.exit()

    print("Fetching available ElevenLabs voices...")
    voice_list_string = get_elevenlabs_voices(elevenlabs_client) # Assumes your 'fallback' version

    print("--- All models initialized successfully ---")

    return director_model, elevenlabs_client, artist_pipe, voice_list_string

# The AI "Director"

In [10]:
def run_director_step(decade, facts_string, model, voice_list_string):
    """
    Step 2: Calls Gemini with a "brute force" auto-retry logic
    that CHECKS THE ERROR MESSAGE for "429" or "quota".
    """
    print(f"Directing script for {decade}s...")

    # (Prompt setup code)
    text_length = len(facts_string)
    if text_length < 500: scene_count = 5
    elif text_length < 1500: scene_count = 7
    else: scene_count = 10
    print(f"Input facts length is {text_length}. Requesting {scene_count} scenes.")
    scene_examples = []
    for i in range(1, scene_count + 1):
        scene_examples.append(
            f'{{ "scene": {i}, "narration_cue": "The narration for this scene only.", "artist_prompt": "..." }}'
        )
    schema_example_scenes = ",\n        ".join(scene_examples)
    director_system_prompt = f"""
    You are an AI "Director" for a historical documentary.
    Your job is to take raw historical facts from the {decade}s, write a script, AND cast the narrator.
    ---
    AVAILABLE NARRATOR VOICES:
    {voice_list_string}
    ---
    TASK:
    1. Analyze the facts and the {decade}s.
    2. Write a script with **exactly {scene_count} scenes**.
    3. From the "AVAILABLE NARRATOR VOICES" list, select the *single best voice*.
    4. **CRITICAL:** The 'narration_script' must be the *full* script. The 'narration_cue' for *each scene* must be the *exact* portion of the script for that scene. Do not overlap cues.
    5. **NEW RULE:** If a scene prompt is about a **map**, add "highly detailed antique map, flat illustration style, cartography" to the. For all other scenes, use "dramatic, cinematic digital painting".
    6. Output a single JSON object. Do not include '```json' or any other text.
    The JSON must have this exact schema:
    {{
      "title": "A short, catchy video title about the {decade}s",
      "voice_id": "The chosen 'ID' (e.g., 'pNInz6obpgDQGcFmaJgB') from the voice list",
      "narration_script": "The complete narration script, as one string.",
      "scenes": [
        {schema_example_scenes}
      ]
    }}
    """

    # --- NEW: Improved Retry Logic ---
    total_attempts = 3 # We will try 3 times total

    for i in range(total_attempts):
        try:
            # Try to run the generation
            response = model.generate_content(
                [director_system_prompt, facts_string],
                generation_config=genai.types.GenerationConfig(
                    response_mime_type="application/json"
                )
            )
            # If it succeeds, load the JSON
            video_plan = json.loads(response.text)

            # If *that* succeeds, we are done
            print(f"Title: {video_plan['title']} ({len(video_plan['scenes'])} scenes)")
            print(f"AI-Selected Voice ID: {video_plan['voice_id']}")
            return video_plan # Success!

        except Exception as e:
            # --- THIS IS THE NEW "BRUTE FORCE" CATCH-ALL ---
            error_message = str(e)

            # Check if the error text is a rate-limit error
            is_rate_limit_error = "429" in error_message or "quota" in error_message.lower()

            if is_rate_limit_error:
                # It's a rate limit, let's try to wait
                print(f"  Server rate limit detected for {decade}.")

                if i < total_attempts - 1: # Check if we have retries left (e.g., i is 0 or 1)
                    wait_time = 65 # 65 seconds
                    print(f"  Waiting {wait_time}s to retry ({i+1}/{total_attempts} attempts)...")
                    time.sleep(wait_time)
                    print("  Retrying...")
                else:
                    # This was the last attempt
                    print("  No retries left. Failing this decade.")
                    raise e # Re-raise the error to be caught by main()

            else:
                # This is a *different* error (like bad JSON from Gemini)
                # We should NOT retry these.
                print(f"  Failed with non-retryable error: {e}")
                raise e # Fail immediately

    # If the loop finishes without returning, it means all retries failed
    raise Exception(f"Max {total_attempts} retries exceeded for Gemini API")

# The Narration Module

In [11]:
def run_narrator_step(video_plan, client, output_dir):
    """
    Step 3: Generates a SEPARATE audio file for EACH scene cue.
    Returns a list of audio file paths.
    """
    print("Generating audio for each scene...")

    audio_files = [] # This will be a list
    voice_to_use = video_plan.get('voice_id', "pNInz6obpgDQGcFmaJgB") # Get voice, with fallback

    print(f"Using AI-selected voice ID: {voice_to_use}")

    for scene in video_plan['scenes']:
        scene_number = scene['scene']
        narration_text = scene['narration_cue']

        # We need to make sure the AI didn't give us an empty cue
        if not narration_text or len(narration_text.strip()) < 2:
            print(f"  Warning: Scene {scene_number} has no narration cue. Skipping audio.")
            continue

        audio_filename = os.path.join(output_dir, f"narration_scene_{scene_number}.mp3")

        try:
            audio = client.text_to_speech.convert(
                text=narration_text,
                voice_id=voice_to_use,
                model_id="eleven_multilingual_v2"
            )

            with open(audio_filename, "wb") as f:
                for chunk in audio:
                    if chunk:
                        f.write(chunk)

            audio_files.append(audio_filename)
            print(f"  Audio saved as {audio_filename}")

        except Exception as e:
            print(f"  Error generating audio for scene {scene_number}: {e}")

    return audio_files # Returns the list of paths

# The AI "Artist"

In [12]:
def run_artist_step(video_plan, pipe, output_dir):
    """Step 4: Calls SDXL to generate high-quality STATIC images."""
    print("Generating static images...")
    image_files = [] # This will be a list of .png files

    for scene in video_plan['scenes']:
        prompt = scene['artist_prompt']
        scene_number = scene['scene']

        # We save as .png, not _still.png
        scene_filename = os.path.join(output_dir, f"scene_{scene_number}.png")
        neg_prompt = "cartoon, cgi, blurry, low-resolution, disfigured, (watermark:1.3), text, deformed, ugly"

        print(f"Generating image for scene {scene_number}...")
        image = pipe(
            prompt,
            negative_prompt=neg_prompt,
            num_inference_steps=25
        ).images[0]

        image.save(scene_filename)
        image_files.append(scene_filename)
        print(f"Saved {scene_filename}")

    return image_files # Returns a list of .png paths

# The AI "Editor"

In [13]:
def run_editor_step(video_plan, decade, image_files, narration_files, main_output_dir):
    """
    Step 5: Assembles video with PERFECT sync (Static images, no zoom).
    Matches each image's duration to its corresponding audio clip.
    """
    print("Assembling video with perfect sync (static images)...")

    # Check for mismatches
    if not image_files or not narration_files or len(image_files) != len(narration_files):
        print(f"  Error: Mismatch in scene count.")
        print(f"  Found {len(image_files)} images and {len(narration_files)} audio clips.")
        print("  Skipping video assembly.")
        return None

    scene_clips = []    # To hold video clips
    audio_clips = []    # To hold audio clips

    # Loop and match pairs
    for img_file, aud_file in zip(image_files, narration_files):
        try:
            # 1. Load the audio clip and get its exact duration
            audio_clip = AudioFileClip(aud_file)
            scene_duration = audio_clip.duration
            audio_clips.append(audio_clip) # Add to our list

            # 2. Load the image clip
            img_clip = ImageClip(img_file)

            # --- ZOOM REMOVED ---
            # img_clip = img_clip.resize(width=img_clip.w * 1.1)
            # img_clip = img_clip.resize(lambda t: 1 - (t * 0.05))

            # We keep this to ensure it's centered
            img_clip = img_clip.set_position(("center", "center"))

            # 3. Set the image duration to MATCH the audio duration
            img_clip = img_clip.set_duration(scene_duration)

            # Add fades
            fade_time = min(0.5, scene_duration / 4)
            img_clip = img_clip.fadein(fade_time).fadeout(fade_time)

            scene_clips.append(img_clip)

        except Exception as e:
            print(f"  Error processing clip {img_file}: {e}")

    if not scene_clips:
        print("No valid scenes were created. Skipping video.")
        return None

    # 4. Stitch everything together
    final_video_clip = concatenate_videoclips(scene_clips, method="compose")
    final_audio_clip = concatenate_audioclips(audio_clips)

    final_clip = final_video_clip.set_audio(final_audio_clip)

    safe_title = video_plan['title'].replace(' ', '_').replace(':', '_')
    output_filename = os.path.join(main_output_dir, f"{decade}_{safe_title}.mp4")

    final_clip.write_videofile(output_filename, fps=24, codec='libx264', logger=None)
    return output_filename

# Main Run

In [14]:
MAIN_OUTPUT_FOLDER = "TroyHistory"
os.makedirs(MAIN_OUTPUT_FOLDER, exist_ok=True)
print(f"All output will be saved in the '{MAIN_OUTPUT_FOLDER}' folder.")

All output will be saved in the 'TroyHistory' folder.


In [15]:
all_summaries = []

In [16]:
# --- Setup (Run once) ---
try:
    decade_data = load_and_process_data()
    if decade_data is None:
        print("Failed to load data. Exiting.")

    director_model, elevenlabs_client, artist_pipe, voice_list_string = initialize_models()
except Exception as e:
    print(f"Error during setup: {e}")

Please upload all your .csv files:


Saving troy_history_by_decade_podcast.csv to troy_history_by_decade_podcast (2).csv
Saving troy_history_by_decade_from_web.csv to troy_history_by_decade_from_web (2).csv
Saving troy_history_by_decade_book.csv to troy_history_by_decade_book (2).csv
Reading file: troy_history_by_decade_podcast (2).csv...
Reading file: troy_history_by_decade_from_web (2).csv...
Reading file: troy_history_by_decade_book (2).csv...

--- All Decades Found and Processed ---
decade
1500s    The early 1500s witnessed the initial European...
1520s    The 1520s witnessed the start of French-sponso...
1560s    The 1560s were a pivotal decade for European c...
1600s    The 1600s in the Troy area began with a pivota...
1610s    In 1614, Dutch traders, having reaped signific...
1620s    The 1620s saw the official beginning of Dutch ...
1630s    The 1630s marked the active establishment of N...
1640s    The 1640s in the area around modern-day Troy, ...
1650s    The 1650s saw the Dutch intensify their coloni...
1660s  

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.

Keyword arguments {'dtype': torch.float16} are not expected by StableDiffusionXLPipeline and will be ignored.


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

Fetching available ElevenLabs voices...
Fetching available ElevenLabs voices...
Found 20 premade voices.
--- All models initialized successfully ---


In [17]:
summary_filename = os.path.join(MAIN_OUTPUT_FOLDER, "summary_of_all_videos.json")
completed_decades = set()
print(f"Checking for existing summary file: {summary_filename}...")

try:
    if os.path.exists(summary_filename):
        with open(summary_filename, 'r') as f:
            all_summaries = json.load(f)
            # Get completed decades from the summary
            for item in all_summaries:
                completed_decades.add(str(item['decade']))
        print(f"Loaded {len(all_summaries)} summaries from existing JSON.")
        print(f"Found {len(completed_decades)} completed decades. Will skip them.")
    else:
        print("No summary file found. Will generate all videos.")

except Exception as e:
    print(f"Warning: Could not parse summary.json. Will re-generate all videos. {e}")
    all_summaries = []
    completed_decades = set()

Checking for existing summary file: TroyHistory/summary_of_all_videos.json...
Loaded 44 summaries from existing JSON.
Found 44 completed decades. Will skip them.


In [18]:
# --- Main Loop (Run per decade) ---
for decade, facts_string in decade_data.items():

    if str(decade) in completed_decades:
        print(f"--- ⏩ SKIPPING {decade} (already in summary.json) ---")
        continue

    print(f"\n--- 🎬 STARTING VIDEO FOR {decade} ---")

    decade_output_dir = os.path.join(MAIN_OUTPUT_FOLDER, str(decade))
    os.makedirs(decade_output_dir, exist_ok=True)
    print(f"Saving assets to: {decade_output_dir}")

    narration_files = []
    image_files = []

    try:
        # Step 2: Director
        video_plan = run_director_step(decade, facts_string, director_model, voice_list_string)

        # Step 3: Narrator
        narration_files = run_narrator_step(video_plan, elevenlabs_client, decade_output_dir)

        # Step 4: Artist
        image_files = run_artist_step(video_plan, artist_pipe, decade_output_dir)

        # Step 5: Editor
        output_filename = run_editor_step(video_plan, decade, image_files, narration_files, MAIN_OUTPUT_FOLDER)

        if output_filename:
            print(f"\n--- ✅ SUCCESSFULLY CREATED: {output_filename} ---")
            all_summaries.append({
                    "decade": decade,
                    "summary": video_plan['narration_script'],
                    "video_filename": output_filename,
                    "voice_id_used": video_plan.get('voice_id')
                })
            print("Cleaning up intermediate audio files...")
            for f in narration_files:
                try:
                    os.remove(f)
                except:
                    pass # Ignore errors
        else:
            print(f"\n--- ⚠️ SKIPPED video for {decade} (no images generated) ---")

    except Exception as e:
        # This robust error handling means one bad decade won't stop the whole script
        print(f"\n--- ❌ FAILED to create video for {decade}: {e} ---")
        print("Continuing to next decade...")

print("\n--- All videos generated! ---")

--- ⏩ SKIPPING 1500s (already in summary.json) ---
--- ⏩ SKIPPING 1520s (already in summary.json) ---
--- ⏩ SKIPPING 1560s (already in summary.json) ---
--- ⏩ SKIPPING 1600s (already in summary.json) ---
--- ⏩ SKIPPING 1610s (already in summary.json) ---
--- ⏩ SKIPPING 1620s (already in summary.json) ---
--- ⏩ SKIPPING 1630s (already in summary.json) ---
--- ⏩ SKIPPING 1640s (already in summary.json) ---
--- ⏩ SKIPPING 1650s (already in summary.json) ---
--- ⏩ SKIPPING 1660s (already in summary.json) ---
--- ⏩ SKIPPING 1670s (already in summary.json) ---
--- ⏩ SKIPPING 1680s (already in summary.json) ---
--- ⏩ SKIPPING 1690s (already in summary.json) ---
--- ⏩ SKIPPING 1700s (already in summary.json) ---
--- ⏩ SKIPPING 1710s (already in summary.json) ---
--- ⏩ SKIPPING 1720s (already in summary.json) ---
--- ⏩ SKIPPING 1730s (already in summary.json) ---
--- ⏩ SKIPPING 1740s (already in summary.json) ---
--- ⏩ SKIPPING 1750s (already in summary.json) ---
--- ⏩ SKIPPING 1760s (already i

  0%|          | 0/25 [00:00<?, ?it/s]

Saved TroyHistory/1930s/scene_1.png
Generating image for scene 2...


  0%|          | 0/25 [00:00<?, ?it/s]

Saved TroyHistory/1930s/scene_2.png
Generating image for scene 3...


  0%|          | 0/25 [00:00<?, ?it/s]

Saved TroyHistory/1930s/scene_3.png
Generating image for scene 4...


  0%|          | 0/25 [00:00<?, ?it/s]

Saved TroyHistory/1930s/scene_4.png
Generating image for scene 5...


  0%|          | 0/25 [00:00<?, ?it/s]

Saved TroyHistory/1930s/scene_5.png
Assembling video with perfect sync (static images)...

--- ✅ SUCCESSFULLY CREATED: TroyHistory/1930s_Troy's_Great_Shrink__The_Fabric_of_Innovation.mp4 ---
Cleaning up intermediate audio files...
--- ⏩ SKIPPING 1940s (already in summary.json) ---
--- ⏩ SKIPPING 1950s (already in summary.json) ---
--- ⏩ SKIPPING 1960s (already in summary.json) ---
--- ⏩ SKIPPING 1970s (already in summary.json) ---
--- ⏩ SKIPPING 1980s (already in summary.json) ---
--- ⏩ SKIPPING 1990s (already in summary.json) ---
--- ⏩ SKIPPING 2000s (already in summary.json) ---
--- ⏩ SKIPPING 2010s (already in summary.json) ---
--- ⏩ SKIPPING 2020s (already in summary.json) ---

--- All videos generated! ---


In [19]:
# --- Save the final JSON summary ---
if all_summaries:
    summary_filename = os.path.join(MAIN_OUTPUT_FOLDER, "summary_of_all_videos.json")
    print(f"Saving final summary to {summary_filename}...")
    try:
        with open(summary_filename, 'w') as f:
            json.dump(all_summaries, f, indent=4)
        print("Summary file saved.")
    except Exception as e:
        print(f"Error saving summary file: {e}")

Saving final summary to TroyHistory/summary_of_all_videos.json...
Summary file saved.
