In [5]:
import glob
import json
import os
import random
import re

import elevenlabs
import ffmpeg
import openai
import requests
from dotenv import load_dotenv
from elevenlabs.client import ElevenLabs
from mutagen.mp3 import MP3

# ========================
# 1. SETUP & CONFIGURATION
# ========================

# Load environment variables from the .env file into the process environment.
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")

# Fail fast and name exactly which key is missing, so the user knows what to fix.
_missing_keys = [
    key_name
    for key_name, key_value in (
        ("OPENAI_API_KEY", OPENAI_API_KEY),
        ("ELEVENLABS_API_KEY", ELEVENLABS_API_KEY),
    )
    if not key_value
]
if _missing_keys:
    raise ValueError(
        "❌ API keys for OpenAI and ElevenLabs must be set in the .env file. "
        f"Missing: {', '.join(_missing_keys)}"
    )

# Initialize API clients from the validated keys (never hardcode keys in the notebook).
openai_client = openai.OpenAI(api_key=OPENAI_API_KEY)

# NOTE: this rebinds the name `elevenlabs` from the imported module to a client
# instance. The name is kept because generate_narration() calls
# `elevenlabs.text_to_speech.stream(...)` on it.
elevenlabs = ElevenLabs(api_key=ELEVENLABS_API_KEY)


In [None]:
def generate_story_with_prompts(user_prompt):
    """
    Generate a titled story split into scenes, each with narration text and an image prompt.

    The prompt is sent to the OpenAI chat-completions API (model "gpt-4o") in JSON
    mode. The system prompt asks for a title and scenes whose text is about
    15-20 words (suitable for ~10 sec of TTS audio).

    Args:
        user_prompt: Free-form description of the content to generate.

    Returns:
        dict: The parsed JSON reply, with a "title" and a "scenes" list whose items
        each carry a "text" and an "image_prompt".

    Raises:
        ValueError: If the reply is empty or does not contain a non-empty "scenes"
            list whose items all have "text" and "image_prompt".
        Exception: Any API or JSON-decoding error is logged and re-raised.
    """
    print("✍️  Generating content and image prompts...")
    system_prompt = """
    You are a general content generator. Based on the user's prompt, generate texts.
    
    Output must be a valid JSON with:
    - "title": A short story title (max 6 words).
    - "scenes": Exactly 5 items, each with:
        1. "text": A short story snippet, about 15-20 words ( suitable for ~10 sec TTS audio).
        2. "image_prompt": A descriptive, visually rich prompt for image generation (1–2 sentences).
    
    Strict output format:
    {
      "title": "The Last Stargazer",
      "scenes": [
        {
          "text": "In a twilight city, Elias adjusted his grandfather's brass telescope, hoping to glimpse the last star before night vanished forever.",
          "image_prompt": "A solitary figure on a futuristic rooftop at dusk, peering through a brass telescope toward a fading star."
        }
      ]
    }
    """

    try:
        response = openai_client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            response_format={"type": "json_object"}
        )
        raw_content = response.choices[0].message.content
        if not raw_content:
            raise ValueError("The model returned an empty response.")
        story_data = json.loads(raw_content)

        # Later cells index scene['text'] and scene['image_prompt'] directly, so
        # reject a malformed reply here instead of failing later with a KeyError.
        scenes = story_data.get("scenes") if isinstance(story_data, dict) else None
        if not isinstance(scenes, list) or not scenes:
            raise ValueError("The model response does not contain a non-empty 'scenes' list.")
        for scene_number, scene in enumerate(scenes, start=1):
            if not isinstance(scene, dict) or not scene.get("text") or not scene.get("image_prompt"):
                raise ValueError(f"Scene {scene_number} is missing 'text' or 'image_prompt'.")

        print("✅ Story generated successfully.")
        print(story_data)
        return story_data
    except Exception as e:
        print(f"❌ Error generating story: {e}")
        raise


In [7]:
# --- Get User Input ---
user_prompt = input("👉 Enter a prompt for your requirement: ")

# --- Generate Content ---
story_data = generate_story_with_prompts(user_prompt)

✍️  Generating story and image prompts...
✅ Story generated successfully.
{'title': 'Whispers of Ancient Maharashtra', 'scenes': [{'text': "Ancient echoes lingered as the vibrant hues of Maharashtra's terrain unfolded from lush plains to formidable forts.", 'image_prompt': "A panoramic view of Maharashtra's diverse landscape, featuring verdant fields and grand, ancient forts under a clear, blue sky."}, {'text': 'Time-worn temples whispered tales of devotion, their intricate carvings testament to centuries-old artistry and craftsmanship.', 'image_prompt': 'A close-up of a historic temple in Maharashtra, showcasing its detailed stone carvings and weathered, yet majestic structure.'}, {'text': "The bustling markets once heard the clamor of traders, each stall a microcosm of Maharashtra's rich, cultural tapestry.", 'image_prompt': 'A vibrant market scene with colorful stalls, diverse goods on display, and people engaged in lively barter conversations.'}]}


In [1]:
# Cached copy of a previously generated story, so later cells can run without
# calling the OpenAI API again. Running this cell replaces whatever `story_data`
# currently holds.
story_data = {
    'title': 'Whispers of Ancient Maharashtra',
    'scenes': [
        {
            'text': "Ancient echoes lingered as the vibrant hues of Maharashtra's terrain unfolded from lush plains to formidable forts.",
            'image_prompt': "A panoramic view of Maharashtra's diverse landscape, featuring verdant fields and grand, ancient forts under a clear, blue sky.",
        },
        {
            'text': 'Time-worn temples whispered tales of devotion, their intricate carvings testament to centuries-old artistry and craftsmanship.',
            'image_prompt': 'A close-up of a historic temple in Maharashtra, showcasing its detailed stone carvings and weathered, yet majestic structure.',
        },
        {
            'text': "The bustling markets once heard the clamor of traders, each stall a microcosm of Maharashtra's rich, cultural tapestry.",
            'image_prompt': 'A vibrant market scene with colorful stalls, diverse goods on display, and people engaged in lively barter conversations.',
        },
    ],
}


In [9]:
# Number of scenes in the story currently held in `story_data`.
len(story_data["scenes"])

3

In [6]:
# Working directories, given as relative paths.
IMAGE_DIR, VIDEO_DIR, MUSIC_DIR = (
    "output_images",  # where generated scene images are saved
    "output_videos",  # passed as the output directory for the rendered video
    "music",          # searched for background-music *.mp3 files
)

In [11]:
def clean_story(story_text):
    """
    Return the narration text as-is.

    Placeholder hook: no cleaning is applied yet, so the input is handed back
    unchanged. Add text clean-up here if it becomes necessary.
    """
    cleaned_text = story_text
    return cleaned_text

In [12]:
def generate_narration(story_text, filename, voice_id="G17SuINrv2H9FC6nvetn", output_dir="output_videos"):
    """
    Convert text to speech with ElevenLabs and save the audio to a file.

    Args:
        story_text: Text to narrate; passed through clean_story() first.
        filename: Name of the audio file to create inside `output_dir`.
        voice_id: ElevenLabs voice identifier to use.
        output_dir: Directory the audio file is written to (created if missing).

    Returns:
        The path of the saved audio file, or None if the request failed or the
        stream delivered no audio bytes.
    """
    story_text = clean_story(story_text)
    try:
        # Stream audio
        audio_stream = elevenlabs.text_to_speech.stream(
            text=story_text,
            voice_id=voice_id,
            model_id="eleven_multilingual_v2"
        )

        # Join once instead of growing a bytes object chunk by chunk;
        # anything in the stream that is not bytes is ignored.
        audio_bytes = b"".join(chunk for chunk in audio_stream if isinstance(chunk, bytes))

        # An empty stream would otherwise leave a zero-byte file that is
        # reported as a success and only fails later when the audio is read.
        if not audio_bytes:
            print("❌ ElevenLabs TTS Error:", "no audio data received")
            return None

        # Save to file
        os.makedirs(output_dir, exist_ok=True)
        audio_path = os.path.join(output_dir, filename)
        with open(audio_path, "wb") as f:
            f.write(audio_bytes)

        print("🎧 Narration saved:", audio_path)
        return audio_path

    except Exception as e:
        # Best effort by design: report the failure and signal it with None.
        print("❌ ElevenLabs TTS Error:", str(e))
        return None


In [None]:
# Narrate the title followed by every scene. A missing or empty title is left
# out entirely so the narration never starts with a stray ". ".
narration_title = (story_data.get('title') or '').strip()
scene_narration = " ".join(scene['text'] for scene in story_data['scenes'])
full_narration_text = f"{narration_title}. {scene_narration}" if narration_title else scene_narration
narration_path = generate_narration(full_narration_text, "ca_narration.mp3")

🎧 Narration saved: output_videos\ca_narration.mp3


'output_videos\\ca_narration.mp3'

In [14]:
# Display the story currently in memory (title plus each scene's text and image prompt).
story_data

{'title': 'Whispers of Ancient Maharashtra',
 'scenes': [{'text': "Ancient echoes lingered as the vibrant hues of Maharashtra's terrain unfolded from lush plains to formidable forts.",
   'image_prompt': "A panoramic view of Maharashtra's diverse landscape, featuring verdant fields and grand, ancient forts under a clear, blue sky."},
  {'text': 'Time-worn temples whispered tales of devotion, their intricate carvings testament to centuries-old artistry and craftsmanship.',
   'image_prompt': 'A close-up of a historic temple in Maharashtra, showcasing its detailed stone carvings and weathered, yet majestic structure.'},
  {'text': "The bustling markets once heard the clamor of traders, each stall a microcosm of Maharashtra's rich, cultural tapestry.",
   'image_prompt': 'A vibrant market scene with colorful stalls, diverse goods on display, and people engaged in lively barter conversations.'}]}

In [None]:
from google import genai
from google.genai import types
from PIL import Image
from io import BytesIO
import os

# Gemini client.
# No API key is passed explicitly here — presumably the SDK picks it up from the
# environment (e.g. a key loaded from .env); TODO confirm which variable it expects.
gemini_client = genai.Client()

def generate_image(prompt, index):
    """
    Generate one scene image with Gemini and save it as a PNG in IMAGE_DIR.

    Args:
        prompt: Text prompt describing the image to generate.
        index: Zero-based scene index; the file is named ca_scene_<index+1>.png.

    Returns:
        The path of the saved image, or None when the response contains no
        image data or any error occurs (the error is printed, not raised).
    """
    scene_number = index + 1
    print(f"🎨 Generating image for scene {scene_number}...")
    try:
        # Generate content (image + optional text)
        response = gemini_client.models.generate_content(
            model="gemini-2.0-flash-preview-image-generation",
            contents=prompt,
            config=types.GenerateContentConfig(
                response_modalities=['TEXT', 'IMAGE']
            )
        )

        # Make sure output directory exists
        os.makedirs(IMAGE_DIR, exist_ok=True)
        image_path = os.path.join(IMAGE_DIR, f"ca_scene_{scene_number}.png")

        # A response may come back without candidates or without content; treat
        # that as "no image" rather than letting it surface as an opaque error.
        candidates = response.candidates or []
        content = candidates[0].content if candidates else None
        parts = (content.parts if content is not None else None) or []

        # Save the first part that carries inline image data.
        for part in parts:
            if part.inline_data is not None:
                # Context manager releases the decoded image once it is written.
                with Image.open(BytesIO(part.inline_data.data)) as image:
                    image.save(image_path)
                print(f"✅ Image saved at: {image_path}")
                return image_path

        print(f"⚠️ No image data returned for scene {scene_number}")
        return None

    except Exception as e:
        # Best effort by design: one failed scene must not abort the others.
        print(f"❌ Error generating image for scene {scene_number}: {e}")
        return None


In [7]:
# Generate one image per scene; scenes for which no path comes back are skipped.
image_paths = []
for scene_index, scene in enumerate(story_data["scenes"]):
    saved_path = generate_image(scene["image_prompt"], scene_index)
    if not saved_path:
        continue
    image_paths.append(saved_path)

🎨 Generating image for scene 1...
✅ Image saved at: output_images\scene_1.png
🎨 Generating image for scene 2...
✅ Image saved at: output_images\scene_2.png
🎨 Generating image for scene 3...
✅ Image saved at: output_images\scene_3.png


In [8]:
def _natural_sort_key(path):
    """Sort key that compares embedded numbers numerically (scene_2 before scene_10)."""
    # re.split with a capturing group alternates text, digits, text, ... so the
    # odd positions are always digit runs and types line up between keys.
    tokens = re.split(r"(\d+)", path)
    return [int(token) if position % 2 else token for position, token in enumerate(tokens)]


def _concat_file_entry(path):
    """Return a concat-list `file` line for `path`, with single quotes escaped."""
    escaped_path = os.path.abspath(path).replace("'", "'\\''")
    return f"file '{escaped_path}'\n"


def images_to_video_ffmpeg(image_dir, narration_audio_path, output_dir):
    """
    Stitch images and audio into a video using FFmpeg.

    Every *.png in `image_dir` is shown for an equal share of the narration's
    length. The narration is mixed with a randomly chosen *.mp3 from MUSIC_DIR
    (music volume 0.15) and muxed with the slideshow.

    Args:
        image_dir: Directory containing the *.png images, used in natural
            filename order.
        narration_audio_path: Path to the narration MP3.
        output_dir: Directory for the final video (created if missing);
            temporary files are also written here.

    Returns:
        Path of the final video (<output_dir>/final_video.mp4).

    Raises:
        ValueError: If no images or no background music are found.
        ffmpeg.Error: If an FFmpeg step fails (stdout/stderr are printed first).
        Exception: Any other error is printed and re-raised.
    """
    print("🎬 Starting video creation process...")
    temp_files = []
    try:
        # Get image paths and calculate duration per image
        image_paths = sorted(glob.glob(os.path.join(image_dir, "*.png")), key=_natural_sort_key)
        if not image_paths:
            raise ValueError("❌ No images found in the provided directory.")

        narration_audio = MP3(narration_audio_path)
        total_duration = narration_audio.info.length
        duration_per_image = total_duration / len(image_paths)

        # Select random background music
        music_files = glob.glob(os.path.join(MUSIC_DIR, "*.mp3"))
        if not music_files:
            raise ValueError("❌ No background music found in music/ directory.")
        bg_music_path = random.choice(music_files)

        # Define file paths. The list file sits next to the other temporary files
        # rather than in the current working directory; its entries are absolute
        # paths, so its own location does not matter to FFmpeg.
        os.makedirs(output_dir, exist_ok=True)
        list_file = os.path.join(output_dir, "image_list.txt")
        slideshow_path = os.path.join(output_dir, "temp_video.mp4")
        mixed_audio_path = os.path.join(output_dir, "mixed_audio.aac")
        final_output_path = os.path.join(output_dir, "final_video.mp4")
        temp_files = [list_file, slideshow_path, mixed_audio_path]

        # --- Step 1: Create image list file for FFmpeg ---
        with open(list_file, 'w', encoding='utf-8') as f:
            for path in image_paths:
                f.write(_concat_file_entry(path))
                f.write(f"duration {duration_per_image:.2f}\n")
            f.write(_concat_file_entry(image_paths[-1]))  # Ensure last image holds till end

        # --- Step 2: Create silent slideshow video ---
        (
            ffmpeg
            .input(list_file, format='concat', safe=0)
            .output(slideshow_path, vcodec='libx264', pix_fmt='yuv420p', r=24, vsync='vfr')
            .run(overwrite_output=True, quiet=True)
        )
        print("✅ Slideshow created.")

        # --- Step 3: Mix narration and background music ---
        main_audio = ffmpeg.input(narration_audio_path)
        background_audio = ffmpeg.input(bg_music_path).filter('volume', 0.15)

        (
            ffmpeg
            .filter([main_audio, background_audio], 'amix', inputs=2, duration='first', dropout_transition=0)
            .output(mixed_audio_path, acodec='aac')
            .run(overwrite_output=True, quiet=True)
        )
        print("✅ Audio mixed.")

        # --- Step 4: Combine slideshow with mixed audio ---
        video_input = ffmpeg.input(slideshow_path)
        audio_input = ffmpeg.input(mixed_audio_path)

        (
            ffmpeg
            .output(video_input, audio_input, final_output_path, vcodec='copy', acodec='copy', shortest=None)
            .run(overwrite_output=True, quiet=True)
        )

        print(f"🎉 Final video saved at: {final_output_path}")
        return final_output_path

    except ffmpeg.Error as e:
        print("❌ FFmpeg error occurred:")
        print("STDOUT:", e.stdout.decode() if e.stdout else "N/A")
        print("STDERR:", e.stderr.decode() if e.stderr else "N/A")
        raise
    except Exception as ex:
        print(f"❌ An error occurred during video creation: {ex}")
        raise
    finally:
        # --- Step 5: Cleanup temporary files ---
        # Runs on success and on failure, so a failed step leaves nothing behind.
        for temp_path in temp_files:
            try:
                os.remove(temp_path)
            except OSError:
                pass  # never created, or already gone

In [10]:
# Point at the narration saved earlier. os.path.join picks the right separator
# for the current OS instead of a hard-coded Windows backslash.
narration_path = os.path.join("output_videos", "ca_narration.mp3")

In [11]:
# Render the final video from the saved scene images and the narration track.
images_to_video_ffmpeg(
    image_dir=IMAGE_DIR,
    narration_audio_path=narration_path,
    output_dir=VIDEO_DIR,
)


🎬 Starting video creation process...


NameError: name 'ffmpeg' is not defined