# Smart Cultural Storyteller - AI Content Generation

This notebook generates images, narration audio, subtitles, and videos for the Betal storytelling project using free AI models, and exports the results as JSON metadata for the web application.

## Setup and Dependencies

In [None]:
# Install required packages
!pip install requests pillow opencv-python moviepy huggingface-hub transformers torch diffusers
!pip install elevenlabs gTTS  # For text-to-speech
!pip install gradio  # For easy UI testing

In [None]:
import requests
import json
import os
from PIL import Image, ImageEnhance, ImageFilter
import cv2
import numpy as np
from moviepy.editor import *
import torch
from diffusers import StableDiffusionPipeline
from gtts import gTTS
import io
from pathlib import Path

## Configuration

In [None]:
# API keys are read from the environment so that real secrets never have to be
# typed into (and accidentally committed with) the notebook. The original
# placeholder strings are kept as fallbacks when the variables are not set.
HUGGINGFACE_TOKEN = os.environ.get("HUGGINGFACE_TOKEN", "your_huggingface_token_here")
# Optional: only the ElevenLabs narration path uses this key; gTTS needs none.
ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY", "your_elevenlabs_key_here")

# Output directories (all relative to the notebook's working directory)
OUTPUT_DIR = Path("generated_content")
IMAGES_DIR = OUTPUT_DIR / "images"
AUDIO_DIR = OUTPUT_DIR / "audio"
VIDEOS_DIR = OUTPUT_DIR / "videos"

# Create directories; parents=True also creates OUTPUT_DIR itself, and
# exist_ok=True makes re-running this cell harmless.
for dir_path in (IMAGES_DIR, AUDIO_DIR, VIDEOS_DIR):
    dir_path.mkdir(parents=True, exist_ok=True)

## Story Data

In [None]:
# Sample story for generation
# This dict is the input record for the rest of the notebook:
#   - "id"      : prefix for every generated file name (images, audio, video, JSON)
#   - "title"   : human-readable title, copied into the exported web data
#   - "content" : full story text; it is narrated by the TTS step and split
#                 into sentences for the subtitles
#   - "scenes"  : one text-to-image prompt per scene; each prompt yields one image
story_data = {
    "id": "wisdom-1",
    "title": "The Wise Old Man and the Three Sons",
    # NOTE: the triple-quoted text keeps its newlines and leading indentation;
    # downstream code replaces the newlines before using it.
    "content": """Once upon a time, in a small village, there lived an old man with three sons. 
    The old man was known throughout the village for his wisdom and kindness. 
    As he grew older, he wanted to test which of his sons would inherit his wisdom.
    He gave each son a single grain of rice and told them to make it multiply by the next full moon.
    The first son planted it and got a small harvest. 
    The second son sold it and bought more rice. 
    But the third son gave it to a hungry child, saying that kindness multiplies in ways grain cannot.""",
    # Scene prompts, in narrative order (the order determines the scene number
    # in the generated image file names).
    "scenes": [
        "An old wise man in traditional Indian clothing in a village setting",
        "Three sons receiving grains of rice from their father",
        "First son planting rice in a field, traditional Indian farming",
        "Second son at a marketplace exchanging rice for money", 
        "Third son giving rice to a hungry child, showing compassion",
        "The wise old man smiling, understanding true wisdom"
    ]
}

## Image Generation using Stable Diffusion

In [None]:
# Initialize Stable Diffusion pipeline (free model)
def initialize_sd_pipeline(model_id="runwayml/stable-diffusion-v1-5"):
    """Load a Stable Diffusion pipeline on the GPU if available, else on the CPU.

    Args:
        model_id: Hugging Face Hub repository to load the weights from.
            NOTE(review): confirm this repository is still published on the
            Hub; pass a mirror id here if it is not.

    Returns:
        A ``StableDiffusionPipeline`` already moved to the selected device.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Half precision is only practical on the GPU; on the CPU the weights are
    # loaded in float32 because many CPU kernels do not support float16.
    dtype = torch.float16 if device == "cuda" else torch.float32

    pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=dtype)
    pipe = pipe.to(device)

    if device == "cuda":
        # Reduce peak VRAM usage. The pipeline has no
        # `enable_memory_efficient_attention()` method; attention slicing is
        # built in, while the xformers variant needs an extra package.
        pipe.enable_attention_slicing()

    return pipe

# Alternative: Use Hugging Face Inference API (free tier)
def generate_image_hf_api(prompt, filename, model_id="runwayml/stable-diffusion-v1-5", timeout=120):
    """Generate one image through the Hugging Face Inference API and save it.

    Args:
        prompt: Scene description; style keywords are appended to it before sending.
        filename: Output file name without extension; the image is written to
            ``IMAGES_DIR / f"{filename}.png"``.
        model_id: Hub model served by the inference endpoint.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        Path of the saved image, or ``None`` if the request failed.
    """
    api_url = f"https://api-inference.huggingface.co/models/{model_id}"
    headers = {"Authorization": f"Bearer {HUGGINGFACE_TOKEN}"}

    # Enhanced prompt for Indian cultural context
    enhanced_prompt = f"{prompt}, Indian art style, traditional, cultural, detailed, beautiful colors, cinematic lighting"

    payload = {
        "inputs": enhanced_prompt,
        "parameters": {
            "guidance_scale": 7.5,
            "num_inference_steps": 25
        }
    }

    try:
        # Without a timeout a stalled connection would hang the notebook forever.
        response = requests.post(api_url, headers=headers, json=payload, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error generating image: {exc}")
        return None

    if response.status_code != 200:
        # Include the start of the body: it normally explains the failure.
        print(f"Error generating image: {response.status_code} {response.text[:200]}")
        return None

    if response.headers.get("Content-Type", "").startswith("application/json"):
        # A JSON body is a message, not image bytes; do not save it as a .png.
        print(f"Error generating image: unexpected JSON response {response.text[:200]}")
        return None

    image_path = IMAGES_DIR / f"{filename}.png"
    with open(image_path, "wb") as f:
        f.write(response.content)
    print(f"Generated image: {image_path}")
    return image_path

In [None]:
# Generate images for the story
def generate_story_images(story_data, delay_seconds=2):
    """Generate one image per scene prompt of a story.

    Args:
        story_data: Story dict providing ``"id"`` (file name prefix) and
            ``"scenes"`` (list of prompts).
        delay_seconds: Pause between consecutive API requests, to avoid rate
            limiting.

    Returns:
        List of saved image paths as strings, in scene order. Scenes whose
        generation failed are omitted.
    """
    import time  # local import: `time` is not part of the notebook's import cell

    scenes = story_data["scenes"]
    image_paths = []

    for scene_number, scene_prompt in enumerate(scenes, start=1):
        filename = f"{story_data['id']}_scene_{scene_number}"
        image_path = generate_image_hf_api(scene_prompt, filename)

        if image_path:
            image_paths.append(str(image_path))

        # Throttle only between requests; sleeping after the last one is wasted time.
        if scene_number < len(scenes):
            time.sleep(delay_seconds)

    return image_paths

# Run image generation for the sample story and report how many images were saved
print("Generating story images...")
image_paths = generate_story_images(story_data)
print("Generated {} images".format(len(image_paths)))

## Audio Generation (Text-to-Speech)

In [None]:
# Option 1: Use gTTS (Free, supports Indian English accent)
def generate_audio_gtts(text, filename, lang='en', tld='co.in', slow=True):
    """Generate an MP3 narration with Google Text-to-Speech (gTTS).

    Args:
        text: Text to speak.
        filename: Output file name without extension; the audio is written to
            ``AUDIO_DIR / f"{filename}.mp3"``.
        lang: Language code passed to gTTS.
        tld: Google domain passed to gTTS; the default ``'co.in'`` is meant to
            select the Indian English accent.
        slow: Passed to gTTS; ``True`` requests slower speech. It only affects
            pace. NOTE(review): no voice or gender is selected by this call,
            so a "male-sounding" voice should not be assumed.

    Returns:
        Path of the saved MP3 file.
    """
    audio_path = AUDIO_DIR / f"{filename}.mp3"
    tts = gTTS(text=text, lang=lang, tld=tld, slow=slow)
    tts.save(str(audio_path))
    print(f"Generated audio: {audio_path}")
    return audio_path

# Option 2: ElevenLabs API (Better quality, limited free tier)
def generate_audio_elevenlabs(text, filename, voice_id="onwK4e9ZLuTAKqWW03F9", timeout=120):
    """Generate an MP3 narration through the ElevenLabs text-to-speech API.

    Args:
        text: Text to speak.
        filename: Output file name without extension; the audio is written to
            ``AUDIO_DIR / f"{filename}.mp3"``.
        voice_id: ElevenLabs voice identifier. The default is the id the
            notebook's author labelled "Daniel - mature male voice".
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        Path of the saved MP3 file, or ``None`` if the request failed.
    """
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"

    headers = {
        "Accept": "audio/mpeg",
        "Content-Type": "application/json",
        "xi-api-key": ELEVENLABS_API_KEY
    }

    data = {
        "text": text,
        "model_id": "eleven_multilingual_v2",
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.5
        }
    }

    try:
        # Without a timeout a stalled connection would hang the notebook forever.
        response = requests.post(url, json=data, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error generating audio: {exc}")
        return None

    if response.status_code != 200:
        # Include the start of the body: it normally explains the failure.
        print(f"Error generating audio: {response.status_code} {response.text[:200]}")
        return None

    audio_path = AUDIO_DIR / f"{filename}.mp3"
    with open(audio_path, 'wb') as f:
        f.write(response.content)
    print(f"Generated audio: {audio_path}")
    return audio_path

In [None]:
# Generate narration audio
# Collapse every run of whitespace (the newlines AND the indentation that the
# triple-quoted story text carries) into single spaces, so the TTS engine
# receives one clean line instead of text with embedded multi-space gaps.
narration_text = " ".join(story_data["content"].split())
audio_filename = f"{story_data['id']}_narration"

print("Generating narration audio...")
# Use gTTS for free option
audio_path = generate_audio_gtts(narration_text, audio_filename)

# Uncomment below to use ElevenLabs instead
# audio_path = generate_audio_elevenlabs(narration_text, audio_filename)

## Video Generation with Effects

In [None]:
def add_parallax_effect(image_path, duration=5, scale=1.2, zoom=0.1):
    """Turn a still image into a clip that slowly zooms in (Ken Burns style).

    Args:
        image_path: Path of the image file to load.
        duration: Clip length in seconds; must be positive, since the zoom
            factor is computed as a fraction of it.
        scale: Factor by which the image is upscaled before it becomes a clip.
        zoom: Additional relative zoom reached at the end of the clip
            (0.1 means the clip ends at 110% of its starting size).

    Returns:
        A centred ``ImageClip`` of the given duration with the zoom applied.
    """
    # The context manager closes the underlying file handle as soon as the
    # pixel data has been copied into the numpy array.
    with Image.open(image_path) as source_image:
        width, height = source_image.size
        enlarged = source_image.resize(
            (int(width * scale), int(height * scale)),
            Image.Resampling.LANCZOS,
        )
        frame = np.array(enlarged)

    clip = ImageClip(frame, duration=duration)

    # Zoom effect: the scale grows linearly from 1.0 to 1.0 + zoom over the clip.
    clip = clip.resize(lambda t: 1 + zoom * t / duration)

    # Keep the growing frame centred (this is a fixed position, not a pan).
    clip = clip.set_position(("center", "center"))

    return clip

def add_mystical_effects(clip):
    """Apply the fade and colour treatment used for every scene clip.

    Args:
        clip: Video clip to decorate.

    Returns:
        A new clip with a 0.5 s fade-out, a 0.5 s fade-in and a 1.1x colour
        multiplication applied.
    """
    # Add fade in/out
    clip = clip.fadeout(0.5).fadein(0.5)

    # Add subtle color enhancement. colorx is a *video* effect, so it lives in
    # `vfx`; `afx` only holds audio effects and has no `colorx` attribute.
    clip = clip.fx(vfx.colorx, factor=1.1)

    return clip

def create_story_video(image_paths, audio_path, story_id):
    """Create a narrated slideshow video from scene images and an audio file.

    The narration length is divided evenly between the images that actually
    exist on disk, so the video always spans the whole audio track.

    Args:
        image_paths: Paths of the scene images, in display order.
        audio_path: Path of the narration audio file.
        story_id: Story identifier; the video is written to
            ``VIDEOS_DIR / f"{story_id}_video.mp4"``.

    Returns:
        Path of the written video, or ``None`` when no usable image was found.
    """
    # Filter first: this avoids dividing by zero for an empty list and keeps
    # missing files from shortening the video relative to the audio.
    existing_images = []
    for image_path in image_paths:
        if os.path.exists(image_path):
            existing_images.append(image_path)
        else:
            print(f"Image not found: {image_path}")

    if not existing_images:
        print("No valid images found for video creation")
        return None

    video_path = VIDEOS_DIR / f"{story_id}_video.mp4"

    # Load audio to get duration
    audio_clip = AudioFileClip(str(audio_path))
    final_video = None
    try:
        duration_per_image = audio_clip.duration / len(existing_images)

        # Create video clips from images
        video_clips = []
        for index, image_path in enumerate(existing_images):
            clip = add_parallax_effect(image_path, duration_per_image)
            clip = add_mystical_effects(clip)
            clip = clip.set_start(index * duration_per_image)
            video_clips.append(clip)

        # Concatenate video clips
        final_video = concatenate_videoclips(video_clips, method="compose")

        # Add audio
        final_video = final_video.set_audio(audio_clip)

        # Set video properties
        final_video = final_video.resize(height=720)  # HD resolution
        final_video = final_video.set_fps(24)

        # Export video
        final_video.write_videofile(
            str(video_path),
            codec='libx264',
            audio_codec='aac',
            temp_audiofile='temp-audio.m4a',
            remove_temp=True
        )
    finally:
        # Release file handles and reader subprocesses even if rendering fails.
        if final_video is not None:
            final_video.close()
        audio_clip.close()

    print(f"Generated video: {video_path}")
    return video_path

In [None]:
# Render the video for the sample story and report the outcome
print("Creating story video with effects...")
video_path = create_story_video(image_paths, audio_path, story_data['id'])

if not video_path:
    print("Failed to create video")
else:
    print(f"\nStory video created successfully: {video_path}")
    print("You can now use this video in your web application!")

## Generate Subtitles

In [None]:
def generate_subtitles(story_content, audio_duration):
    """Split a story into sentences and give each an equal share of the audio.

    Args:
        story_content: Full story text; newlines and repeated whitespace are
            collapsed before splitting.
        audio_duration: Length of the narration in seconds.

    Returns:
        A list of dicts with ``"start"`` and ``"end"`` (seconds, rounded to
        one decimal) and ``"text"`` (the sentence). Empty or whitespace-only
        content yields an empty list.
    """
    import re  # local import: `re` is not part of the notebook's import cell

    normalized = " ".join(story_content.split())

    # Split after sentence-ending punctuation while keeping the punctuation,
    # so no sentence ends up with a doubled period.
    sentences = [part for part in re.split(r"(?<=[.!?])\s+", normalized) if part]
    if not sentences:
        return []

    # A trailing fragment without end punctuation still gets a period.
    sentences = [s if s[-1] in ".!?" else s + "." for s in sentences]

    duration_per_sentence = audio_duration / len(sentences)

    # Derive each boundary from the index instead of accumulating a running
    # float sum, so rounding error does not build up across many sentences.
    subtitles = []
    for index, sentence in enumerate(sentences):
        subtitles.append({
            "start": round(index * duration_per_sentence, 1),
            "end": round((index + 1) * duration_per_sentence, 1),
            "text": sentence
        })

    return subtitles

# Generate subtitles
# AudioFileClip is already available from the notebook's import cell. The clip
# is opened only to read the narration length and is closed right away so the
# audio file handle and reader process are not left open.
audio_clip = AudioFileClip(str(audio_path))
try:
    subtitles = generate_subtitles(story_data['content'], audio_clip.duration)
finally:
    audio_clip.close()

print("Generated subtitles:")
for subtitle in subtitles:
    print(f"{subtitle['start']}s - {subtitle['end']}s: {subtitle['text']}")

## Export Data for Web Application

In [None]:
# Assemble the metadata record consumed by the web application.
# Media URLs are rooted at /generated_content/ and use only the file names.
web_story_data = dict(
    id=story_data['id'],
    title=story_data['title'],
    content=story_data['content'],
    images=["/generated_content/images/" + Path(p).name for p in image_paths],
    audioUrl="/generated_content/audio/" + audio_path.name,
    videoUrl=("/generated_content/videos/" + video_path.name) if video_path else None,
    subtitles=subtitles,
)

# Persist the record alongside the generated media
output_json = OUTPUT_DIR / f"{story_data['id']}_data.json"
with output_json.open('w') as f:
    json.dump(web_story_data, f, indent=2)

# Summarise everything that was produced
print(
    f"\nWeb application data saved to: {output_json}",
    "\nGenerated files:",
    f"- Images: {len(image_paths)} files in {IMAGES_DIR}",
    f"- Audio: {audio_path}",
    sep="\n",
)
if video_path:
    print(f"- Video: {video_path}")
print(f"- Data: {output_json}")

## Batch Process All Stories

In [None]:
# Function to process all 10 stories
def process_all_stories(stories=None):
    """Run the full generation pipeline for every story and save the results.

    For each story this generates the scene images, the gTTS narration, the
    video and the subtitles, then collects the web metadata. A failure in one
    story is reported and skipped so the remaining stories are still processed.

    Args:
        stories: Iterable of story dicts with ``"id"``, ``"title"``,
            ``"content"`` and ``"scenes"``. Defaults to a list holding only
            the sample ``story_data``.

    Returns:
        List of web metadata dicts for the stories that were processed
        successfully. The same list is written to
        ``OUTPUT_DIR / "all_stories_data.json"``.
    """
    if stories is None:
        # We'll use the sample story for demo
        stories = [story_data]

    processed_stories = []

    for story in stories:
        print(f"\n=== Processing Story: {story['title']} ===")

        try:
            # Generate images
            image_paths = generate_story_images(story)

            # Generate audio; collapse newlines and indentation runs to single spaces
            narration_text = " ".join(story["content"].split())
            audio_path = generate_audio_gtts(narration_text, f"{story['id']}_narration")

            # Generate video
            video_path = create_story_video(image_paths, audio_path, story['id'])

            # Generate subtitles; the clip is only needed for its duration, so
            # close it immediately instead of leaking one reader per story.
            audio_clip = AudioFileClip(str(audio_path))
            try:
                subtitles = generate_subtitles(story['content'], audio_clip.duration)
            finally:
                audio_clip.close()

            # Create web data
            web_data = {
                "id": story['id'],
                "title": story['title'],
                "content": story['content'],
                "images": [f"/generated_content/images/{Path(p).name}" for p in image_paths],
                "audioUrl": f"/generated_content/audio/{audio_path.name}",
                "videoUrl": f"/generated_content/videos/{video_path.name}" if video_path else None,
                "subtitles": subtitles
            }

            processed_stories.append(web_data)
            print(f"✅ Successfully processed: {story['title']}")

        except Exception as e:
            # Deliberate best effort: report the failure and continue with the next story.
            print(f"❌ Error processing {story['title']}: {str(e)}")

    # Save all processed stories
    all_stories_file = OUTPUT_DIR / "all_stories_data.json"
    with open(all_stories_file, 'w') as f:
        json.dump(processed_stories, f, indent=2)

    print(f"\n🎉 Batch processing complete! Data saved to: {all_stories_file}")
    return processed_stories

# Uncomment to process all stories
# processed_stories = process_all_stories()

## Instructions for Integration

1. **Copy generated files** to your web project's `public/generated_content/` folder
2. **Update story data** in your React app with the generated JSON
3. **Test the video playback** in your web application
4. **Adjust timing and effects** as needed for better user experience

### File Structure for Web App:
```
public/
├── generated_content/
│   ├── images/
│   │   ├── wisdom-1_scene_1.png
│   │   ├── wisdom-1_scene_2.png
│   │   └── ...
│   ├── audio/
│   │   └── wisdom-1_narration.mp3
│   └── videos/
│       └── wisdom-1_video.mp4
```