# Story Generator

This notebook generates a narrated video from a short story by:
- Generating narration audio (gTTS)
- Generating scene clips (Hugging Face text-to-video inference API)
- Stitching with transitions and audio (moviepy)
- Exporting a small JSON payload for a web client


In [None]:
# Setup
import os, json, time, requests
from pathlib import Path
from typing import List
from moviepy.editor import VideoFileClip, AudioFileClip, concatenate_videoclips
from gtts import gTTS

# Config
# Prefer a token supplied through the environment so a real credential never
# has to be written into the notebook. The placeholder remains the fallback,
# which keeps the previous default when no variable is set.
HUGGINGFACE_TOKEN = (
    os.environ.get('HUGGINGFACE_TOKEN')
    or os.environ.get('HF_TOKEN')
    or "your_huggingface_token_here"
)
# All generated artefacts live under OUTPUT_DIR (relative to the working dir).
OUTPUT_DIR = Path('generated_content')
AUDIO_DIR = OUTPUT_DIR / 'audio'
VIDEOS_DIR = OUTPUT_DIR / 'videos'
# Create the whole output tree up front; exist_ok makes re-running the cell safe.
for d in [OUTPUT_DIR, AUDIO_DIR, VIDEOS_DIR, VIDEOS_DIR / 'clips']:
    d.mkdir(parents=True, exist_ok=True)

# Story data
# Keys: 'id' (used as a filename prefix by the pipeline), 'title', 'content'
# (the narration text, one sentence per line) and 'scenes' (one text-to-video
# prompt per clip, in playback order).
story_data = dict(
    id='wisdom-1',
    title='The Wise Old Man and the Three Sons',
    content='\n'.join((
        'Once upon a time, in a small village, there lived an old man with three sons.',
        'The old man was known throughout the village for his wisdom and kindness.',
        'As he grew older, he wanted to test which of his sons would inherit his wisdom.',
        'He gave each son a single grain of rice and told them to make it multiply by the next full moon.',
        'The first son planted it and got a small harvest.',
        'The second son sold it and bought more rice.',
        'But the third son gave it to a hungry child, saying that kindness multiplies in ways grain cannot.',
    )),
    scenes=[
        'An old wise man in traditional Indian clothing in a village setting',
        'Three sons receiving grains of rice from their father',
        'First son planting rice in a field, traditional Indian farming',
        'Second son at a marketplace exchanging rice for money',
        'Third son giving rice to a hungry child, showing compassion',
        'The wise old man smiling, understanding true wisdom',
    ],
)

# TTS (Indian-English accent via gTTS + slight pitch drop for a deeper voice)
def generate_audio_gtts(text: str, filename: str, lang: str = 'en', tld: str = 'co.in') -> Path:
    """Synthesize `text` with gTTS and return the path of the resulting MP3.

    The file is written to ``AUDIO_DIR/<filename>.mp3``. If pydub is usable,
    the audio is then post-processed (pitch lowered, resampled to 44.1 kHz,
    normalized). Post-processing is best-effort: on any failure the raw gTTS
    output is kept untouched and a message is printed.

    Args:
        text: Narration text to speak.
        filename: Output file stem (no extension).
        lang: Language code passed to gTTS.
        tld: Google domain passed to gTTS; it selects the regional accent.

    Returns:
        Path to the MP3 file.
    """
    out_path = AUDIO_DIR / f'{filename}.mp3'
    tts = gTTS(text=text, lang=lang, tld=tld, slow=True)
    tts.save(str(out_path))

    # Post-process into a sibling temp file and swap it in only on success, so
    # a failed encode can never leave a truncated narration at out_path.
    tmp_path = AUDIO_DIR / f'{filename}.post.mp3'
    try:
        from pydub import AudioSegment
        from pydub.effects import normalize
        audio = AudioSegment.from_mp3(str(out_path))
        # Re-label the same samples with a 10% lower frame rate: playback gets
        # lower-pitched (and proportionally slower) ...
        audio = audio._spawn(audio.raw_data, overrides={'frame_rate': int(audio.frame_rate * 0.9)})
        # ... then resample to a standard rate so players handle the file.
        audio = audio.set_frame_rate(44100)
        audio = normalize(audio)
        # export() hands back the open output file; close it before renaming.
        audio.export(str(tmp_path), format='mp3').close()
        tmp_path.replace(out_path)
    except Exception as e:
        # Deliberately broad: pydub/ffmpeg may be missing entirely, and the
        # unprocessed gTTS file is an acceptable fallback.
        print('pydub post-processing skipped or failed:', e)
        if tmp_path.exists():
            tmp_path.unlink()
    return out_path

# Direct Text-to-Video API
HF_T2V_MODEL = 'damo-vilab/text-to-video-ms-1.7b'
HF_T2V_URL = f'https://api-inference.huggingface.co/models/{HF_T2V_MODEL}'

def generate_video_clip_hf(prompt: str, filename: str, num_frames: int = 48, fps: int = 24, width: int = 512, height: int = 288, max_retries: int = 3) -> Path:
    """Request one clip from the Hugging Face inference endpoint and save it.

    The prompt is sent with a fixed style suffix appended; `num_frames`, `fps`,
    `width` and `height` are forwarded under the payload's ``parameters`` key
    (NOTE(review): whether the hosted model honors each of them is not
    visible from this file).

    Args:
        prompt: Scene description.
        filename: Output file stem; the clip is saved as
            ``VIDEOS_DIR/clips/<filename>.mp4``.
        num_frames, fps, width, height: Generation parameters forwarded to the API.
        max_retries: How many extra attempts to make when the API answers
            HTTP 503 (model still loading). ``0`` disables retrying.

    Returns:
        Path to the saved MP4 file.

    Raises:
        RuntimeError: If the final response is not HTTP 200 or has an empty body.
    """
    headers = {'Authorization': f'Bearer {HUGGINGFACE_TOKEN}', 'Accept': 'video/mp4'}
    payload = {'inputs': f"{prompt}, cinematic, Indian cultural aesthetics, vibrant colors, detailed", 'parameters': {'num_frames': num_frames, 'fps': fps, 'width': width, 'height': height}}
    clips_dir = VIDEOS_DIR / 'clips'
    # parents=True so the function also works when VIDEOS_DIR was not pre-created.
    clips_dir.mkdir(parents=True, exist_ok=True)
    out_path = clips_dir / f'{filename}.mp4'

    attempts = max(1, max_retries + 1)
    for attempt in range(attempts):
        resp = requests.post(HF_T2V_URL, headers=headers, json=payload, timeout=600)
        if resp.status_code != 503 or attempt == attempts - 1:
            break
        # 503 means the model is being loaded; the JSON body may carry an
        # 'estimated_time' hint. Fall back to a fixed wait if it is absent.
        try:
            wait_seconds = float(resp.json().get('estimated_time', 10.0))
        except (ValueError, TypeError, AttributeError):
            wait_seconds = 10.0
        time.sleep(min(max(wait_seconds, 1.0), 60.0))

    if resp.status_code != 200:
        raise RuntimeError(f'HF T2V error {resp.status_code}: {resp.text[:200]}')
    if not resp.content:
        # Never leave a zero-byte .mp4 behind for the stitching step to choke on.
        raise RuntimeError('HF T2V error 200: empty response body')
    with open(out_path, 'wb') as f:
        f.write(resp.content)
    return out_path


def generate_scene_clips_from_prompts(story: dict, fps: int = 24, seconds_per_scene: float = 4.0) -> List[str]:
    """Generate one video clip per prompt in ``story['scenes']``.

    Clips are named ``<story id>_scene_<NN>`` with a 1-based, zero-padded
    index. Each request asks for ``int(fps * seconds_per_scene)`` frames, and
    the loop pauses two seconds after every request.

    Args:
        story: Mapping with an ``'id'`` and an optional ``'scenes'`` list of prompts.
        fps: Frame rate forwarded to the clip generator.
        seconds_per_scene: Target clip length, used to derive the frame count.

    Returns:
        The generated clip paths as strings, in scene order (empty when the
        story has no scenes).
    """
    frames_per_clip = int(fps * seconds_per_scene)
    prompts = story.get('scenes', [])
    rendered: List[str] = []
    for scene_no, prompt in enumerate(prompts, 1):
        clip_name = f"{story['id']}_scene_{scene_no:02d}"
        clip_file = generate_video_clip_hf(
            prompt,
            clip_name,
            num_frames=frames_per_clip,
            fps=fps,
        )
        rendered.append(str(clip_file))
        # Brief pause between consecutive API calls.
        time.sleep(2)
    return rendered

# Stitching with audio
def stitch_clips_with_audio(clip_paths: List[str], audio_path: Path, story_id: str) -> Path:
    """Concatenate the scene clips with crossfades and lay the narration on top.

    The narration length (at least one second) is divided evenly between the
    clips. Every clip except the last is extended by the transition length,
    and consecutive clips overlap by that amount so the crossfade actually
    blends them and the timeline still adds up to the narration length.

    Args:
        clip_paths: Paths of the scene clips, in playback order.
        audio_path: Path of the narration audio file.
        story_id: Used to name the output ``VIDEOS_DIR/<story_id>_video.mp4``.

    Returns:
        Path to the rendered video.

    Raises:
        ValueError: If `clip_paths` is empty.
    """
    # Validate first so the error path never opens (and leaks) a media reader.
    if not clip_paths:
        raise ValueError('No clips to stitch')

    audio_clip = AudioFileClip(str(audio_path))
    opened = []  # source clips, kept so their readers can be closed afterwards
    final = None
    try:
        total_duration = max(1.0, audio_clip.duration)
        duration_per_clip = total_duration / len(clip_paths)
        # Cap the crossfade so it can never exceed half of a clip's slot.
        transition = min(0.4, duration_per_clip / 2)
        last_index = len(clip_paths) - 1
        clips = []
        for i, p in enumerate(clip_paths):
            source = VideoFileClip(p)
            opened.append(source)
            # NOTE(review): if a slot is longer than the source clip, frames
            # past its end are requested — confirm the resulting freeze is OK.
            c = source.set_duration(duration_per_clip + (transition if i < last_index else 0))
            if i > 0:
                c = c.crossfadein(transition)
            clips.append(c)
        # Negative padding makes each clip start `transition` seconds before
        # the previous one ends; without it crossfadein has nothing to blend
        # with and the timeline overruns the narration.
        final = concatenate_videoclips(clips, method='compose', padding=-transition)
        # Pin the timeline to the narration length before attaching the audio.
        final = final.set_duration(total_duration).set_audio(audio_clip).set_fps(24)
        out_path = VIDEOS_DIR / f'{story_id}_video.mp4'
        final.write_videofile(str(out_path), codec='libx264', audio_codec='aac', fps=24, verbose=False, logger=None)
        return out_path
    finally:
        # Release file handles / ffmpeg subprocesses even when rendering fails.
        if final is not None:
            final.close()
        for source in opened:
            source.close()
        audio_clip.close()

# Subtitles
def generate_subtitles(story_content: str, audio_duration: float):
    """Split the story into sentences and give each an equal share of the audio.

    Sentences are split after ``.``, ``!`` or ``?`` followed by whitespace
    (newlines count as whitespace). Existing end punctuation is kept as-is; a
    period is appended only to a fragment that has none.

    Args:
        story_content: Full story text.
        audio_duration: Narration length in seconds.

    Returns:
        A list of ``{'start', 'end', 'text'}`` dicts with times rounded to two
        decimals, or an empty list when the text contains no sentences.
    """
    import re  # local import: this file's top-level imports do not include re

    # Collapse newlines and repeated whitespace into single spaces.
    text = ' '.join(story_content.split())
    fragments = re.split(r'(?<=[.!?])\s+', text)
    sentences = [s if s[-1] in '.!?' else s + '.' for s in fragments if s]
    if not sentences:
        return []
    dur_per = audio_duration / len(sentences)
    # Derive each boundary from the index instead of accumulating float error.
    return [
        {'start': round(i * dur_per, 2), 'end': round((i + 1) * dur_per, 2), 'text': s}
        for i, s in enumerate(sentences)
    ]

# Run pipeline
# 1) narration audio, 2) one clip per scene prompt, 3) stitched video.
narration_text = story_data['content'].replace('\n', ' ').strip()
audio_filename = f"{story_data['id']}_narration"
audio_path = generate_audio_gtts(narration_text, audio_filename)
clip_paths = generate_scene_clips_from_prompts(story_data, fps=24, seconds_per_scene=4.0)
video_path = stitch_clips_with_audio(clip_paths, audio_path, story_data['id'])

# Export JSON
# Open the narration only long enough to read its duration, then release the
# underlying reader instead of leaving it open for the rest of the session.
audio_clip = AudioFileClip(str(audio_path))
try:
    audio_duration = audio_clip.duration
finally:
    audio_clip.close()
subtitles = generate_subtitles(story_data['content'], audio_duration)
# Payload consumed by the web client; URLs are site-relative paths to the
# generated files.
web_story_data = {
    'id': story_data['id'],
    'title': story_data['title'],
    'content': story_data['content'],
    'images': [],
    'audioUrl': f"/generated_content/audio/{Path(audio_path).name}",
    'videoUrl': f"/generated_content/videos/{Path(video_path).name}",
    'subtitles': subtitles
}
out_json = OUTPUT_DIR / f"{story_data['id']}_data.json"
# Explicit UTF-8 so the output does not depend on the platform's default
# encoding; ensure_ascii=False keeps any non-ASCII story text readable.
with open(out_json, 'w', encoding='utf-8') as f:
    json.dump(web_story_data, f, indent=2, ensure_ascii=False)
print('Done:', video_path, audio_path, out_json)
