# In [None]:
# Processing and Narrating a Video with GPT-4o and TTS API

# This guide demonstrates using GPT-4o's visual capabilities with video frames and generating a voiceover using the TTS API.

# 1. Describing a Video with GPT-4o

# Extract Frames and Create a Storyboard

# Extract Frames from Video:
# Use OpenCV to read a nature video (bison and wolves).
# Convert frames to base64 format.
# Display frames to verify.

import base64
import time

import cv2
from IPython.display import display, Image

# NOTE(review): the next line originally read
# `from langchain_community.document_loaders import` with no names after
# `import`, which is a SyntaxError and prevents the whole file from parsing.
# Nothing visible in this script uses langchain, so the line is preserved as a
# comment until the intended loader name is known.
# from langchain_community.document_loaders import
# Decode the clip frame by frame, keeping each frame as a base64-encoded JPEG
# string in base64Frames (the later prompt-building steps read this list).
video = cv2.VideoCapture("data/bison.mp4")
base64Frames = []

try:
    # VideoCapture does not raise when the file cannot be opened; it only
    # reports "not opened". Fail here rather than carry an empty frame list
    # into the API requests further down.
    if not video.isOpened():
        raise OSError('Could not open video file "data/bison.mp4"')

    while video.isOpened():
        success, frame = video.read()
        if not success:
            # No more frames could be read: stop collecting.
            break
        _, buffer = cv2.imencode(".jpg", frame)
        base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
finally:
    # Release the capture handle even if reading or encoding fails part-way.
    video.release()

print(len(base64Frames), "frames read.")

# Preview the frames by repeatedly updating one display handle, pausing
# briefly between frames.
display_handle = display(None, display_id=True)
for img in base64Frames:
    # b64decode accepts the ASCII str directly; no re-encoding to bytes needed.
    display_handle.update(Image(data=base64.b64decode(img)))
    time.sleep(0.025)

# Generate a Storyboard Description

# Generate Storyboard with Descriptions:
# Craft a prompt with selected frames.
# Send request to GPT-4o for a compelling storyboard.

from openai import OpenAI
import os

# Chat client used by the requests in this script. The key is read from the
# OPENAI_API_KEY environment variable when present; otherwise the placeholder
# string is passed as the key.
client = OpenAI(
    api_key=os.environ.get(
        "OPENAI_API_KEY",
        "<your OpenAI API key if not set as env var>",
    )
)

# A single user message: the instruction text followed by every 50th frame.
PROMPT_MESSAGES = [
    {
        "role": "user",
        "content": [
            "These are frames from a video that I want to upload. Generate a storyboard with a compelling description for each frame.",
            *[
                {"image": frame_b64, "resize": 768}
                for frame_b64 in base64Frames[::50]
            ],
        ],
    },
]
params = dict(model="gpt-4o", messages=PROMPT_MESSAGES, max_tokens=200)

# Send the request and keep the text of the first choice as the storyboard.
result = client.chat.completions.create(**params)
storyboard = result.choices[0].message.content

print(storyboard)

# Highlight Key Moments in the Video

# Highlight Key Moments:
# Identify and describe key moments in the video.

# A single user message asking for the key moments, followed by every 50th
# frame of the clip.
PROMPT_MESSAGES = [
    {
        "role": "user",
        "content": [
            "These are frames from a video. Highlight and describe key moments that make the video interesting.",
            *[
                {"image": frame_b64, "resize": 768}
                for frame_b64 in base64Frames[::50]
            ],
        ],
    },
]
params = dict(model="gpt-4o", messages=PROMPT_MESSAGES, max_tokens=200)

# Send the request and keep the text of the first choice.
result = client.chat.completions.create(**params)
key_moments = result.choices[0].message.content

print(key_moments)

# 2. Generating a Voiceover with GPT-4o and TTS API

# Create a Thematic Voiceover Script

# Generate Thematic Voiceover Script:
# Prompt GPT-4o to generate a voiceover script focusing on a specific theme (e.g., "The Struggle for Survival").

# A single user message asking for a themed narration script, followed by
# every 60th frame of the clip.
PROMPT_MESSAGES = [
    {
        "role": "user",
        "content": [
            "These are frames of a video. Create a short voiceover script focusing on the theme 'The Struggle for Survival' in the style of David Attenborough.",
            *[
                {"image": frame_b64, "resize": 768}
                for frame_b64 in base64Frames[::60]
            ],
        ],
    },
]
params = dict(model="gpt-4o", messages=PROMPT_MESSAGES, max_tokens=500)

# Send the request and keep the text of the first choice as the script.
result = client.chat.completions.create(**params)
voiceover_script = result.choices[0].message.content

print(voiceover_script)

# Generate Voiceover with Different Voices

# Generate Voiceover Audio with Various Voices:
# Pass the script to the TTS API and generate audio with different voices.

import requests


def generate_voiceover(script, voice, model="tts-1-1106"):
    """Turn ``script`` into spoken audio via the OpenAI speech endpoint.

    Args:
        script: Text to be spoken; sent as the request's ``input`` field.
        voice: Voice name; sent as the request's ``voice`` field.
        model: TTS model name; sent as the request's ``model`` field.

    Returns:
        The response body as ``bytes`` (the encoded audio).

    Raises:
        KeyError: If the ``OPENAI_API_KEY`` environment variable is not set.
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status,
            e.g. for an unknown voice or an invalid API key.
    """
    response = requests.post(
        "https://api.openai.com/v1/audio/speech",
        headers={
            "Authorization": f"Bearer {os.environ['OPENAI_API_KEY']}",
        },
        json={
            "model": model,
            "input": script,
            "voice": voice,
        },
    )

    # Without this check an error reply (a JSON body) would be returned to the
    # caller as if it were audio data.
    response.raise_for_status()

    # The request is not streamed, so the whole body is already in memory;
    # take it directly instead of re-assembling it chunk by chunk.
    return response.content


# Render the same script with two different voices so they can be compared.
# NOTE: this originally requested the voice "amber", which is not among the
# voices documented for the speech endpoint, so that request could not return
# audio. "nova" is used instead and the variable is named to match.
voiceover_onyx = generate_voiceover(voiceover_script, "onyx")
voiceover_nova = generate_voiceover(voiceover_script, "nova")

from IPython.display import Audio

# Show an audio player for each rendered voiceover.
display(Audio(voiceover_onyx))
display(Audio(voiceover_nova))

# This guide now includes creating a storyboard, highlighting key moments, and generating voiceovers with various voices for a more dynamic and engaging presentation.
