# Phase 0 — Bedrock Claude 3.5 Sonnet Caption Generator

This notebook generates a concise caption from an image using AWS Bedrock (Anthropic Claude 3.5 Sonnet).
- Set up AWS credential using aws configure in bash



In [5]:
# %pip install -q boto3 botocore

import os
import json
import base64
import mimetypes
from pathlib import Path

import boto3



In [None]:
MODEL_ID = "us.anthropic.claude-3-5-sonnet-20241022-v2:0"
PROMPT_FILE = "phase0/prompts/caption_generation_prompt.txt"  
IMAGE_PATH = "video_cover1.jpg" 
VIDEO_DESCRIPTION = "我和我的柴犬宝宝训练的搞笑视频，我先演示一遍动作，柴犬就跟着做一遍动作，比如坐，趴下，打滚。"
MAX_TOKENS = 1024
TEMPERATURE = 1
PRINT_PAYLOAD = True
DRY_RUN = False


In [4]:
prompt_path = "../prompts/caption_generation_prompt.txt"

with open(prompt_path, "r", encoding="utf-8") as f:
    prompt_template = f.read()

filled_prompt = prompt_template.replace("{video_description}", VIDEO_DESCRIPTION)
print("Text Message in Prompt Loaded:\n")
print(filled_prompt)


Text Message in Prompt Loaded:

<Role>
You are an expert social media manager designing creative captions.
</Role>

<Input>
Video Cover: An image for the video cover. This is what the audience will see in their home page. 
Video Description: Below is a description of what the video is about:
我和我的柴犬宝宝训练的搞笑视频，我先演示一遍动作，柴犬就跟着做一遍动作，比如坐，趴下，打滚。
</Input>

<Task>
- Respond in the same language as main language used in Video Description, if not provided, use English. 
- Step 1: Carefully analyze the input Video Cover and Video Description to understand the key message or highlight of the video, then summarize the selling point we should leverage to attract audience.
- Step 2: Identify 3 different video title stype for the video, ex: humor, emotional, cute.
- Step 3: For each style from Step 2, generate ONE concise, catchy video title under a soft limit of 20 characters. Make engaging, natural, and suitable for social media, along with the Video Cover it should attract audience to click and engag

In [9]:
# Prepare image and build payload
# base working directory
cwd = Path.cwd() 
# IMAGE_PATH relative to repo
image_candidates = [
    cwd / IMAGE_PATH,                           # relative to current working dir
    cwd.parent / "samples" / IMAGE_PATH,       # up one level, then into samples/
    Path("phase0") / "samples" / IMAGE_PATH,   # relative from repo root
]
# IMAGE_PATH relative to repo
image_candidates = [
    cwd / IMAGE_PATH,                           # relative to current working dir
    cwd.parent / "samples" / IMAGE_PATH,       # up one level, then into samples/
    Path("phase0") / "samples" / IMAGE_PATH,   # relative from repo root
]

resolved_image = None
for img in image_candidates:
    if img.is_file():
        resolved_image = img
        break

assert resolved_image is not None, f"Image not found: {IMAGE_PATH} (checked: {image_candidates})"

# Read and encode image
with open(resolved_image, "rb") as f:
    image_b64 = base64.b64encode(f.read()).decode("utf-8")

mime, _ = mimetypes.guess_type(resolved_image)
image_mime = mime or "application/octet-stream"


payload = {
    "anthropic_version": "bedrock-2023-05-31",
    "max_tokens": MAX_TOKENS,
    "temperature": TEMPERATURE,
    "messages": [
        {
            "role": "user",
            "content": [
                {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": image_mime,
                        "data": image_b64,
                    },
                },
                {"type": "text", "text": filled_prompt},
            ],
        }
    ],
}

if PRINT_PAYLOAD:
    print("Using image:", resolved_image)
    print(json.dumps(payload, indent=2)[:800])


Using image: /Users/dennis/Desktop/Project Blankey/project_blankey/phase0/samples/video_cover1.jpg
{
  "anthropic_version": "bedrock-2023-05-31",
  "max_tokens": 1024,
  "temperature": 1,
  "messages": [
    {
      "role": "user",
      "content": [
        {
          "type": "image",
          "source": {
            "type": "base64",
            "media_type": "image/jpeg",
            "data": "/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgFBgcGBQgHBgcJCAgJDBMMDAsLDBgREg4THBgdHRsYGxofIywlHyEqIRobJjQnKi4vMTIxHiU2OjYwOiwwMTD/2wBDAQgJCQwKDBcMDBcwIBsgMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDAwMDD/wAARCASQBJIDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2u


In [10]:
# Create Bedrock client and invoke
if not DRY_RUN:
    bedrock = boto3.client("bedrock-runtime", region_name='us-east-2')
    resp = bedrock.invoke_model(modelId=MODEL_ID, body=json.dumps(payload))
    raw = resp.get("body")
    text = raw.read().decode("utf-8") if hasattr(raw, "read") else str(raw)

    response_json = json.loads(text)
    print(response_json['content'][0]['text'])
   
else:
    print("[dry-run] Skipping Bedrock invocation.")


{
  "selling_point": "柴犬和主人一起做趴下训练的可爱互动瞬间",
  "captions": [
    {
      "style": "搞笑",
      "caption": "柴犬：这动作我也会！"
    },
    {
      "style": "温馨",
      "caption": "和柴宝宝的训练时光"
    },
    {
      "style": "趣味",
      "caption": "主人趴下柴柴跟着趴"
    }
  ]
}
