# Phase 0 — Bedrock Claude 3.5 Sonnet Caption Generator




In [1]:
# %pip install -q boto3 botocore

import os
import json
import base64
import mimetypes
from dataclasses import dataclass
from typing import Optional
from pathlib import Path

import boto3
from botocore.exceptions import BotoCoreError, ClientError


In [2]:
# Bedrock model identifier; passed as `modelId` to `invoke_model` by the
# invocation code in this notebook.
MODEL_ID = "anthropic.claude-3-5-sonnet-20240620-v1:0"


def repo_root_from_notebook() -> Path:
    """Locate the repository root, starting from the current working directory.

    The working directory and then each of its ancestors are inspected,
    nearest first. The first directory that holds both a ``README.md`` file
    and a ``phase0`` sub-directory is returned. If none qualifies, the parent
    of the working directory is returned, which fits a notebook executed from
    ``<repo>/phase0``.
    """
    start = Path.cwd()
    candidates = (start, *start.parents)
    match = next(
        (
            directory
            for directory in candidates
            if (directory / "README.md").is_file() and (directory / "phase0").is_dir()
        ),
        None,
    )
    # No marker found anywhere up the tree: fall back to one level up.
    return start.parent if match is None else match


def default_prompt_path() -> str:
    """Return the caption prompt file path as a string.

    A non-empty ``PROMPT_FILE`` environment variable is returned verbatim.
    Otherwise the result is
    ``<repo root>/phase0/prompts/caption_generation_prompt.txt``, where the
    root comes from ``repo_root_from_notebook()``.
    """
    override = os.environ.get("PROMPT_FILE")
    if not override:
        prompt = repo_root_from_notebook().joinpath(
            "phase0", "prompts", "caption_generation_prompt.txt"
        )
        return str(prompt)
    return override


@dataclass
class BedrockConfig:
    """Connection settings consumed by ``create_bedrock_client``.

    ``profile_name`` is optional and defaults to ``None`` so callers that rely
    on the ambient AWS configuration only need to supply the region.
    """

    # AWS region handed to the client as ``region_name``.
    region_name: str
    # Named AWS profile; a falsy value makes ``create_bedrock_client`` build a
    # plain ``boto3.Session()`` instead of a profile-specific one.
    profile_name: Optional[str] = None


In [14]:
# Parameters — collaborators can edit these
# Prompt file path: the PROMPT_FILE environment variable when set, otherwise
# the repository default computed by default_prompt_path().
PROMPT_FILE = default_prompt_path()
# Image whose base64-encoded bytes are embedded in the request payload.
IMAGE_PATH = "/absolute/path/to/image.jpg"  # change me
# Free-text hint handed to build_prompts(); may be left empty.
CONTEXT = "sunset beach, chill vibe"  # optional
# Forwarded to the payload as "max_tokens" and "temperature".
MAX_TOKENS = 256
TEMPERATURE = 0.6
# When True, the build cell prints the first 4000 characters of the JSON payload.
PRINT_PAYLOAD = True
# When True, the invoke cells skip the Bedrock call entirely.
DRY_RUN = True

print("Using PROMPT_FILE:", PROMPT_FILE)


Using PROMPT_FILE: /Users/dennis/Desktop/Project Blankey/project_blankey/phase0/prompts/caption_generation_prompt.txt


In [None]:
# Build payload
# Validate the inputs with explicit exceptions rather than `assert`: assert
# statements are removed when Python runs with -O, which would let a bad path
# through to the file reads below.
if not os.path.isfile(PROMPT_FILE):
    raise FileNotFoundError(f"Prompt not found: {PROMPT_FILE}")
if not os.path.isfile(IMAGE_PATH):
    raise FileNotFoundError(f"Image not found: {IMAGE_PATH}")

# The helpers used here (read_file_b64, detect_mime_type, build_prompts,
# build_messages_payload) are defined in a later cell of this notebook; that
# cell must have been executed first.
image_b64 = read_file_b64(IMAGE_PATH)
image_mime = detect_mime_type(IMAGE_PATH)
system_prompt, user_text = build_prompts(PROMPT_FILE, CONTEXT)

payload = build_messages_payload(
    image_b64=image_b64,
    image_mime=image_mime,
    system_prompt=system_prompt,
    user_text=user_text,
    max_tokens=MAX_TOKENS,
    temperature=TEMPERATURE,
)

if PRINT_PAYLOAD:
    # Truncated to 4000 characters because the base64 image makes the dump long.
    print(json.dumps(payload, indent=2)[:4000])


In [None]:
# Create Bedrock client for this session
# AWS_REGION and AWS_PROFILE are not assigned above this cell; they are set in
# parameter cells further down the notebook, so one of those must run first.
client = create_bedrock_client(BedrockConfig(region_name=AWS_REGION, profile_name=AWS_PROFILE))
print(f"Bedrock client ready for region={AWS_REGION} profile={AWS_PROFILE}")


In [None]:
# Invoke Bedrock
# DRY_RUN short-circuits the call so the payload cells can be exercised
# without contacting the service.
if DRY_RUN:
    print("[dry-run] Skipping Bedrock invocation.")
else:
    response = call_bedrock(client, payload)
    caption = extract_caption(response)
    print(caption)


## Updated prompt handling and invocation

- Reads the entire prompt file and substitutes `{video_description}`.
- Sends the filled prompt as the single user text alongside the image.
- Creates a Bedrock client inline (no helper function), using your existing AWS config.


In [None]:
# Parameters for the updated flow
# Both paths are relative, so they resolve against the current working
# directory when the later cells open them.
PROMPT_FILE = "phase0/prompts/caption_generation_prompt.txt"  # repo-relative
IMAGE_PATH = "video_cover1.jpg"  # repo-root relative; included in repo
# Text substituted for the {video_description} placeholder in the prompt file.
VIDEO_DESCRIPTION = "A calm sunset over the ocean with surfers heading out."
# Region precedence: AWS_REGION, then AWS_DEFAULT_REGION, then "us-east-1".
AWS_REGION = os.getenv("AWS_REGION") or os.getenv("AWS_DEFAULT_REGION") or "us-east-1"
# Forwarded to the payload as "max_tokens" and "temperature".
MAX_TOKENS = 128
TEMPERATURE = 0.3
# When True, the build cell prints the first 4000 characters of the JSON payload.
PRINT_PAYLOAD = True
# When True, the invoke cell skips the Bedrock call.
DRY_RUN = True

print("Using PROMPT_FILE:", PROMPT_FILE)
print("Using IMAGE_PATH:", IMAGE_PATH)


In [None]:
# Build prompt text by substituting {video_description}
with open(PROMPT_FILE, "r", encoding="utf-8") as f:
    prompt_template = f.read()

# Plain string replacement (not str.format), so any other braces in the prompt
# file are left untouched.
filled_prompt = prompt_template.replace("{video_description}", VIDEO_DESCRIPTION)

# Prepare image block
# Raise explicitly instead of using `assert`: assert statements are removed
# when Python runs with -O, which would skip this check.
if not os.path.isfile(IMAGE_PATH):
    raise FileNotFoundError(f"Image not found: {IMAGE_PATH}")
# read_file_b64 and detect_mime_type are defined in a later cell of this
# notebook; that cell must have been executed first.
image_b64 = read_file_b64(IMAGE_PATH)
image_mime = detect_mime_type(IMAGE_PATH)

# Build payload: single user text = filled prompt; include image
payload = {
    "anthropic_version": "bedrock-2023-05-31",
    "max_tokens": MAX_TOKENS,
    "temperature": TEMPERATURE,
    "messages": [
        {
            "role": "user",
            "content": [
                {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": image_mime,
                        "data": image_b64,
                    },
                },
                {"type": "text", "text": filled_prompt},
            ],
        }
    ],
}

if PRINT_PAYLOAD:
    # Truncated to 4000 characters because the base64 image makes the dump long.
    print(json.dumps(payload, indent=2)[:4000])


In [None]:
# Create Bedrock client inline (uses existing AWS config)
# No profile is passed to the session; only the region is set explicitly on
# the "bedrock-runtime" client.
session = boto3.Session()
bedrock = session.client("bedrock-runtime", region_name=AWS_REGION)
print("Bedrock client created for region:", AWS_REGION)


In [None]:
# Invoke Bedrock with the built payload
if not DRY_RUN:
    resp = bedrock.invoke_model(modelId=MODEL_ID, body=json.dumps(payload))
    raw = resp.get("body")
    # Read the body when it is file-like; otherwise fall back to its string form.
    text = raw.read().decode("utf-8") if hasattr(raw, "read") else str(raw)
    response_json = json.loads(text)

    # Extract caption: concatenate the text of every text-type content item.
    content = response_json.get("content", [])
    combined = "".join(
        item.get("text", "")
        for item in content
        if isinstance(item, dict) and item.get("type") in {"text", "output_text"}
    )
    if not combined:
        combined = response_json.get("output_text", "")
    combined = combined.strip()

    try:
        parsed = json.loads(combined)
    except json.JSONDecodeError:
        parsed = None

    # Only a JSON object can carry a "caption" key. Valid JSON that is not an
    # object (a number, list, string, ...) has no .get(), so print the raw
    # text instead of raising AttributeError.
    if isinstance(parsed, dict):
        print(parsed.get("caption", combined))
    else:
        print(combined)
else:
    print("[dry-run] Skipping Bedrock invocation.")


# Phase 0 — Bedrock Claude 3.5 Sonnet Caption Generator
- Edit the prompt file at `phase0/prompts/caption_generation_prompt.txt` to iterate without changing code.



In [2]:
# Setup: installs (skip if already installed)
# %pip install -q boto3 botocore

import os
import json
import base64
import mimetypes
from dataclasses import dataclass
from typing import Optional, Tuple

import boto3
from botocore.exceptions import BotoCoreError, ClientError



In [None]:
# Override: hard-code default prompt path (edit if your path changes)
# NOTE: this is an absolute path on one specific machine; anyone else running
# the notebook has to change it before the build cell's file check can pass.
DEFAULT_PROMPT_PATH = \
    "/Users/dennis/Desktop/Project Blankey/project_blankey/phase0/prompts/caption_generation_prompt.txt"
print("Using DEFAULT_PROMPT_PATH:", DEFAULT_PROMPT_PATH)


In [None]:
def read_file_b64(path: str) -> str:
    """Return the contents of the file at *path* as a base64 string.

    The file is opened in binary mode and read in full before being encoded.
    """
    with open(path, mode="rb") as stream:
        raw_bytes = stream.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")


def detect_mime_type(path: str) -> str:
    """Guess the MIME type for *path* with ``mimetypes.guess_type``.

    Returns ``"application/octet-stream"`` when no type can be guessed.
    """
    mime = mimetypes.guess_type(path)[0]
    if not mime:
        return "application/octet-stream"
    return mime


def build_prompts(prompt_file_path: str, context_text: str) -> tuple[str, str]:
    """Produce the ``(system_prompt, user_text)`` pair for a caption request.

    The system prompt is the whitespace-stripped content of the UTF-8 file at
    *prompt_file_path*. The user text always ends with the JSON-only
    instruction; when *context_text* contains non-whitespace characters, its
    stripped form is placed before that instruction under a context header.
    """
    closing_instruction = "Respond with strictly valid JSON only."

    with open(prompt_file_path, "r", encoding="utf-8") as handle:
        raw_prompt = handle.read()
    system_prompt = raw_prompt.strip()

    # Falsy context (None or "") and whitespace-only context are both "no context".
    trimmed_context = context_text.strip() if context_text else ""
    if not trimmed_context:
        return system_prompt, closing_instruction

    user_text = (
        "Context from user (optional, may be incomplete):\n"
        f"{trimmed_context}\n\n{closing_instruction}"
    )
    return system_prompt, user_text


def build_messages_payload(image_b64: str, image_mime: str, system_prompt: str, user_text: str, max_tokens: int, temperature: float = 0.6) -> dict:
    """Assemble the request body for one image-plus-text user message.

    The returned dict carries the ``anthropic_version`` marker, the sampling
    settings, the system prompt, and a single ``user`` message whose content
    is the base64 image followed by *user_text*.
    """
    image_block = {
        "type": "image",
        "source": {
            "type": "base64",
            "media_type": image_mime,
            "data": image_b64,
        },
    }
    text_block = {"type": "text", "text": user_text}
    user_message = {"role": "user", "content": [image_block, text_block]}

    # Key order is kept stable because the notebook prints this dict as JSON.
    return {
        "anthropic_version": "bedrock-2023-05-31",
        "max_tokens": max_tokens,
        "temperature": temperature,
        "system": system_prompt,
        "messages": [user_message],
    }


def create_bedrock_client(config: BedrockConfig):
    """Create a ``bedrock-runtime`` client from *config*.

    A session bound to ``config.profile_name`` is used when that value is
    truthy; otherwise a default ``boto3.Session()`` is created. The client is
    always pinned to ``config.region_name``.
    """
    session_kwargs = {"profile_name": config.profile_name} if config.profile_name else {}
    aws_session = boto3.Session(**session_kwargs)
    return aws_session.client("bedrock-runtime", region_name=config.region_name)


def call_bedrock(client, payload: dict) -> dict:
    """Send *payload* to the model named by ``MODEL_ID`` and return parsed JSON.

    The payload is serialised with ``json.dumps`` and passed to
    ``client.invoke_model``. The response ``body`` is read and UTF-8 decoded
    when it exposes ``read``; otherwise its ``str()`` form is used. The
    resulting text is parsed with ``json.loads``.
    """
    response = client.invoke_model(modelId=MODEL_ID, body=json.dumps(payload))
    stream = response.get("body")
    if hasattr(stream, "read"):
        decoded = stream.read().decode("utf-8")
    else:
        decoded = str(stream)
    return json.loads(decoded)


def extract_caption(model_response: dict) -> str:
    """Pull the caption string out of a parsed model response.

    The text of every ``content`` item whose type is ``"text"`` or
    ``"output_text"`` is concatenated; if that yields nothing, the top-level
    ``"output_text"`` value is used instead. The stripped text is then parsed
    as JSON, and a non-empty string under the ``"caption"`` key wins.

    Args:
        model_response: Decoded JSON body returned by the model.

    Returns:
        The stripped caption when the text is a JSON object with a non-empty
        string ``"caption"``; otherwise the stripped raw text (possibly
        ``""``).
    """
    # `or []` also covers an explicit `"content": null` in the response.
    content = model_response.get("content") or []
    pieces = []
    for item in content:
        if isinstance(item, dict) and item.get("type") in {"text", "output_text"}:
            piece = item.get("text", "")
            # Skip non-string text values instead of failing on concatenation.
            if isinstance(piece, str):
                pieces.append(piece)
    text = "".join(pieces)

    if not text:
        fallback = model_response.get("output_text", "")
        text = fallback if isinstance(fallback, str) else ""
    text = text.strip()

    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        return text

    # Valid JSON is not necessarily an object: numbers, lists and strings have
    # no .get(), and a caption may be null or non-string. Fall back to the raw
    # text in all of those cases rather than raising AttributeError.
    if isinstance(parsed, dict):
        caption = parsed.get("caption")
        if isinstance(caption, str):
            caption = caption.strip()
            if caption:
                return caption
    return text



In [None]:
# Parameters — edit these
# Region precedence: AWS_REGION, then AWS_DEFAULT_REGION, then "us-east-1".
AWS_REGION = os.getenv("AWS_REGION") or os.getenv("AWS_DEFAULT_REGION") or "us-east-1"
# Unset or empty AWS_PROFILE becomes None, which selects the default boto3 session.
AWS_PROFILE = os.getenv("AWS_PROFILE") or None
PROMPT_FILE = DEFAULT_PROMPT_PATH  # Or set to a custom path
# Image whose base64-encoded bytes are embedded in the request payload.
IMAGE_PATH = "/absolute/path/to/image.jpg"  # change me
# Free-text hint handed to build_prompts(); may be left empty.
CONTEXT = "sunset beach, chill vibe"  # optional
# Forwarded to the payload as "max_tokens" and "temperature".
MAX_TOKENS = 256
TEMPERATURE = 0.6
# When True, the build cell prints the first 4000 characters of the JSON payload.
PRINT_PAYLOAD = True
# When True, the invoke cells skip the Bedrock call entirely.
DRY_RUN = True


In [None]:
# Build payload
# Validate the inputs with explicit exceptions rather than `assert`: assert
# statements are removed when Python runs with -O, which would let a bad path
# through to the file reads below.
if not os.path.isfile(PROMPT_FILE):
    raise FileNotFoundError(f"Prompt not found: {PROMPT_FILE}")
if not os.path.isfile(IMAGE_PATH):
    raise FileNotFoundError(f"Image not found: {IMAGE_PATH}")

image_b64 = read_file_b64(IMAGE_PATH)
image_mime = detect_mime_type(IMAGE_PATH)
system_prompt, user_text = build_prompts(PROMPT_FILE, CONTEXT)

payload = build_messages_payload(
    image_b64=image_b64,
    image_mime=image_mime,
    system_prompt=system_prompt,
    user_text=user_text,
    max_tokens=MAX_TOKENS,
    temperature=TEMPERATURE,
)

if PRINT_PAYLOAD:
    print(json.dumps(payload, indent=2)[:4000])  # truncate for display


In [None]:
# Create Bedrock client (optional explicit step)
# The resulting `client` global is reused by the next invoke cell; the final
# invoke cell builds its own client and does not need this step.
client = create_bedrock_client(BedrockConfig(region_name=AWS_REGION, profile_name=AWS_PROFILE))
print(f"Bedrock client ready for region={AWS_REGION} profile={AWS_PROFILE}")


In [None]:
# Invoke Bedrock using existing client (recommended for repeated runs)
if not DRY_RUN:
    # Explicit check instead of `assert`: assert statements are removed when
    # Python runs with -O, which would turn this hint into a bare NameError.
    if "client" not in globals():
        raise RuntimeError("Run the client creation cell first.")
    response = call_bedrock(client, payload)
    caption = extract_caption(response)
    print(caption)
else:
    print("[dry-run] Skipping Bedrock invocation.")


In [None]:
# Invoke Bedrock (skip if DRY_RUN)
# Self-contained variant: a fresh client is built on every real run, so the
# separate client-creation cell is not required.
if DRY_RUN:
    print("[dry-run] Skipping Bedrock invocation.")
else:
    client = create_bedrock_client(BedrockConfig(region_name=AWS_REGION, profile_name=AWS_PROFILE))
    response = call_bedrock(client, payload)
    caption = extract_caption(response)
    print(caption)
