<a href="https://colab.research.google.com/github/Yansun3/vertex-ai-creative-studio/blob/main/%5BBlogpost%5D_Case_Study_2_Cymber_Coffee_Video_Generation_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ==============================================================================
# 1. SETUP AND AUTHENTICATION
# ==============================================================================

# Imports
import os
import json
import time
from IPython.display import display, Markdown, Image, Video
from google import genai
from google.genai import types
from google.colab import files, auth

# ==============================================================================
# 2. CONFIGURATION AND CONSTANTS
# ==============================================================================


# 1. Enter your Google Cloud Project ID and location below.
#    Both values are passed unchanged to genai.Client further down. They are
#    blank placeholders here and must be filled in before running.
PROJECT_ID = ""
LOCATION = ""

# 2. Authenticate with Google Cloud
#    Uses the Colab auth helper. A failure is reported but not re-raised, so
#    execution still continues to the client construction below.
print("üîê Authenticating with Google Cloud...")
try:
    auth.authenticate_user()
    print(f"‚úÖ Authenticated. Will use project: {PROJECT_ID}")
except Exception as e:
    print(f"‚ùå Authentication failed: {e}")
    print("Veo video generation will likely fail without active Cloud authentication.")

# 3. Initialize the Client for Vertex AI
#    Module-level client shared by the evaluation, rewrite, image and video
#    functions defined later in this file.
client = genai.Client(vertexai=True, project=PROJECT_ID, location=LOCATION)

# Model Configuration
# Blank placeholders: GEMINI_MODEL is used by evaluate_asset and
# rewrite_prompt, IMAGE_GEN_MODEL by generate_image, and VIDEO_GEN_MODEL by
# generate_video. Fill in the model identifiers before running.
GEMINI_MODEL = ""
IMAGE_GEN_MODEL = ""
VIDEO_GEN_MODEL = ""

# Response schema for the structured brand-compliance evaluation.
# It is passed as `response_schema` in evaluate_asset so the model's reply is
# JSON with the sections below. The "assetQualitySummary" field is the one
# read back by evaluate_asset (for display) and rewrite_prompt (as input).
EVALUATION_SCHEMA = types.Schema(
    type=types.Type.OBJECT,
    properties={
        # Adherence to the uploaded reference guidelines.
        "guidelineAdherence": types.Schema(
             type=types.Type.OBJECT,
             description="How well the target asset aligns with the provided reference brand guidelines.",
             properties={
                 "adherenceScore": types.Schema(type=types.Type.INTEGER, description="Score from 1-10 on strict adherence."),
                 "deviations": types.Schema(type=types.Type.ARRAY, items=types.Schema(type=types.Type.STRING), description="List of specific deviations."),
             }
        ),
        # Colors found in the asset and how they match the brand.
        "colorPaletteEvaluation": types.Schema(
            type=types.Type.OBJECT,
            description="Evaluation of the asset's color use.",
            properties={
                "identifiedColors": types.Schema(type=types.Type.ARRAY, items=types.Schema(type=types.Type.STRING), description="List of primary colors detected."),
                "colorAnalysis": types.Schema(type=types.Type.STRING, description="Analysis of color harmony and brand matching."),
            }
        ),
        # Unintended / garbled text or visual artifacts.
        "gibberishCheck": types.Schema(
            type=types.Type.OBJECT,
            description="Check for unintended or gibberish text/artifacts.",
            properties={
                "gibberishDetected": types.Schema(type=types.Type.BOOLEAN, description="True if artifacts are present."),
                "details": types.Schema(type=types.Type.STRING, description="Specifics of artifacts."),
            }
        ),
        # Presence and treatment of the logo.
        "logoConsistencyEvaluation": types.Schema(
            type=types.Type.OBJECT,
            description="Evaluation of logo elements.",
            properties={
                "logoFound": types.Schema(type=types.Type.BOOLEAN, description="True if a logo is visible."),
                "consistencyReport": types.Schema(type=types.Type.STRING, description="Report on logo appearance and placement."),
            }
        ),
        # Subjective tone and aesthetics scores with a rationale.
        "abstractComplianceEvaluation": types.Schema(
            type=types.Type.OBJECT,
            description="Evaluation of aesthetic and tonal compliance.",
            properties={
                "brandToneScore": types.Schema(type=types.Type.INTEGER, description="Score 1-10 on brand tone evocation."),
                "aestheticsScore": types.Schema(type=types.Type.INTEGER, description="Score 1-10 on beauty and composition."),
                "abstractSummary": types.Schema(type=types.Type.STRING, description="Rationale for scores.")
            }
        ),
        # Overall summary; consumed downstream when rewriting the prompt.
        "assetQualitySummary": types.Schema(type=types.Type.STRING, description="Concise summary of all weaknesses and strengths to guide prompt rewriting.")
    }
)

# ==============================================================================
# 3. UTILITY FUNCTIONS (File Handling)
# ==============================================================================

def get_mime_type(filename):
    """Return the MIME type implied by *filename*'s extension.

    The extension is the text after the last ``.`` and is matched
    case-insensitively. Supported: png, jpg/jpeg, pdf, txt, mp4, mov, avi,
    mpeg/mpg.

    Args:
        filename: File name (or path) as a string.

    Returns:
        The MIME type string, or ``None`` when the name has no ``.`` at all
        or the extension is not one of the supported types.
    """
    mime_by_extension = {
        'png': 'image/png',
        'jpg': 'image/jpeg',
        'jpeg': 'image/jpeg',
        'pdf': 'application/pdf',
        'txt': 'text/plain',
        'mp4': 'video/mp4',
        'mov': 'video/quicktime',
        'avi': 'video/x-msvideo',
        'mpeg': 'video/mpeg',
        'mpg': 'video/mpeg',
    }
    # rpartition tells us whether a dot was actually present; a bare name
    # such as "png" must not be mistaken for a file with a .png extension.
    _stem, dot, ext = filename.lower().rpartition('.')
    if not dot:
        return None
    return mime_by_extension.get(ext)

def process_uploaded_file(uploaded, file_name, is_reference=False):
    """Wrap one uploaded file in a ``types.Part``.

    Args:
        uploaded: Mapping of file name to raw bytes.
        file_name: Key of the file to process within ``uploaded``.
        is_reference: True for a reference guideline, False for the target
            asset; only affects the label / log wording.

    Returns:
        A text Part (with a labelled header) for ``text/plain`` files, a
        bytes Part for every other recognised type, or ``None`` when
        ``get_mime_type`` does not recognise the file.
    """
    payload = uploaded[file_name]
    detected_type = get_mime_type(file_name)
    if not detected_type:
        return None

    # Binary assets (images, PDF, video) are forwarded as raw bytes.
    if detected_type != 'text/plain':
        role = 'Reference' if is_reference else 'Target'
        print(f"Processed {role}: {file_name} ({detected_type})")
        return types.Part.from_bytes(data=payload, mime_type=detected_type)

    # Plain text is inlined, wrapped in a header naming its role and file.
    decoded = payload.decode('utf-8')
    if is_reference:
        label = "REFERENCE GUIDELINE"
    else:
        label = "TARGET ASSET"
    return types.Part(text=f"\n--- {label} ({file_name}) ---\n{decoded}\n")

def collect_inputs():
    """Interactively gather the context, reference files and target asset.

    Prompts for a description and a brand tone, then opens two upload
    dialogs: one for up to six reference guideline files and one for the
    single target asset to evaluate.

    Returns:
        A list of ``types.Part`` forming the evaluation request
        (instruction text, reference parts, separator, target part), or
        ``None`` when the description or tone is empty or no usable target
        asset was provided. Each reason for returning ``None`` is printed.
    """
    max_references = 6

    print("--- 1. Setup Context ---")
    context_prompt = input("Enter a description of the desired final asset: ").strip()
    brand_tone = input("Enter the Desired Brand Tone: ").strip()

    if not context_prompt or not brand_tone:
        print("Both a description and a brand tone are required; nothing to evaluate.")
        return None

    reference_parts = []
    print("\n--- 2. Source 1: Reference Brand Guidelines ---")
    print("Upload up to 6 files (Images, PDF, TXT, Video). Cancel upload dialog to finish.")
    try:
        uploaded_refs = files.upload() or {}
    except Exception as e:
        # References are optional, so report the problem and carry on.
        print(f"Reference upload did not complete: {e}")
        uploaded_refs = {}

    for file_name in uploaded_refs:
        if len(reference_parts) >= max_references:
            print(f"Only the first {max_references} reference files are used; ignoring the rest.")
            break
        # Handle each file separately so one bad file does not discard the others.
        try:
            part = process_uploaded_file(uploaded_refs, file_name, is_reference=True)
        except Exception as e:
            print(f"Skipping reference '{file_name}': {e}")
            continue
        if part:
            reference_parts.append(part)
        else:
            print(f"Skipping reference '{file_name}': unsupported file type.")

    print("\n--- 3. Source 2: Target Asset for Evaluation ---")
    print("Upload ONE asset to evaluate.")
    target_part = None
    try:
        uploaded_target = files.upload()
        if uploaded_target:
            file_name = next(iter(uploaded_target))
            if len(uploaded_target) > 1:
                print(f"Multiple files uploaded; evaluating only '{file_name}'.")
            target_part = process_uploaded_file(uploaded_target, file_name, is_reference=False)
    except Exception as e:
        print(f"Target upload failed: {e}")

    if not target_part:
        print("No usable target asset was provided; nothing to evaluate.")
        return None

    final_contents = [
        types.Part(text=f"You are a Brand Compliance Officer. Analyze TARGET ASSET against REFERENCE GUIDELINES.\nContext: {context_prompt}.\nTone: {brand_tone}.\nEvaluate strictly based on references. Return JSON schema.\n\n--- BEGIN REFERENCES ---")
    ]
    final_contents.extend(reference_parts)
    final_contents.append(types.Part(text="\n--- END REFERENCES. BEGIN TARGET ASSET ---\n"))
    final_contents.append(target_part)

    return final_contents

# ==============================================================================
# 4. CORE LOGIC FUNCTIONS
# ==============================================================================

def evaluate_asset(content_parts):
    """Step 2: ask the Gemini model for a structured evaluation.

    Sends ``content_parts`` to ``GEMINI_MODEL`` requesting JSON that follows
    ``EVALUATION_SCHEMA``, parses the reply, and displays its summary.

    Returns:
        The parsed evaluation dict, or ``None`` if the request or the JSON
        parsing failed (the error is printed).
    """
    print("\n--- 4. Evaluating Asset (Gemini Flash) ---")
    try:
        result = client.models.generate_content(
            model=GEMINI_MODEL,
            contents=content_parts,
            config=types.GenerateContentConfig(
                response_mime_type="application/json",
                response_schema=EVALUATION_SCHEMA,
            ),
        )
        # The structured reply is the text of the first part of the first candidate.
        raw_json = result.candidates[0].content.parts[0].text
        findings = json.loads(raw_json)

        heading = "## üìã Evaluation Findings\n"
        summary_line = f"**Summary**: {findings.get('assetQualitySummary')}"
        display(Markdown(heading + summary_line))
    except Exception as err:
        print(f"‚ùå Evaluation failed: {err}")
        return None
    return findings

def rewrite_prompt(evaluation_data, modality="image"):
    """Step 3: produce an optimized generation prompt from the evaluation.

    Args:
        evaluation_data: Parsed evaluation dict; its ``assetQualitySummary``
            and full JSON dump are sent to the model.
        modality: ``"video"`` for a video prompt; any other value yields an
            image prompt.

    Returns:
        The rewritten prompt text (stripped), or ``None`` if the model call
        failed (the error is printed).
    """
    print(f"\n--- 5. Rewriting Prompt for {modality.upper()} (Gemini Flash) ---")
    summary = evaluation_data.get('assetQualitySummary', 'N/A')

    # Per-modality wording; the video variant explicitly forbids audio elements.
    instruction_by_modality = {
        "video": (
            "an optimized VIDEO prompt. Include details about camera movement, pacing, and scene transition "
            "if necessary to match the brand tone. IMPORTANT: Do NOT include any audio, sound effects, "
            "music, or dialogue descriptions in the final prompt."
        ),
    }
    modality_instruction = instruction_by_modality.get(modality, "an optimized IMAGE prompt")

    system_prompt = " ".join([
        "You are an AI Prompt Optimization Expert. Rewrite the user's desired asset context based on the evaluation.",
        f"Create {modality_instruction} that strictly adheres to the brand guidelines and corrects identified weaknesses.",
    ])

    full_data = json.dumps(evaluation_data)
    user_query = (
        f"Evaluation Summary: {summary}\n"
        f"Full Data: {full_data}\n\n"
        f"Provide the single, optimized {modality} prompt."
    )

    try:
        result = client.models.generate_content(
            model=GEMINI_MODEL,
            contents=[user_query],
            config=types.GenerateContentConfig(system_instruction=types.Part(text=system_prompt)),
        )
        new_prompt = result.candidates[0].content.parts[0].text.strip()
        display(Markdown(f"## üìù Optimized {modality.title()} Prompt\n\n> *{new_prompt}*\n"))
    except Exception as err:
        print(f"‚ùå Rewrite failed: {err}")
        return None
    return new_prompt

def generate_image(image_prompt):
    """Step 4a: generate an image from *image_prompt* and display it.

    Calls ``IMAGE_GEN_MODEL`` and displays the first response part that
    carries inline image data. Prints a warning when the response contains
    no image, and an error message when the request itself fails.

    Returns:
        None. The image is shown via ``display``.
    """
    print(f"\n--- 6. Generating Image ({IMAGE_GEN_MODEL}) ---")
    try:
        response = client.models.generate_content(model=IMAGE_GEN_MODEL, contents=image_prompt)
        # `parts` can be None when the response has no content; treat that
        # as "no image" rather than letting iteration raise a TypeError.
        for part in response.parts or []:
            if part.inline_data and part.inline_data.data:
                display(Image(data=part.inline_data.data))
                return
        print("‚ö†Ô∏è No image returned.")
    except Exception as e:
        print(f"‚ùå Image generation failed: {e}")

def generate_video(video_prompt):
    """Step 4b: Generate Video using Veo (Asynchronous).

    Submits a video generation job to ``VIDEO_GEN_MODEL`` (16:9, 6 seconds,
    720p, prompt enhancement on, audio off), polls every 15 seconds until
    the operation is done, then writes the first generated video to
    ``generated_video.mp4`` in the working directory and displays it.

    Returns:
        None. Progress, errors and the final video are printed / displayed.
    """
    print(f"\n--- 6. Generating Video ({VIDEO_GEN_MODEL}) ---")

    # The file's PROJECT_ID placeholder is an empty string, so reject that
    # as well as the legacy "YOUR_PROJECT_ID_HERE" marker.
    if not PROJECT_ID or PROJECT_ID == "YOUR_PROJECT_ID_HERE":
         print("‚ùå ERROR: You must set your Google Cloud PROJECT_ID at the top of the script.")
         return

    try:
        print("‚è≥ Submitting video generation job...")

        operation = client.models.generate_videos(
            model=VIDEO_GEN_MODEL,
            prompt=video_prompt,
            config=types.GenerateVideosConfig(
                aspect_ratio="16:9",
                duration_seconds=6,
                resolution="720p",

                enhance_prompt=True,
                # Disabled audio generation to match prompt instructions
                generate_audio=False
            )
        )

        print("‚è≥ Job submitted. Polling for completion (this may take 1-2 minutes)...")
        # Poll until the operation is complete
        while not operation.done:
            time.sleep(15)
            operation = client.operations.get(operation)
            print(f"Status: {operation.status if hasattr(operation, 'status') else 'Processing...'}")

        # Check for errors specifically
        if hasattr(operation, 'error') and operation.error:
            print(f"‚ùå Video generation failed with error: {operation.error}")
            return

        # Retrieve and display result
        if hasattr(operation, 'result') and operation.result and operation.result.generated_videos:
            video = operation.result.generated_videos[0].video
            video_bytes = getattr(video, 'video_bytes', None)
            if not video_bytes:
                # Without inline bytes there is nothing to write locally;
                # report where the video lives if the response says so.
                print("Video generation finished but no inline video bytes were returned.")
                print(f"Video URI (if any): {getattr(video, 'uri', None)}")
                return

            print("‚úÖ Video Generated Successfully.")

            video_filename = "generated_video.mp4"
            with open(video_filename, "wb") as f:
                f.write(video_bytes)

            display(Markdown("## üé¨ Final Generated Video"))
            display(Video(video_filename, embed=True, width=600))
        else:
             print("‚ö†Ô∏è Video generation finished but no content was returned.")
             # Print full operation for deeper debugging if it happens again
             print(f"Debug - Full Operation Details: {operation}")

    except Exception as e:
         print(f"‚ùå Video generation failed: {e}")

# ==============================================================================
# 5. EXECUTION BLOCK
# ==============================================================================

def main_workflow():
    """Run the interactive evaluate -> rewrite -> generate pipeline.

    Steps: collect inputs, evaluate the target asset, ask which modality to
    generate, rewrite the prompt for that modality, let the user optionally
    replace the prompt, then generate the image or video. Returns early
    (with no result) as soon as any step yields nothing.
    """
    content_parts = collect_inputs()
    if not content_parts:
        return

    evaluation_data = evaluate_asset(content_parts)
    if not evaluation_data:
        return

    # Ask user for desired output modality
    print("\n--- Select Generation Type ---")
    modality_choice = input("Generate (I)mage or (V)ideo based on this evaluation? [i/V]: ").lower().strip()
    # The prompt's "[i/V]" marks video as the default, so an empty answer
    # (just pressing Enter) selects video; any other unrecognised answer
    # still falls back to image.
    modality = "video" if modality_choice in ("", "v", "video") else "image"

    new_prompt = rewrite_prompt(evaluation_data, modality=modality)
    if not new_prompt:
        return

    # Allow manual edit of the prompt
    print("\n--- Review Prompt ---")
    print(f"Current Prompt: {new_prompt}")
    user_edit = input("Press Enter to keep this prompt, or type a revised version here: ").strip()
    if user_edit:
        new_prompt = user_edit
        print("‚úÖ Prompt updated.")
    else:
        print("‚úÖ Using generated prompt.")

    if modality == "video":
        generate_video(new_prompt)
    else:
        generate_image(new_prompt)

# Start the interactive workflow only when this code runs as the main module
# (script or notebook cell), not when it is imported.
if __name__ == "__main__":
    main_workflow()

🔐 Authenticating with Google Cloud...
✅ Authenticated. Will use project: genai-blackbelt-fishfooding
--- 1. Setup Context ---
Enter a description of the desired final asset: Genereate a Thanksgiving campaign video for Cymber Coffee
Enter the Desired Brand Tone: Cymber Coffee is the public perception, built cup by cup by the people who experience our coffee and our service. Cymber Coffee is the pathway to authentic, transparently sourced coffee, always presented with genuine expertise and a soothing, natural, warmth

--- 2. Source 1: Reference Brand Guidelines ---
Upload up to 6 files (Images, PDF, TXT, Video). Cancel upload dialog to finish.


Saving Cymber Coffee Brand Guideline .pdf to Cymber Coffee Brand Guideline  (7).pdf
Processed Reference: Cymber Coffee Brand Guideline  (7).pdf (application/pdf)

--- 3. Source 2: Target Asset for Evaluation ---
Upload ONE asset to evaluate.


Saving Gemini Flash Image Generated Cymber Coffee .png to Gemini Flash Image Generated Cymber Coffee  (9).png
Processed Target: Gemini Flash Image Generated Cymber Coffee  (9).png (image/png)

--- 4. Evaluating Asset (Gemini Flash) ---


## 📋 Evaluation Findings
**Summary**: The asset successfully conveys the brand's 'soothing, natural warmth' tone and organic aesthetic, making it suitable for a Thanksgiving campaign. Strengths include the effective use of brand colors, appealing natural illustrations, and a generally harmonious composition. Key weaknesses are the use of an unapproved script/serif font for the main headline, which deviates significantly from the Montserrat Bold requirement for primary display text. Additionally, the specific circular lockup of the brand logo is not among the approved configurations, and the inner circle color of the logo deviates from the defined palette. To improve, ensure all text adheres to the specified typography guidelines and use one of the approved logo lockups (preferably the Vertical Lockup) with the correct color applications as detailed in the brand guide.


--- Select Generation Type ---
Generate (I)mage or (V)ideo based on this evaluation? [i/V]: V

--- 5. Rewriting Prompt for VIDEO (Gemini Flash) ---


## 📝 Optimized Video Prompt

> *A short, serene video embodying "soothing, natural warmth" and an organic, unpretentious aesthetic, suitable for a Thanksgiving campaign. The visual style is calm, deliberate, and uses the brand's core color palette of Forest Green, Bronze/Oak, Off-White, and Moss Gray.

**Scene 1:** The video opens with a gentle, slow push-in shot on a hand (softly lit, creating warm shadows) delicately holding a Moss Gray ceramic coffee mug. Wisps of steam rise gently from the mug. The background is a soft, blurred Off-White, hinting at a cozy, rustic setting. The camera movement is unhurried and smooth.

**Scene 2:** A seamless, slow cross-fade transitions to an overhead shot of a rustic wooden surface (Bronze/Oak tones). Scattered artfully across it are stylized, natural illustrations of coffee beans and delicate autumnal leaves in varying shades of Forest Green and Bronze/Oak. A minimalist, dark-colored pour-over coffee maker gently drips into the Moss Gray mug from Scene 1, now placed on the wooden surface. The camera slowly pans across the scene, highlighting the organic elements and the careful preparation.

**Scene 3:** A smooth, gentle zoom-in focuses on the steaming coffee in the Moss Gray mug. The rich, deep color of the coffee is emphasized. Overlaid text appears slowly: "Warmth & Gratitude," displayed in Montserrat Bold font, in an Off-White color, positioned harmoniously within the frame. The text holds for a moment, conveying a sense of calm reflection.

**Scene 4:** A soft, organic transition (like a slow dissolve or subtle wipe that mimics a natural unfolding) reveals a stylized illustration of coffee leaves and berries in Forest Green hues, accompanied by delicate, illustrative autumnal branches in Bronze/Oak tones. The texture of natural paper or canvas is subtly hinted at. Text fades in gently: "Seasonal Harvests," in Montserrat Bold font, in an Off-White color, placed to complement the illustration. The camera drifts slowly across the illustration, emphasizing its natural beauty.

**Scene 5:** The final shot is a slow, controlled reveal of the brand's Vertical Lockup logo. It appears centered against a clean Off-White background with a subtle, natural texture. The logo features a solid Forest Green background rectangle, with the "CYMBER COFFEE" text in Off-White and the coffee leaf icon in Forest Green. The clear space around the logo is ample, maintaining a sense of calm and importance. The camera is static, holding the logo clearly and steadily.

**Pacing:** The overall pacing of the video is slow, deliberate, and meditative, reflecting the brand's "Cultivated with Care, Shared with Calm" ethos.
**Camera Movement:** All camera movements are gentle, smooth, and unhurried, featuring slow pushes, pans, zooms, and drifts.
**Scene Transitions:** Transitions are organic and seamless, such as slow cross-fades, soft dissolves, or subtle wipes, maintaining the continuous flow and soothing atmosphere.*



--- Review Prompt ---
Current Prompt: A short, serene video embodying "soothing, natural warmth" and an organic, unpretentious aesthetic, suitable for a Thanksgiving campaign. The visual style is calm, deliberate, and uses the brand's core color palette of Forest Green, Bronze/Oak, Off-White, and Moss Gray.

**Scene 1:** The video opens with a gentle, slow push-in shot on a hand (softly lit, creating warm shadows) delicately holding a Moss Gray ceramic coffee mug. Wisps of steam rise gently from the mug. The background is a soft, blurred Off-White, hinting at a cozy, rustic setting. The camera movement is unhurried and smooth.

**Scene 2:** A seamless, slow cross-fade transitions to an overhead shot of a rustic wooden surface (Bronze/Oak tones). Scattered artfully across it are stylized, natural illustrations of coffee beans and delicate autumnal leaves in varying shades of Forest Green and Bronze/Oak. A minimalist, dark-colored pour-over coffee maker gently drips into the Moss Gray m

## 🎬 Final Generated Video