Copyright 2025 Google LLC

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    https://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.


# Gen V Colab


## Environment Setup




In [None]:
# @title Install dependencies
# Lines starting with "!" are shell commands run by the notebook runtime,
# not Python statements.
!pip install --upgrade --quiet google-genai
!pip install --quiet mediapy
# moviepy is pinned to an exact version.
!pip install --quiet moviepy==2.1.2
# Installs the gtech-gen-v package from the `backend` subdirectory of the
# repository's `main` branch; presumably this provides the `gen_v` modules
# imported in the next cell.
!pip install --quiet 'git+https://github.com/google-marketing-solutions/gen-v.git@main#egg=gtech-gen-v&subdirectory=backend'

In [None]:
# @title Imports
import base64
import ipywidgets as widgets
import mediapy as media
import moviepy as mp
import os
import sys

from datetime import date
from IPython.display import clear_output
from gen_v import config
from gen_v import models
from gen_v import storage as gcs
from gen_v import utils
from gen_v import video
from google import genai

from PIL import Image as PIL_Image
from PIL import ImageDraw, ImageFont

In [None]:
# @title Authenticate User

# Only authenticate when the google.colab package has already been loaded
# into this interpreter; otherwise this cell does nothing.
if "google.colab" in sys.modules:
    from google.colab import auth

    auth.authenticate_user()


## Parameters Setup

In [None]:
# @title GCP Parameters

GCP_PROJECT_ID = 'your-project-id'  # @param {type: "string", placeholder: "[your-project-id]", isTemplate: true}
# Each service defaults to the same project; reassign individually to split them.
GEMINI_PROJECT_ID = GCP_PROJECT_ID
IMAGEN_PROJECT_ID = GCP_PROJECT_ID
VEO_PROJECT_ID = GCP_PROJECT_ID

GCP_BUCKET_NAME = 'your-bucket-name' # @param {type: "string", placeholder: "bucket-name-without-path"}
INPUT_IMAGE_BUCKET_NAME = GCP_BUCKET_NAME
FOLDER_NAME = 'your-folder-name' #@param {type: "string"}
INPUT_AUDIO_BUCKET_PATH = f'{FOLDER_NAME}/audio/'

# Output locations all live in the same bucket, under FOLDER_NAME.
OUTPUT_IMAGES_BUCKET_NAME = GCP_BUCKET_NAME
OUTPUT_IMAGES_BUCKET_PATH = f'{FOLDER_NAME}/output-images/'
OUTPUT_VIDEOS_BUCKET_NAME = GCP_BUCKET_NAME
OUTPUT_VIDEOS_BUCKET_PATH = f'{FOLDER_NAME}/output-videos/'
OUTPUT_VIDEOS_URI = f"gs://{OUTPUT_VIDEOS_BUCKET_NAME}/{OUTPUT_VIDEOS_BUCKET_PATH}"


# Local working directory; used for stitching, the upscaled video and the
# optional cleanup cell.
TMP_STRING = '/content'

# Set current date variables, used in GCS URI paths
# isocalendar() yields (ISO year, ISO week number, ISO weekday); the weekday
# is discarded.
CURRENT_YEAR, CURRENT_WEEK, _ = date.today().isocalendar()
WEEK_AND_YEAR = f"week{CURRENT_WEEK}-{CURRENT_YEAR}"

# NOTE(review): unlike OUTPUT_VIDEOS_URI and STITCHING_OUTPUT_URI, the paths
# below carry no "gs://" scheme — confirm the gen_v functions that consume
# them expect "<bucket>/<path>" strings.
IMAGE_OVERLAYS_PATH = f"{OUTPUT_VIDEOS_BUCKET_NAME}/{OUTPUT_VIDEOS_BUCKET_PATH}image_overlays/{WEEK_AND_YEAR}"
FINAL_OVERLAYS_PATH = f"{OUTPUT_VIDEOS_BUCKET_NAME}/{OUTPUT_VIDEOS_BUCKET_PATH}final_overlays/{WEEK_AND_YEAR}"
OUTPUT_URI_PATH = f"{OUTPUT_IMAGES_BUCKET_NAME}/{OUTPUT_IMAGES_BUCKET_PATH}{WEEK_AND_YEAR}"

# VEO_CLIPS_URI resolves to the same location as FINAL_OVERLAYS_PATH plus a
# trailing slash.
VEO_OVERLAYS_FOLDER = f'{OUTPUT_VIDEOS_BUCKET_NAME}/{OUTPUT_VIDEOS_BUCKET_PATH}final_overlays/'
VEO_CLIPS_URI = f"{VEO_OVERLAYS_FOLDER}{WEEK_AND_YEAR}/"

STITCHING_OUTPUT_URI = f'{OUTPUT_VIDEOS_URI}concatenated/{WEEK_AND_YEAR}/'

folder_names = ['audio', 'fonts', 'input-images', 'input-overlays', 'input-videos', 'logos']

# Side effect: running this cell calls into gen_v.storage (presumably creating
# the listed folders under FOLDER_NAME, plus a per-week folder under
# input-images — confirm against the gen_v implementation).
gcs.create_gcs_folders_in_subfolder(GCP_BUCKET_NAME, FOLDER_NAME, folder_names)
gcs.create_gcs_folders_in_subfolder(GCP_BUCKET_NAME, f'{FOLDER_NAME}/input-images',[WEEK_AND_YEAR])

In [None]:
#@title GenAI models parameters
# Region comes from the GOOGLE_CLOUD_REGION environment variable and falls
# back to "us-central1" when that variable is not set.
LOCATION = os.environ.get("GOOGLE_CLOUD_REGION", "us-central1")
# Model identifiers, later passed to config.AppSettings as veo_model_name and
# gemini_model_name.
video_model = "veo-2.0-generate-001"
gemini_model = "gemini-2.0-flash"

In [None]:
# @title Image Editing Parameters

# Target frame size; also reused by the overlay, stitching and upscaling cells.
RESIZED_IMAGE_WIDTH = 1280 # @param
RESIZED_IMAGE_HEIGHT = 720 # @param
# @markdown ----

# NOTE(review): the form label below is identical to the one shown for
# ENABLE_SHOW_IMAGES_INLINE further down; judging by the variable name it
# presumably should describe background colour replacement — confirm and
# reword. This flag is not referenced by any other cell visible in this
# notebook.
# @markdown Enable this to show images during processing
COLOR_BACKGROUND_REPLACEMENT = True # @param {type: 'boolean'}

# Replacement background colour as separate red/green/blue components.
BACKGROUND_RED = 255 # @param
BACKGROUND_GREEN = 224 # @param
BACKGROUND_BLUE = 77 # @param
# Not part of BACKGROUND_COLOR below (only the three RGB components are).
BACKGROUND_TRANSPARENCY = 255

# The background colour expected in the input images (white) and the colour
# to use instead; both are passed to video.generate_videos in Part 1.
ORIGINAL_BACKGROUND_COLOR = models.RGBColor.from_tuple((255,255,255))
BACKGROUND_COLOR = models.RGBColor.from_tuple((
    BACKGROUND_RED,
    BACKGROUND_GREEN,
    BACKGROUND_BLUE
))


# Use this to convert HEX color to RGB
# hex_color = "#fcdc4c"
# rgb_color = utils.hex_to_rgb(hex_color)
# print(rgb_color)  # Output: (R, G, B)

# @markdown ----

# @markdown Enable this to show images during processing
ENABLE_SHOW_IMAGES_INLINE = False # @param {type: "boolean"}
SHOW_IMAGE_HEIGHT = 500 # @param [250,500]

In [None]:
# @title Video Generation Parameters

# All values in this cell are forwarded to config.AppSettings in the
# "Define app settings" cell.

# @markdown Set VEO parameters
DURATION = 5  # @param {type:"slider", min:5, max:8, step:1}
SAMPLE_COUNT = 2  # @param {type:"slider", min:1, max:4, step:1}
NEGATIVE_PROMPT = "copyrighted content"  # @param {type: 'string'}
PROMPT_ENHANCE = True  # @param {type: 'boolean'}
PERSON_GENERATION = "allow_adult"  #@param ["allow_adult", "dont_allow"]

# @markdown ----

# PROMPT_TYPE chooses between the two prompts defined below: "CUSTOM" refers
# to CUSTOM_VIDEO_PROMPT, "GEMINI" to GENERATE_VIDEO_PROMPT.
# @markdown Decide which prompt to use
PROMPT_TYPE = "CUSTOM" # @param ["CUSTOM", "GEMINI"]
# @markdown Your Veo prompt aka CUSTOM
CUSTOM_VIDEO_PROMPT = "Animate this image in a way that is most appropriate for the content in the image" # @param ["Animate this image in a way that is most appropriate for the content in the image"] {"allow-input":true}
# @markdown Let Gemini generate a video prompt aka GEMINI
GENERATE_VIDEO_PROMPT = "Analyse the image and write a prompt for a generative video AI to animate the video in the most appropriate way for the content to be displayed in an online ad.  Consider the function of the main object in the image when deciding how to animate it.  If there is a background, focus on animating the primary object only. Output the prompt only. Don't show any of the anlaysis or headings in your response, only provide the prompt you created." # @param ["Analyse the image and write a prompt for a generative video AI to animate the video in the most appropriate way for the content to be displayed in an online ad.  Consider the function of the main object in the image when deciding how to animate it.  If there is a background, focus on animating the primary object only. Output the prompt only. Don't show any of the anlaysis or headings in your response, only provide the prompt you created."] {"allow-input":true}

# @markdown ----

VIDEO_ORIENTATION = "LANDSCAPE" # @param ["LANDSCAPE", "PORTRAIT"]


In [None]:
# @title Overlay Parameters

# Asset locations: each file is looked up in a fixed sub-folder of
# gs://GCP_BUCKET_NAME/FOLDER_NAME (logos/, input-overlays/, fonts/).
LOGO_FILE_NAME = "logo.png" #@param {type: 'string'}
LOGO_URI = f'gs://{GCP_BUCKET_NAME}/{FOLDER_NAME}/logos/{LOGO_FILE_NAME}'
STICKER_FILE_NAME = "sticker.png" #@param {type: 'string'}
STICKER_URI = f'gs://{GCP_BUCKET_NAME}/{FOLDER_NAME}/input-overlays/{STICKER_FILE_NAME}'
FONT_FILE_NAME = "font.ttf" #@param {type: 'string'}
FONT_URI = f'gs://{GCP_BUCKET_NAME}/{FOLDER_NAME}/fonts/{FONT_FILE_NAME}'

# Overlay canvas matches the resized image size.
# NOTE(review): "OVERLAY_HEIGTH" is misspelled; neither overlay constant is
# referenced by another cell visible in this notebook.
OVERLAY_WIDTH = RESIZED_IMAGE_WIDTH
OVERLAY_HEIGTH = RESIZED_IMAGE_HEIGHT

# Logo placement and timing (units are defined by gen_v's ImageInput —
# presumably pixels and seconds; confirm).
LOGO_POSITION = (50, 520) #@param
LOGO_DESIRED_HEIGHT = 150 #@param Set as 0 to avoid auto-scaling
LOGO_START = 0 #@param
LOGO_DURATION = 5 #@param


# Sticker placement and timing.
STICKER_POSITION = (50, 50) #@param
STICKER_START = 0 #@param
STICKER_DURATION = 5 #@param
STICKER_DESIRED_HEIGHT = 100 #@param

# Text overlay styling, placement and timing.
TEXT_FONT_SIZE = 30 #@param
TEXT_START = 0 #@param
TEXT_DURATION = 5 #@param
TEXT_COLOR = 'blue'   #@param
TEXT_POSITION = (850, 50)  #@param

# Image overlays (logo first, then sticker) passed to
# video.process_videos_with_overlays_and_text in Part 2.
GCS_IMAGES_TEST: list[models.ImageInput] = [
    models.ImageInput(
        path=LOGO_URI,
        start=LOGO_START,
        position=LOGO_POSITION,
        duration=LOGO_DURATION,
        height=LOGO_DESIRED_HEIGHT
      ),
    models.ImageInput(
        path=STICKER_URI,
        start=STICKER_START,
        position=STICKER_POSITION,
        duration=STICKER_DURATION,
        height=STICKER_DESIRED_HEIGHT
      )
    ]

# Text overlay passed to Part 2. The `text` value here is a placeholder
# string; note that the selection UI in Part 1 separately stores a
# per-video 'promo_text' on each selected video.
TEXT_TEST = models.TextInput(
    text="text_to_display",
    font=FONT_URI,
    font_size=TEXT_FONT_SIZE,
    start_time=TEXT_START,
    duration=TEXT_DURATION,
    color=TEXT_COLOR,
    position=TEXT_POSITION
    )


In [None]:
# @title Stitching Parameters
# File names appended to STITCHING_OUTPUT_URI for the stitched results.
OUTPUT_WITHOUT_AUDIO = 'video_transition_demo.mp4'
OUTPUT_WITH_AUDIO = 'video_transition_demo_audio.mp4'

# NOTE(review): VERBOSE is not referenced by any other cell visible here.
VERBOSE = False # @param {type:"boolean"}
TRANSITION = "SWIPE" # @param ["CROSS_FADE", "FADE_IN", "SWIPE", "SLIDE_IN"]
# NOTE(review): "TRANSITON_DURATION" is misspelled, but the Part 2 cell refers
# to it by this exact name, so both must be renamed together.
TRANSITON_DURATION = 0.5 # @param {type:"number"}
TRANSITION_SIDE = "left" # @param ["left", "right", "top", "bottom"]

# NOTE(review): TRANSITION_TEST is not passed anywhere in the visible cells;
# stitch_videos_with_transitions builds its own VideoTransition from the
# three values above.
TRANSITION_TEST = models.VideoTransition(
    name=TRANSITION,
    padding=TRANSITON_DURATION,
    side=TRANSITION_SIDE
)
# NOTE(review): "OUTPUT_LENGHT" is misspelled, but the Part 2 cell refers to
# it by this exact name. It is passed on as the desired output length in
# seconds.
OUTPUT_LENGHT = 23 # @param {type:"slider", min:10, max:30, step:1}
TRIM_FROM = "end" # @param ["start", "end"]
# NOTE(review): TRIM_ENABLED is not referenced by any other cell visible here.
TRIM_ENABLED = True # @param {type:"boolean"}

# Multiplier applied to the resized image dimensions in the optional
# upscaling cell.
UPSCALE_FACTOR = 2 #@param

In [None]:
#@title Define app settings

# Bundles the parameters from the cells above into one gen_v settings object.
# It is passed to video.generate_videos (Part 1) and to
# stitch_videos_with_transitions (Part 2), which reads
# `intro_outro_videos_uri` and `audio_uri` from it.
APP_SETTINGS = config.AppSettings(
  # Google Cloud project, bucket, folder and region.
  gcp_project_id=GCP_PROJECT_ID,
  gcp_bucket_name=GCP_BUCKET_NAME,
  gcs_folder_name=FOLDER_NAME,
  gcp_region=LOCATION,

  # Veo video generation options.
  veo_model_name=video_model,
  veo_duration_seconds=DURATION,
  veo_sample_count=SAMPLE_COUNT,
  veo_negative_prompt=NEGATIVE_PROMPT,
  veo_prompt_enhance=PROMPT_ENHANCE,
  veo_person_generation=PERSON_GENERATION,

  # Prompt selection: PROMPT_TYPE plus both candidate prompts.
  gemini_model_name=gemini_model,
  prompt_type=PROMPT_TYPE,
  custom_video_prompt=CUSTOM_VIDEO_PROMPT,
  gemini_base_prompt=GENERATE_VIDEO_PROMPT,

  video_orientation=VIDEO_ORIENTATION,
)

## Backend Functions

In [None]:
# @title Get User Video Selection Functions
def get_user_choice_with_videos(videos: list[dict], on_selection_callback) -> None:
    """Display a video picker and report the user's choice through a callback.

    For every entry in ``videos`` an inline video player, a checkbox and a
    promo-text field are rendered, followed by a single 'Submit' button.
    This function returns as soon as the widgets are displayed; the selection
    is only available later, when the button is clicked, and is delivered via
    ``on_selection_callback``.

    Args:
        videos: A list of dictionaries, where each dictionary represents a video
                and contains keys "local_file" for the local video path,
                "product_title" for the video title, and "promo_text"
                for any promotional text.
        on_selection_callback: A function called with the list of selected
                video dictionaries once the user submits. Each selected
                dictionary has its "promo_text" overwritten in place with the
                text entered in the UI.

    Returns:
        None. Use ``on_selection_callback`` to receive the selection.
    """
    checkboxes = []
    video_elements = []
    promo_title_inputs = []

    # `item` (not `video`) so the imported `video` module is not shadowed.
    for item in videos:
        checkboxes.append(widgets.Checkbox(
            value=False,
            description=f"{item['product_title']}"))

        # Embed the local file as a base64 data URI so the browser can play it
        # without needing access to the runtime's filesystem.
        with open(item['local_file'], "rb") as f:
            encoded_video = base64.b64encode(f.read()).decode()

        video_element = f"""
        <video width="320" height="240" controls>
            <source src="data:video/mp4;base64,{encoded_video}" type="video/mp4">
            Your browser does not support the video tag.
        </video>
        """
        video_elements.append(widgets.HTML(value=video_element))

        # The current promo text is shown as the field's label; the field
        # itself starts empty.
        promo_title_inputs.append(widgets.Text(
            value="",
            placeholder="Enter promo text:",
            description=f"{item['promo_text']}"))

    # Stack, per video: player, checkbox, spacer, promo-text field.
    items = []
    for checkbox, video_widget, promo_title_input in zip(
        checkboxes, video_elements, promo_title_inputs
    ):
        items.extend([
            video_widget,
            checkbox,
            widgets.HTML(value="<br>"),
            promo_title_input
        ])
    display(widgets.VBox(items))

    def on_button_clicked(button):
        """Collect the checked videos with their promo texts and hand them
        to the callback.
        """
        # Widget callbacks run outside the cell's normal execution, so errors
        # are reported by printing rather than propagated.
        try:
            clear_output(wait=True)  # Clear previous output
            selected_videos = []
            for item, checkbox, promo_title_input in zip(
                videos, checkboxes, promo_title_inputs
            ):
                if checkbox.value:
                    item['promo_text'] = promo_title_input.value
                    selected_videos.append(item)
            print(selected_videos)
            on_selection_callback(selected_videos)  # Call the provided callback
        except Exception as e:
            print(f'Error on_button_click {e} in get_user_choice_with_videos()')

    submit_button = widgets.Button(description="Submit")
    submit_button.on_click(on_button_clicked)
    display(submit_button)


In [None]:
#@title Video Stitching Functions
def select_videos_for_concatenation(
    local_video_paths: list[str],
    transition: models.VideoTransition,
    output_length: int,
    trim_location: str,
    audio_inputs: list[models.AudioInput],
    gcs_uri: str,
    output_without_audio: str,
    output_with_audio: str
  ) -> list:
  """Lets the user pick videos, then concatenates and uploads them on submit.

  A checkbox per local video and a 'Submit' button are displayed. This
  function returns right after the widgets are shown; concatenation, audio
  overlay and upload happen later, inside the button's click handler.

  Args:
    local_video_paths: A list of local paths to the video files.
    transition: A VideoTransition object with the transition type and duration.
    output_length: The desired length of the output video in seconds.
    trim_location: Where to trim the videos ("start" or "end").
    audio_inputs: A list of AudioInput objects for audio overlay.
    gcs_uri: The GCS URI prefix the output file names are appended to.
    output_without_audio: File name, appended to gcs_uri, under which the
      concatenated video is uploaded.
    output_with_audio: File name, appended to gcs_uri, passed to
      video.add_audio_clips_to_video as the destination for the version
      with audio.

  Returns:
    The list that receives the selected paths. It is empty when this function
    returns and is filled in place when the user clicks 'Submit'. If setting
    up the widgets raises, the error is printed and None is returned.
  """
  try:
    checkboxes = [
        widgets.Checkbox(value=False, description=path)
        for path in local_video_paths
    ]
    display(widgets.VBox(checkboxes))

    submit_button = widgets.Button(description="Submit")
    display(submit_button)

    selected_videos = []

    def on_button_clicked(button):
      clear_output(wait=True)
      # Rebuild the selection in place: the caller keeps a reference to this
      # same list, and a repeated click must not accumulate duplicates.
      selected_videos[:] = [
          path
          for path, checkbox in zip(local_video_paths, checkboxes)
          if checkbox.value
      ]
      final_video = video.concatenate_video_clips(
          selected_videos,
          transition,
          output_length,
          trim_location,
          resized_image_width = RESIZED_IMAGE_WIDTH,
          resized_image_height = RESIZED_IMAGE_HEIGHT,
          tmp_string = TMP_STRING
      )

      gcs_uri_video = f'{gcs_uri}{output_without_audio}'
      gcs_uri_audio = f'{gcs_uri}{output_with_audio}'

      video.add_audio_clips_to_video(final_video, audio_inputs, gcs_uri_audio)
      gcs.upload_file_to_gcs(final_video, gcs_uri_video)

    submit_button.on_click(on_button_clicked)

    return selected_videos
  except Exception as e:
    print(f'Error select_videos_for_concatenation {e}')


def stitch_videos_with_transitions(
    veo_clips_uri: str,
    stiching_output_uri: str,
    output_with_audio: str,
    output_without_audio: str,
    transition: str,
    transition_duration: float,
    transition_side: str,
    desired_length: int,
    trim_from: str,
    settings: config.AppSettings
) -> None:
    """Fetch clips and audio from GCS and start the interactive stitching step.

    The intro/outro videos and the audio files are located through
    ``settings``; the Veo clips through ``veo_clips_uri``. Everything is
    downloaded locally and handed to ``select_videos_for_concatenation``,
    which shows the selection UI and does the actual concatenation, audio
    overlay and upload once the user submits.

    Args:
        veo_clips_uri: The GCS URI of the folder containing Veo overlay videos.
        stiching_output_uri: The GCS URI prefix for the stitched output files.
        output_with_audio: The filename for the stitched video with audio.
        output_without_audio: The filename for the stitched video without audio.
        transition: The name of the transition to use (e.g., "CROSS_FADE"...).
        transition_duration: The duration of the transition in seconds.
        transition_side: The side of the transition (e.g., "left", "right").
        desired_length: The desired length of the output video in seconds.
        trim_from: Where to trim the video ("start" or "end").
        settings: An AppSettings instance; its ``intro_outro_videos_uri`` and
            ``audio_uri`` attributes are read here.

    Returns:
        None.
    """
    # Step 1: list the remote files (intro/outro, Veo clips, audio).
    bumper_files = gcs.retrieve_all_files_from_gcs_folder(
        settings.intro_outro_videos_uri
    )
    veo_files = gcs.retrieve_all_files_from_gcs_folder(veo_clips_uri)
    remote_videos = video.merge_arrays(bumper_files, veo_files)
    remote_audio = gcs.retrieve_all_files_from_gcs_folder(settings.audio_uri)

    transition_spec = models.VideoTransition(
        name=transition,
        padding=transition_duration,
        side=transition_side,
    )

    # Step 2: download everything; wrap each audio path in an AudioInput.
    downloaded_videos = gcs.download_files(remote_videos)
    audio_tracks = []
    for audio_path in gcs.download_files(remote_audio):
        audio_tracks.append(models.AudioInput(path=audio_path))

    # Step 3: show the picker; stitching runs when the user clicks 'Submit'.
    select_videos_for_concatenation(
        local_video_paths=downloaded_videos,
        transition=transition_spec,
        output_length=desired_length,
        trim_location=trim_from,
        audio_inputs=audio_tracks,
        gcs_uri=stiching_output_uri,
        output_without_audio=output_without_audio,
        output_with_audio=output_with_audio,
    )


In [None]:
#@title Main functions

def select_videos(output_video_files:list[dict])->list[dict]:
    """Show the video picker and return the list that will hold the picks.

    Args:
        output_video_files: The video dictionaries offered to the user; they
            are passed unchanged to get_user_choice_with_videos.

    Returns:
        A list that is empty when this function returns and is extended in
        place with the selected video dictionaries once the user submits.
    """
    chosen_videos = []

    def _collect_selection(picked: list[dict]):
        """Store the user's picks; invoked when the selection is submitted."""
        print("User selection received in generate_and_select_videos context.")
        chosen_videos.extend(picked)
        print("Final selected videos:", chosen_videos)

    print("Please select your videos from the UI below:")
    get_user_choice_with_videos(output_video_files, _collect_selection)
    return chosen_videos


# Execute Main Functions

## Part 1: Image resize, background recolor, Veo video generation and user selection

The Part 1 cell executes in approximately **1 minute**. However, it waits for user input and is not finished until the user makes the selection and clicks the 'Submit' button.

**Important**: If you run the next cell before you make the selection of the provided Veo generated videos and add the promo text, you will see errors. Please select the videos and promo texts before moving to the next one.

## Part 2: Overlaying the Veo videos with image and text overlays, and stitching the final video: intro, overlaid Veo clips, outro and audio

The Part 2 cell executes in approximately **8 minutes**. However, it waits for user input to select which videos to stitch together (intro, overlaid Veo clips and outro), and is not finished until the user makes the selection and clicks the 'Submit' button.

**Important**: If you run the next cell before you make the selection of the provided video parts, you will see errors. Please select the video parts before moving to the next one.
It will take in total between 15-20 minutes for this cell to finish.

In [None]:
#@title Part 1

# Calls gen_v's video generation with the output path, target frame size,
# original/replacement background colours and the app settings defined above.
output_video_files = video.generate_videos(
    OUTPUT_URI_PATH,
    RESIZED_IMAGE_WIDTH,
    RESIZED_IMAGE_HEIGHT,
    ORIGINAL_BACKGROUND_COLOR,
    BACKGROUND_COLOR,
    APP_SETTINGS
)

# Renders the selection UI. `selected_videos` is still empty when this cell
# finishes; it is filled in place only after the user clicks 'Submit'.
selected_videos = select_videos(output_video_files)

In [None]:
#@title Part 2
# Please wait until previous cell is complete before running this one
# `selected_videos` is only populated after 'Submit' was clicked in Part 1.
video.process_videos_with_overlays_and_text(
    selected_videos,
    GCS_IMAGES_TEST,
    TEXT_TEST,
    IMAGE_OVERLAYS_PATH,
    FINAL_OVERLAYS_PATH
)

# Arguments are positional and must follow the parameter order of
# stitch_videos_with_transitions: clips URI, output URI, file name with
# audio, file name without audio, transition name/duration/side, desired
# length, trim side, settings.
stitch_videos_with_transitions(
    VEO_CLIPS_URI,
    STITCHING_OUTPUT_URI,
    OUTPUT_WITH_AUDIO,
    OUTPUT_WITHOUT_AUDIO,
    TRANSITION,
    TRANSITON_DURATION,
    TRANSITION_SIDE,
    OUTPUT_LENGHT,
    TRIM_FROM,
    APP_SETTINGS
)

In [None]:
#@title (Optional) Upscale the local video to higher resolution
def upscale_video(
    input_video_path,
    output_video_path,
    target_width,
    target_height):
  """Upscales a video to a higher resolution using ffmpeg.

  Args:
      input_video_path: The path to the input video file.
      output_video_path: The path to the output video file.
      target_width: The desired width of the upscaled video.
      target_height: The desired height of the upscaled video.
  """
  !ffmpeg -i {input_video_path} -vf scale={target_width}:{target_height} -c:a copy {output_video_path}

# Example usage: upscale the stitched video (with audio) by UPSCALE_FACTOR.
output_video_path = f'{TMP_STRING}/upscaled_video.mp4'
# Scale width and height by the same factor so the aspect ratio is kept.
target_width = UPSCALE_FACTOR * RESIZED_IMAGE_WIDTH
target_height = UPSCALE_FACTOR * RESIZED_IMAGE_HEIGHT

# Fetch the stitched video with audio from GCS, then upscale the local copy.
video_audio_filename = gcs.download_file_locally(
    f'{STITCHING_OUTPUT_URI}{OUTPUT_WITH_AUDIO}')
upscale_video(
    video_audio_filename,
    output_video_path,
    target_width,
    target_height
)

In [None]:
#@title (Optional) Clean up local files
def cleanup_tmp_folder(folder_path, remove_subdirectories=False):
    """
    Removes the files directly inside the specified folder.

    Sub-directories are left untouched by default, because os.remove() cannot
    delete a directory and would abort the cleanup at the first one it meets.
    Pass remove_subdirectories=True to delete them recursively as well.
    Symbolic links are unlinked, never followed.

    Args:
        folder_path (str): The path to the folder to clean up.
        remove_subdirectories (bool): When True, sub-directories and all of
            their contents are deleted too. Defaults to False.
    """
    import shutil  # Local import: only needed for the recursive option.

    for filename in os.listdir(folder_path):
        file_path = os.path.join(folder_path, filename)

        # A symlink to a directory is handled as a file so that only the link
        # itself is removed, not the directory it points to.
        if os.path.isdir(file_path) and not os.path.islink(file_path):
            if remove_subdirectories:
                shutil.rmtree(file_path)
            continue

        try:
            os.remove(file_path)
        except FileNotFoundError:
            # The entry disappeared after it was listed; nothing left to do.
            continue


# Call the function with the TMP_STRING folder path
# NOTE(review): TMP_STRING is '/content' in this notebook, so this targets
# every entry directly inside that directory, not only files this notebook
# created — confirm that is intended before running.
cleanup_tmp_folder(TMP_STRING)