# Imagen, Gemini 2.0 Flash exp - Image Generation and veo2 demo

This notebook showcases how to:
- use Imagen3 to generate our character
- use Gemini 2.0 Flash exp - Image Generation capabilities to adapt our character to different scenes
- use veo2 to generate clips based on those scenes
- use moviepy to merge clips into a single video

Before you execute commands in this notebook, **make sure to check out the pricing**.

Image and video generation **might be quite expensive**.

<a target="_blank" href="https://colab.research.google.com/github/ontaptom/veo2/blob/main/imagen_and_veo2.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Not strictly necessary, since these packages are pre-installed in Colab; if you run into issues, uncomment the line below to install them.

# !pip install --upgrade --user google-cloud-aiplatform google-genai

In [None]:
# @title Helper function to display images in colab

import typing
import IPython.display
from PIL import Image as PIL_Image
from PIL import ImageOps as PIL_ImageOps

def display_image(
    image,
    max_width: int = 600,
    max_height: int = 350,
) -> None:
    """Show a generated image inline, shrunk to fit within the given bounds.

    Args:
        image: Object exposing its Pillow image through the ``_pil_image``
            attribute (the notebook passes Imagen generation results).
        max_width: Largest width, in pixels, the displayed image may have.
        max_height: Largest height, in pixels, the displayed image may have.
    """
    picture = typing.cast(PIL_Image.Image, image._pil_image)

    # Non-RGB modes (e.g. RGBA) are not rendered by every Jupyter environment.
    if picture.mode != "RGB":
        picture = picture.convert("RGB")

    width, height = picture.size
    fits_bounds = width <= max_width and height <= max_height
    if not fits_bounds:
        # Shrink oversized images so the notebook output stays compact.
        picture = PIL_ImageOps.contain(picture, (max_width, max_height))

    IPython.display.display(picture)

In [None]:
# @title Auth to google cloud

# Runs Colab's user authentication flow. The Google Cloud clients created in
# later cells presumably pick up the resulting credentials (this import only
# exists inside a Colab runtime).
from google.colab import auth
auth.authenticate_user()

In [None]:
# @title Set up project_id and location

from vertexai.preview.vision_models import ImageGenerationModel
import vertexai

# Replace the placeholder with your own Google Cloud project ID before running.
project = "<your-project-id>" # @param {type: "string"}
# Region for Vertex AI; the same value is exported for the genai client later on.
location = "us-central1" # @param {type: "string"}
# Initialise the Vertex AI SDK with the project and location chosen above.
vertexai.init(project=project, location=location)



In [None]:
# @title Generate our main character

generation_model = ImageGenerationModel.from_pretrained("imagen-3.0-generate-002")

prompt = "A stylish raccoon in a hoodie and linen shorts, full body shot. high detail. on solid white background" # @param {type: "string"}

images = generation_model.generate_images(
    prompt=prompt,
    number_of_images=1,
    aspect_ratio="16:9",
    negative_prompt="",
    person_generation="allow_adult",
    safety_filter_level="block_few",
)

# The response may contain no images (e.g. when every candidate is filtered
# out); fail with an actionable message instead of an IndexError on images[0].
if not images.images:
    raise RuntimeError(
        "Imagen returned no images for this prompt; adjust the prompt and run the cell again."
    )

display_image(images[0])

# The image is written to disk by the dedicated "Save the generation locally"
# cell that follows, so it is not saved a second time here.

In [None]:
# @title Save the generation locally

# Write the generated character into the Colab session's filesystem; the scene
# cells load it back from 'base_image.png'.
base_character = images[0]._pil_image
base_character.save('base_image.png')

In [None]:
# @title Specify name for the storage bucket

# The storage bucket holds the generated scene images as well as the generated videos.
# Enter either the name of an existing bucket or a new, globally unique name;
# a later cell creates the bucket if it does not exist yet.

video_bucket  = "<unique-bucket-name-or-your-existing-one>" # @param {type: "string"}

In [None]:
# @title Check if the bucket exists; if not, create it

from google.cloud import storage

# Create a storage client
storage_client = storage.Client(project=project)

# lookup_bucket() returns None only when the bucket is missing. Any other
# failure (permissions, network, wrong project) is raised as-is instead of
# being mistaken for "bucket does not exist" and triggering a create attempt.
bucket = storage_client.lookup_bucket(video_bucket)
if bucket is None:
    bucket = storage_client.create_bucket(video_bucket)
    print(f"Bucket {bucket.name} created.")
else:
    print(f"Bucket {bucket.name} already exists.")

In [None]:
# @title imports and set up genai client

import base64
import os
import time
from io import BytesIO

from IPython.display import HTML
from PIL import Image
from google import genai
from google.genai import types
from google.genai.types import GenerateVideosConfig

# Export the project, location and Vertex AI switch as environment variables.
# The client below is created without arguments, so it is presumably
# configured from these values.
os.environ.update(
    {
        "GOOGLE_CLOUD_PROJECT": project,
        "GOOGLE_CLOUD_LOCATION": location,
        "GOOGLE_GENAI_USE_VERTEXAI": "True",
    }
)
genai_client = genai.Client()

In [None]:
# @title First scene: modify base image with gemini-2.0-flash image gen

scene_1 = "the raccoon is standing in his kitchen and doing the dishes, camera shows the kitchen in the background. aspect ratio 16:9" # @param {type:"string"}

filename = "base_image.png"
input_image = Image.open(filename)

# The request pairs the base character image with the scene description.
contents_1 = (input_image, scene_1)

# first scene request
response = genai_client.models.generate_content(
    model="gemini-2.0-flash-exp",
    contents=contents_1,
    config=types.GenerateContentConfig(
        response_modalities=['Text', 'Image']
    )
)

# A blocked or empty response may carry no candidates, content or parts;
# normalise all of those cases to an empty list instead of crashing on None.
candidates = response.candidates or []
content = candidates[0].content if candidates else None
parts = (content.parts if content else None) or []

scene_1_saved = False
for part in parts:
    if part.text is not None:
        print(part.text)
    elif part.inline_data is not None:
        image = Image.open(BytesIO(part.inline_data.data))
        image.save('scene_1.png')
        display(image)
        scene_1_saved = True

# Later cells upload 'scene_1.png'; stop here with a clear message rather than
# letting them fail on a missing (or stale) file.
if not scene_1_saved:
    raise RuntimeError(
        "The model returned no image for scene 1, so 'scene_1.png' was not written. "
        "Adjust the prompt and run this cell again."
    )

In [None]:
# @title Second scene: modify base image with gemini-2.0-flash image gen

scene_2 = "the raccoon is sitting at a couch with remote in his hand, and watching tv." # @param {type:"string"}

# Reload the base character from disk so this cell does not depend on a
# variable left behind by the first-scene cell.
input_image = Image.open("base_image.png")

# The request pairs the base character image with the scene description.
contents_2 = (input_image, scene_2)

# second scene request
response = genai_client.models.generate_content(
    model="gemini-2.0-flash-exp",
    contents=contents_2,
    config=types.GenerateContentConfig(
        response_modalities=['Text', 'Image']
    )
)

# A blocked or empty response may carry no candidates, content or parts;
# normalise all of those cases to an empty list instead of crashing on None.
candidates = response.candidates or []
content = candidates[0].content if candidates else None
parts = (content.parts if content else None) or []

scene_2_saved = False
for part in parts:
    if part.text is not None:
        print(part.text)
    elif part.inline_data is not None:
        image = Image.open(BytesIO(part.inline_data.data))
        image.save('scene_2.png')
        display(image)
        scene_2_saved = True

# Later cells upload 'scene_2.png'; stop here with a clear message rather than
# letting them fail on a missing (or stale) file.
if not scene_2_saved:
    raise RuntimeError(
        "The model returned no image for scene 2, so 'scene_2.png' was not written. "
        "Adjust the prompt and run this cell again."
    )

In [None]:
# @title Upload the scene images to the bucket

def upload_scene(file_name):
    """Upload a local file to ``bucket`` under the same name.

    Args:
        file_name: Path of the local file; also used as the object name.

    Returns:
        The ``gs://`` URI of the uploaded object.
    """
    bucket.blob(file_name).upload_from_filename(file_name)
    # Build the URI from the bucket actually written to, so the path handed to
    # the video model always matches where the object lives.
    gcs_uri = f"gs://{bucket.name}/{file_name}"
    print(f"File uploaded to {gcs_uri}")
    return gcs_uri


image_path_1 = upload_scene("scene_1.png")
image_path_2 = upload_scene("scene_2.png")

In [None]:
# @title Generate first video clip
# Set your bucket path for the output video
output_gcs_uri = f"gs://{video_bucket}/output/"

# Generate the video from the first scene image uploaded to the bucket
prompt = "The raccoon is doing the dishes, POV shot zooming out and showing the whole room"  # @param {type:"string"}
operation = genai_client.models.generate_videos(
    model="veo-2.0-generate-001",
    prompt=prompt,
    image=types.Image(
        gcs_uri=image_path_1,
        mime_type="image/png",
    ),
    config=GenerateVideosConfig(
        aspect_ratio="16:9",
        output_gcs_uri=output_gcs_uri,
    ),
)

# Wait for completion, polling the long-running operation every 15 seconds
while not operation.done:
    time.sleep(15)
    operation = genai_client.operations.get(operation)
    print(operation)

# Fail loudly when the operation finished without a usable result; otherwise
# `videoclip_1` would stay undefined and the preview cell would break with a
# confusing error.
if not operation.response:
    raise RuntimeError(
        f"Video generation for clip 1 returned no response: {getattr(operation, 'error', None)}"
    )

generated_videos = operation.result.generated_videos
if not generated_videos:
    raise RuntimeError("Video generation for clip 1 finished but produced no videos.")

videoclip_1 = generated_videos[0].video.uri
print(videoclip_1)

In [None]:
# @title Preview the first clip

# download the first clip to local filesystem
!gsutil cp {videoclip_1} ./clip_1.mp4

# Read the video file and encode it
video_file = open('clip_1.mp4', 'rb')
video_data = video_file.read()
video_file.close()

# Create a data URL
data_url = "data:video/mp4;base64," + base64.b64encode(video_data).decode()

# Display with HTML
HTML(f"""
<video width="640" height="360" controls>
  <source src="{data_url}" type="video/mp4">
  Your browser does not support the video tag.
</video>
""")

In [None]:
# @title Generate second video clip
# Set your bucket path for the output video (redefined here so this cell does
# not depend on the first-clip cell having been run)
output_gcs_uri = f"gs://{video_bucket}/output/"

# Generate the video from the second scene image uploaded to the bucket
prompt = "The raccoon is watching tv, looking bored."  # @param {type:"string"}

operation = genai_client.models.generate_videos(
    model="veo-2.0-generate-001",
    prompt=prompt,
    image=types.Image(
        gcs_uri=image_path_2,
        mime_type="image/png",
    ),
    config=GenerateVideosConfig(
        aspect_ratio="16:9",
        output_gcs_uri=output_gcs_uri,
    ),
)

# Wait for completion, polling the long-running operation every 15 seconds
while not operation.done:
    time.sleep(15)
    operation = genai_client.operations.get(operation)
    print(operation)

# Fail loudly when the operation finished without a usable result; otherwise
# `videoclip_2` would stay undefined and the preview cell would break with a
# confusing error.
if not operation.response:
    raise RuntimeError(
        f"Video generation for clip 2 returned no response: {getattr(operation, 'error', None)}"
    )

generated_videos = operation.result.generated_videos
if not generated_videos:
    raise RuntimeError("Video generation for clip 2 finished but produced no videos.")

videoclip_2 = generated_videos[0].video.uri
print(videoclip_2)

In [None]:
# @title Preview the second clip

# download the second clip to local filesystem
!gsutil cp {videoclip_2} ./clip_2.mp4

# Read the video file and encode it
video_file = open('clip_2.mp4', 'rb')
video_data = video_file.read()
video_file.close()

# Create a data URL
data_url = "data:video/mp4;base64," + base64.b64encode(video_data).decode()

# Display with HTML
HTML(f"""
<video width="640" height="360" controls>
  <source src="{data_url}" type="video/mp4">
  Your browser does not support the video tag.
</video>
""")

In [None]:
# @title Install moviepy and do necessary imports

!pip install moviepy

from moviepy.editor import VideoFileClip, concatenate_videoclips, CompositeVideoClip


In [None]:
# @title Create single long clip

# Length of the fade applied at the join between the two clips (seconds)
transition_duration = 1.0

# Name of the merged file; the display cell below reads it back
output_file = "merged_video.mp4"

clip1 = clip2 = final_clip = None
try:
    # Load the video clips
    clip1 = VideoFileClip("clip_1.mp4")
    clip2 = VideoFileClip("clip_2.mp4")

    # Fade the first clip out and the second clip in
    clip1 = clip1.fadeout(transition_duration)
    clip2 = clip2.fadein(transition_duration)

    # Join the clips; the negative padding makes them overlap by half of the
    # transition duration
    final_clip = concatenate_videoclips(
        [clip1, clip2],
        method="compose",
        padding=-transition_duration/2
    )

    # Write the result to a file
    final_clip.write_videofile(output_file, codec='libx264')
finally:
    # Close whatever was opened, even when loading or encoding fails, so the
    # clips' resources are always released.
    for clip in (clip1, clip2, final_clip):
        if clip is not None:
            clip.close()

In [None]:
# @title Display the video

# Load the merged video from disk as raw bytes
with open(output_file, 'rb') as merged_file:
    video_data = merged_file.read()

# Base64-encode the bytes and wrap them in a data URL for inline embedding
encoded_video = base64.b64encode(video_data).decode()
data_url = f"data:video/mp4;base64,{encoded_video}"

# Render an HTML5 player as the cell's output
HTML(f"""
<video width="640" height="360" controls>
  <source src="{data_url}" type="video/mp4">
  Your browser does not support the video tag.
</video>
""")