In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Editing with Imagen 2 on Vertex AI


<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/vision/getting-started/image_generation.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Google Colaboratory logo"><br> Open in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fgenerative-ai%2Fmain%2Fvision%2Fgetting-started%2Fimage_generation.ipynb">
      <img width="32px" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" alt="Google Cloud Colab Enterprise logo"><br> Open in Colab Enterprise
    </a>
  </td>    
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/vision/getting-started/image_generation.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo"><br> Open in Workbench
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/generative-ai/blob/main/vision/getting-started/image_generation.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
</table>

## Overview

[Imagen 2 on Vertex AI](https://cloud.google.com/vertex-ai/docs/generative-ai/image/overview) brings Google's state-of-the-art generative AI capabilities to application developers. With Imagen 2 on Vertex AI, application developers can build next-generation AI products that edit images.

With Imagen 2, you can not only generate an image, but also edit an image using a mask you provide or with mask-free editing.

This notebook focuses on **image editing** only. You can read more about the image editing features of Imagen [here](https://cloud.google.com/vertex-ai/generative-ai/docs/image/edit-images).


### Objectives

In this notebook, you will explore the image editing features of Imagen using the Vertex AI Python SDK. You will:

- Edit an entire uploaded or generated image with a text prompt.
- Define specific objects in an image to edit.
- Edit the background of an image.
- Edit the foreground of an image.
- Remove the background or foreground of an image.
- Experiment with different parameters, such as:
    - reducing the dilation of a mask for thin objects
    - influencing the edited output image using negative prompts


### Costs

- This notebook uses billable components of Google Cloud:
  - Vertex AI (Imagen)

- Learn about [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing) and use the [Pricing Calculator](https://cloud.google.com/products/calculator/) to generate a cost estimate based on your projected usage.

## Getting Started

### Install Vertex AI SDK for Python

In [None]:
! pip install --quiet --upgrade --user google-cloud-aiplatform

### Restart current runtime

To use the newly installed packages in this Jupyter runtime, it is recommended to restart the runtime. Run the following cell to restart the current kernel.

The restart process might take a minute or so.

In [None]:
import IPython

# Shut the current kernel down with restart requested, so the runtime comes
# back up with the newly installed packages.
kernel_app = IPython.Application.instance()
kernel_app.kernel.do_shutdown(restart=True)

After the restart is complete, continue to the next step.


<div class="alert alert-block alert-warning">
<b>⚠️ The kernel is going to restart. Please wait until it is finished before continuing to the next step. ⚠️</b>
</div>

### Authenticate your notebook environment (Colab only)

If you are running this notebook on Google Colab, run the following cell to authenticate your environment. This step is not required if you are using [Vertex AI Workbench](https://cloud.google.com/vertex-ai-workbench).

In [None]:
import sys

# Additional authentication is only required on Google Colab; in any other
# environment this cell does nothing.
running_in_colab = "google.colab" in sys.modules
if running_in_colab:
    from google.colab import auth

    # Authenticate the current user to Google Cloud.
    auth.authenticate_user()

### Set Google Cloud project information and initialize Vertex AI SDK

To get started using Vertex AI, you must have an existing Google Cloud project and enable the Vertex AI API.

Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment).

In [None]:
import vertexai

# Replace the placeholder with your own Google Cloud project ID.
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}
LOCATION = "us-central1"  # @param {type:"string"}

# Initialize the Vertex AI SDK with the project and location chosen above.
vertexai.init(location=LOCATION, project=PROJECT_ID)

### Import libraries

Import libraries and define utility functions. You will also load the imagegeneration@006 model from the Vertex SDK.

Learn more about [editing with the Imagen models](https://cloud.google.com/vertex-ai/generative-ai/docs/image/edit-images).


In [None]:
import math
from typing import List

import matplotlib.pyplot as plt
from vertexai.preview.vision_models import (
    GeneratedImage,
    ImageGenerationModel,
    ImageGenerationResponse,
)


# Displays images in a grid below the cell
def display_images_in_grid(images: List[GeneratedImage]) -> None:
    """Displays the provided images in a grid, at most 4 images per row.

    Nothing is drawn when `images` is empty.

    Args:
        images: A list of GeneratedImage objects representing the images to display.
    """
    max_columns = 4

    # An empty list would ask matplotlib for a grid with zero rows, which it
    # rejects, so there is nothing to draw.
    if not images:
        return

    # Enough rows to hold every image, and no more columns than images.
    nrows: int = math.ceil(len(images) / max_columns)
    ncols: int = min(len(images), max_columns)

    # squeeze=False makes `axes` a 2-D array for every grid size (including
    # 1x1), so `.flat` is always available without padding the grid with an
    # extra, empty column.
    _, axes = plt.subplots(
        nrows=nrows, ncols=ncols, figsize=(12, 6), squeeze=False
    )

    for index, ax in enumerate(axes.flat):
        if index >= len(images):
            # Hide the unused cells of the last row.
            ax.axis("off")
            continue

        # Draw the image with its proportions preserved and no tick marks.
        ax.imshow(images[index]._pil_image)
        ax.set_aspect("equal")
        ax.set_xticks([])
        ax.set_yticks([])

    # Minimize whitespace between subplots, then render the figure.
    plt.tight_layout()
    plt.show()


# Model version used by the generation and editing cells in this notebook.
IMAGE_GENERATION_MODEL = "imagegeneration@006"
generation_model = ImageGenerationModel.from_pretrained(
    model_name=IMAGE_GENERATION_MODEL
)

## Set the image to edit

### Generate with Imagen
Use the `generate_images` function with Imagen. All you need is a text prompt.

In [None]:
# Text description of the image to generate.
PROMPT = "a deer in a field looking at the camera"

response: ImageGenerationResponse = generation_model.generate_images(prompt=PROMPT)

# Keep the first generated image as the base image for the editing cells,
# then show everything that was returned.
generated_images = response.images
INPUT_IMAGE = generated_images[0]
display_images_in_grid(generated_images)

## Edit images using MaskMode

Now you can edit images without providing your own mask. Configure the `mask_mode` field of the `edit_image` request to automatically generate a mask on the input image.

MaskMode provides the following modes:
* **Background**: Edit the background of an image
* **Foreground**: Edit the foreground of an image
* **Semantic**: Edit specified objects in an image. You can edit 1 to 5 objects in an image using semantic segmentation classes.

The `semantic` maskMode option requires you to set **Segmentation classes**. You must set 1 to 5 classes using the desired class ID. The full table of available classes is listed in the `Appendix` section at the end of this Colab.

### Explore different MaskMode options

This section explores how to edit images using different `edit_mode` and `mask_mode` parameter options.

In [None]:
# Edit mode: one of "inpainting-insert", "inpainting-remove", "outpainting".
EDIT_MODE = "inpainting-insert"
# How the mask is generated: one of "background", "foreground", "semantic".
MASK_MODE = "foreground"
# Segmentation class of the object to edit. Only used for the `semantic` maskMode.
SEGMENTATION_CLASS = 16

# Text prompt that influences how the masked part of the image is edited.
PROMPT = "a cow looking at the camera"
# [Optional] Negative prompt describing what you don't want to see.
NEGATIVE_PROMPT = ""

# Segmentation classes are only supplied when the mask is semantic.
classes = [SEGMENTATION_CLASS] if MASK_MODE == "semantic" else None

response: ImageGenerationResponse = generation_model.edit_image(
    base_image=INPUT_IMAGE,
    prompt=PROMPT,
    negative_prompt=NEGATIVE_PROMPT,
    edit_mode=EDIT_MODE,
    mask_mode=MASK_MODE,
    segmentation_classes=classes,
    number_of_images=4,
)

display_images_in_grid(response.images)

### Inpainting-insert with Background maskMode

Edit the background of an image using a text prompt.

In [None]:
EDIT_MODE = "inpainting-insert"
MASK_MODE = "background"
# The background will be edited to adhere to the text prompt below.
PROMPT = "sandy desert oasis"
# [Optional] Set a negative prompt to define what you don't want to see.
NEGATIVE_PROMPT = ""

# Segmentation classes only apply to the `semantic` maskMode, so none are
# passed here. This also keeps the cell independent of the `classes` variable
# set by the exploration cell.
response: ImageGenerationResponse = generation_model.edit_image(
    prompt=PROMPT,
    base_image=INPUT_IMAGE,
    negative_prompt=NEGATIVE_PROMPT,
    edit_mode=EDIT_MODE,
    mask_mode=MASK_MODE,
)

display_images_in_grid(response.images)

### Inpainting-insert with Foreground maskMode

In [None]:
EDIT_MODE = "inpainting-insert"
MASK_MODE = "foreground"

# The foreground of the object will be edited according to the text prompt below.
PROMPT = "a bear looking at the camera"
# [Optional] Set a negative prompt to define what you don't want to see.
NEGATIVE_PROMPT = ""

# Segmentation classes only apply to the `semantic` maskMode, so none are
# passed here. This also keeps the cell independent of the `classes` variable
# set by the exploration cell.
response: ImageGenerationResponse = generation_model.edit_image(
    prompt=PROMPT,
    base_image=INPUT_IMAGE,
    negative_prompt=NEGATIVE_PROMPT,
    edit_mode=EDIT_MODE,
    mask_mode=MASK_MODE,
)

display_images_in_grid(response.images)

### Inpainting-insert with Semantic maskMode

Edit a specified object or multiple objects in an image using Semantic maskMode.
You must set between 1 and 5 IDs in the `segmentation_classes` field. The full
list of available segmentation classes is listed in the Appendix section at the bottom of this Colab.

In [None]:
EDIT_MODE = "inpainting-insert"
MASK_MODE = "semantic"

# Segmentation class ID of the object to edit (16 is `animal_other` in the Appendix table).
SEGMENTATION_CLASS = 16

# Text prompt describing what should replace the masked object.
PROMPT = "A cow looking at the camera"
# [Optional] Negative prompt describing what you don't want to see.
NEGATIVE_PROMPT = ""

response: ImageGenerationResponse = generation_model.edit_image(
    base_image=INPUT_IMAGE,
    prompt=PROMPT,
    negative_prompt=NEGATIVE_PROMPT,
    mask_mode=MASK_MODE,
    segmentation_classes=[SEGMENTATION_CLASS],
    edit_mode=EDIT_MODE,
)

display_images_in_grid(response.images)

### Inpainting-remove with foreground maskMode

Remove the foreground object of an image.

In [None]:
EDIT_MODE = "inpainting-remove"
MASK_MODE = "foreground"
PROMPT = "Background, landscape photo"
# [Optional] Set a negative prompt to define what you don't want to see.
NEGATIVE_PROMPT = ""

# The negative prompt is passed so that editing NEGATIVE_PROMPT above takes
# effect. Segmentation classes only apply to the `semantic` maskMode, so none
# are passed, which also keeps the cell independent of the `classes` variable
# set by the exploration cell.
response: ImageGenerationResponse = generation_model.edit_image(
    prompt=PROMPT,
    base_image=INPUT_IMAGE,
    negative_prompt=NEGATIVE_PROMPT,
    edit_mode=EDIT_MODE,
    mask_mode=MASK_MODE,
)

display_images_in_grid(response.images)

### Inpainting-remove with Semantic maskMode

Remove the specified object(s) in an image using a segmentation class.

In [None]:
EDIT_MODE = "inpainting-remove"
MASK_MODE = "semantic"
# Segmentation class ID of the object to remove.
# NOTE(review): 125 is `person` in the Appendix table, while the base image is
# generated from a deer prompt (class 16, `animal_other`) — confirm 125 is intended.
SEGMENTATION_CLASS = 125

# Text prompt guiding what fills the removed region.
PROMPT = "Background, landscape photo"
# Negative prompt describing what you don't want to see.
NEGATIVE_PROMPT = ""

response: ImageGenerationResponse = generation_model.edit_image(
    base_image=INPUT_IMAGE,
    prompt=PROMPT,
    negative_prompt=NEGATIVE_PROMPT,
    mask_mode=MASK_MODE,
    segmentation_classes=[SEGMENTATION_CLASS],
    edit_mode=EDIT_MODE,
)

display_images_in_grid(response.images)

## Conclusion

You have explored Imagen's image editing features through the Vertex AI Python SDK, including the additional parameters that influence image generation.

Check out the Vertex AI reference to learn more about how to [Edit image prompts](https://cloud.google.com/vertex-ai/generative-ai/docs/image/img-gen-prompt-guide#edit-prompts).

## Appendix

### Semantic segmentation classes
| Class ID | Instance Type | Class ID | Instance Type | Class ID | Instance Type | Class ID | Instance Type |
|---|---|---|---|---|---|---|---|
|	0	|	backpack	|	50	|	carrot	|	100	|	sidewalk_pavement	|	150	|	skis	|
|	1	|	umbrella	|	51	|	hot_dog	|	101	|	runway	|	151	|	snowboard	|
|	2	|	bag	|	52	|	pizza	|	102	|	terrain	|	152	|	sports_ball	|
|	3	|	tie	|	53	|	donut	|	103	|	book	|	153	|	kite	|
|	4	|	suitcase	|	54	|	cake	|	104	|	box	|	154	|	baseball_bat	|
|	5	|	case	|	55	|	fruit_other	|	105	|	clock	|	155	|	baseball_glove	|
|	6	|	bird	|	56	|	food_other	|	106	|	vase	|	156	|	skateboard	|
|	7	|	cat	|	57	|	chair_other	|	107	|	scissors	|	157	|	surfboard	|
|	8	|	dog	|	58	|	armchair	|	108	|	plaything_other	|	158	|	tennis_racket	|
|	9	|	horse	|	59	|	swivel_chair	|	109	|	teddy_bear	|	159	|	net	|
|	10	|	sheep	|	60	|	stool	|	110	|	hair_dryer	|	160	|	base	|
|	11	|	cow	|	61	|	seat	|	111	|	toothbrush	|	161	|	sculpture	|
|	12	|	elephant	|	62	|	couch	|	112	|	painting	|	162	|	column	|
|	13	|	bear	|	63	|	trash_can	|	113	|	poster	|	163	|	fountain	|
|	14	|	zebra	|	64	|	potted_plant	|	114	|	bulletin_board	|	164	|	awning	|
|	15	|	giraffe	|	65	|	nightstand	|	115	|	bottle	|	165	|	apparel	|
|	16	|	animal_other	|	66	|	bed	|	116	|	cup	|	166	|	banner	|
|	17	|	microwave	|	67	|	table	|	117	|	wine_glass	|	167	|	flag	|
|	18	|	radiator	|	68	|	pool_table	|	118	|	knife	|	168	|	blanket	|
|	19	|	oven	|	69	|	barrel	|	119	|	fork	|	169	|	curtain_other	|
|	20	|	toaster	|	70	|	desk	|	120	|	spoon	|	170	|	shower_curtain	|
|	21	|	storage_tank	|	71	|	ottoman	|	121	|	bowl	|	171	|	pillow	|
|	22	|	conveyor_belt	|	72	|	wardrobe	|	122	|	tray	|	172	|	towel	|
|	23	|	sink	|	73	|	crib	|	123	|	range_hood	|	173	|	rug_floormat	|
|	24	|	refrigerator	|	74	|	basket	|	124	|	plate	|	174	|	vegetation	|
|	25	|	washer_dryer	|	75	|	chest_of_drawers	|	125	|	person	|	175	|	bicycle	|
|	26	|	fan	|	76	|	bookshelf	|	126	|	rider_other	|	176	|	car	|
|	27	|	dishwasher	|	77	|	counter_other	|	127	|	bicyclist	|	177	|	autorickshaw	|
|	28	|	toilet	|	78	|	bathroom_counter	|	128	|	motorcyclist	|	178	|	motorcycle	|
|	29	|	bathtub	|	79	|	kitchen_island	|	129	|	paper	|	179	|	airplane	|
|	30	|	shower	|	80	|	door	|	130	|	streetlight	|	180	|	bus	|
|	31	|	tunnel	|	81	|	light_other	|	131	|	road_barrier	|	181	|	train	|
|	32	|	bridge	|	82	|	lamp	|	132	|	mailbox	|	182	|	truck	|
|	33	|	pier_wharf	|	83	|	sconce	|	133	|	cctv_camera	|	183	|	trailer	|
|	34	|	tent	|	84	|	chandelier	|	134	|	junction_box	|	184	|	boat_ship	|
|	35	|	building	|	85	|	mirror	|	135	|	traffic_sign	|	185	|	slow_wheeled_object	|
|	36	|	ceiling	|	86	|	whiteboard	|	136	|	traffic_light	|	186	|	river_lake	|
|	37	|	laptop	|	87	|	shelf	|	137	|	fire_hydrant	|	187	|	sea	|
|	38	|	keyboard	|	88	|	stairs	|	138	|	parking_meter	|	188	|	water_other	|
|	39	|	mouse	|	89	|	escalator	|	139	|	bench	|	189	|	swimming_pool	|
|	40	|	remote	|	90	|	cabinet	|	140	|	bike_rack	|	190	|	waterfall	|
|	41	|	cell phone	|	91	|	fireplace	|	141	|	billboard	|	191	|	wall	|
|	42	|	television	|	92	|	stove	|	142	|	sky	|	192	|	window	|
|	43	|	floor	|	93	|	arcade_machine	|	143	|	pole	|	193	|	window_blind	|
|	44	|	stage	|	94	|	gravel	|	144	|	fence	|
|	45	|	banana	|	95	|	platform	|	145	|	railing_banister	|
|	46	|	apple	|	96	|	playingfield	|	146	|	guard_rail	|
|	47	|	sandwich	|	97	|	railroad	|	147	|	mountain_hill	|
|	48	|	orange	|	98	|	road	|	148	|	rock	|
|	49	|	broccoli	|	99	|	snow	|	149	|	frisbee	|
