In [None]:
# Copyright 2026 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

## Getting Started

### Install Google Gen AI SDK for Python

In [None]:
!pip install --upgrade --quiet google-genai

### Restart runtime
To use the newly installed packages in this Jupyter runtime, you must restart the runtime. You can do this by running the cell below, which restarts the current kernel.

The restart might take a minute or longer. After it's restarted, continue to the next step.

In [None]:
import IPython

# Shut down the current kernel and request a restart (the True argument) so
# that the freshly installed packages are picked up.
IPython.Application.instance().kernel.do_shutdown(True)

### Authenticate your notebook environment (Colab only)
If you are running this notebook on Google Colab, run the cell below to authenticate your environment.

In [None]:
import sys

# Only authenticate when the google.colab module is already loaded, i.e. when
# the notebook is running on Colab; everywhere else this cell does nothing.
running_on_colab = "google.colab" in sys.modules
if running_on_colab:
    from google.colab import auth

    auth.authenticate_user()

### Import libraries

In [None]:
from google import genai
from google.genai import types
import pathlib
import os
from IPython.display import display
from PIL import Image
import io

## Use AI Studio or Vertex AI

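**Note:** Run only one of the two setup cells below. Both assign the same `client` variable, so whichever cell runs last determines whether Vertex AI or AI Studio is used.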

### For Vertex AI - Set Google Cloud project information and create client

To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com).

Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment).

In [None]:
PROJECT_ID = ""  # @param {type: "string", placeholder: "[your-project-id]", isTemplate: true}
# Fall back to the environment when the form field is empty or still holds the
# placeholder. The value from os.environ.get() is used as-is: wrapping it in
# str() would turn a missing variable into the literal project id "None".
if not PROJECT_ID or PROJECT_ID == "[your-project-id]":
    PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT")

LOCATION = "global"

# Vertex AI client used by the rest of the notebook. If PROJECT_ID is still
# None here, the client is left to resolve (or reject) the project itself.
client = genai.Client(vertexai=True, project=PROJECT_ID, location=LOCATION)


### For AI Studio

To get started with AI Studio, get an [API key from AI Studio](https://aistudio.google.com/api-keys).

**Note:** Always store your API key in a secrets manager or an env file; never hardcode it in the notebook.

In [None]:
GEMINI_API_KEY = ""  # @param {type: "string", placeholder: "[your-api-key]", isTemplate: true}
# Prefer a key kept outside the notebook: when the form field is empty or still
# holds the placeholder, read the key from the GEMINI_API_KEY environment
# variable instead (same fallback pattern as the Vertex AI setup cell).
if not GEMINI_API_KEY or GEMINI_API_KEY == "[your-api-key]":
    GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")

# AI Studio (Gemini Developer API) client used by the rest of the notebook.
client = genai.Client(api_key=GEMINI_API_KEY)

## Agentic Vision in action

This notebook analyzes a local image, so let's first load it and take a look at it.

In [None]:
# Load local image
image_path = "/content/stuff_on_a_desk.jpg"
image_bytes = pathlib.Path(image_path).read_bytes()
image = types.Part.from_bytes(
    data=image_bytes, mime_type="image/jpeg"
)


In [None]:
# Decode the raw bytes with PIL and render the picture inline.
preview_stream = io.BytesIO(image_bytes)
display(Image.open(preview_stream))

To use Code Execution with images, enable both Code Execution as a tool and Thinking.

In [None]:
# Instruction sent to the model together with the image. The requested point
# format uses consistent JSON-style "key": value syntax for both fields.
prompt = """Count the items in this image and classify them and label it by its
specific color and object type.
Point to all the items in the image in the format of {"point": [y,x], "label": ""}.
Identify any objects that should not belong to a work desk and annotate them on
the image with boxes, arrows and make sure you label them.
Also provide the reasoning on why they do not belong in a work desk"""

In [None]:
# Send the image and the prompt to the model with the code-execution tool
# enabled and the thinking level set to HIGH.
response = client.models.generate_content(
    model="gemini-3-flash-preview",
    contents=[image, prompt],
    config=types.GenerateContentConfig(
        # Pass a ToolCodeExecution instance rather than the class object itself.
        tools=[types.Tool(code_execution=types.ToolCodeExecution())],
        thinking_config=types.ThinkingConfig(
            thinking_level="HIGH",
        ),
    ),
)

Let's look at the different parts of the response

#### Parse the response

The model's reasoning text

In [None]:
# Gather the text of every part of the first candidate that carries text,
# then print the pieces in their original order.
text_segments = [
    part.text
    for part in response.candidates[0].content.parts
    if part.text is not None
]
for segment in text_segments:
    print(segment)

The code the model generated to solve the problem

In [None]:
# Walk the parts of the first candidate and print the source of each piece of
# generated code, skipping parts that carry none.
for part in response.candidates[0].content.parts:
    generated_code = part.executable_code
    if generated_code is None:
        continue
    print(generated_code.code)

The output of the code execution (available on each part as `part.code_execution_result.output`)

Any resulting images generated by the code

In [None]:
# Render every image found among the parts of the first candidate.
for part in response.candidates[0].content.parts:
    # Convert each part once and reuse the result instead of calling
    # as_image() a second time just to read the bytes.
    generated_image = part.as_image()
    if generated_image is not None:
        display(Image.open(io.BytesIO(generated_image.image_bytes)))