<a href="https://colab.research.google.com/github/krMaynard/genai/blob/main/gemini_image_generation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Standard library imports
import base64
import random
import textwrap
from io import BytesIO

# Third-party imports
from google import genai
from google.colab import files
from google.colab import userdata
from google.genai import types
from IPython.display import display, Markdown
from PIL import Image

# Read the API key from Colab's user secrets and build the Gemini client with it.
secret_api_key = userdata.get('GOOGLE_API_KEY')
client = genai.Client(api_key=secret_api_key)

# Session-wide logs that generate_image() appends to: the prompts sent, the
# text parts returned, and the file names of the images saved.
prompts, text_responses, image_filenames = [], [], []

In [None]:
# Prompt for a photorealistic scene: a giraffe in a business suit listening to a
# teenage boy on a bench in a Seoul subway station.
# textwrap.dedent strips the common 4-space indent; the backslash after the
# opening quotes drops the leading newline, and the backslash at the end of the
# last text line drops the trailing newline.
# NOTE(review): not referenced by the other cells visible here.
kirin_prompt = textwrap.dedent("""\
    Generate a hyperrealistic, cinematic photograph of a surreal moment in a modern
    Seoul subway station. A full-grown, elegant giraffe is sitting upright
    on a polished metal bench, impeccably dressed in a tailored dark grey
    business suit with a crisp white shirt and a silk tie. Next to the
    giraffe sits a Korean teenage boy in his late teens, wearing
    contemporary streetwear (a stylish hoodie, jeans, and sneakers). The
    boy is turned towards the giraffe, talking to it with an earnest and
    animated expression, gesturing with one hand. The giraffe appears to
    be listening intently.

    The subway station is clean and brightly lit with a mix of cool
    fluorescent overheads and warm accent lights. In the background,
    digital signs display train information in glowing Hangul (Korean
    script). The tiled walls and floor are pristine, reflecting the
    lights. The yellow safety line is visible on the edge of the platform.

    Capture the scene with a shallow depth of field, keeping the giraffe
    and the boy in sharp focus while the background is softly blurred. The
    lighting should create soft shadows, enhancing the realism and the
    quiet, intimate nature of their bizarre conversation.

    **Style:** Photorealistic, 8K resolution, ultra-detailed, sharp focus,
    shot on a Sony A7R IV with a 50mm f/1.2 lens.\
""")

# Prompt for a minimalist 2D render of an unstable house of cards on a white
# background, in a corporate infographic style.
# textwrap.dedent strips the common 4-space indent; the backslash after the
# opening quotes drops the leading newline. There is no backslash before the
# closing quotes, so the resulting string ends with a newline.
cards_prompt = textwrap.dedent("""\
    A minimalist 2D render of a house of cards, built from sleek, single-color
    cards. The structure is leaning and unstable, with a few cards sliding
    apart, ready to collapse. Set against a clean, solid white background. Corporate infographic style, vibrant accent color.
""")

def generate_image(prompt):
  """Send `prompt` to the Gemini image model and save any returned images.

  Side effects: appends `prompt` to the module-level `prompts` list, every
  returned text part to `text_responses`, and every saved file name to
  `image_filenames`. Each image is written as a PNG in the current working
  directory and shown with `image.show()`.

  Args:
    prompt: Text prompt passed as the request `contents`.

  Returns:
    A `(response, image_filename)` tuple. `image_filename` is the name of the
    last image saved for this call, or None when the response carried no
    image part (for example an empty or text-only response).
  """
  prompts.append(prompt)

  response = client.models.generate_content(
      model="gemini-2.5-flash-image-preview",
      contents=prompt,
      config=types.GenerateContentConfig(
        response_modalities=['TEXT', 'IMAGE']
      )
  )

  # Stays None when no image part comes back, so the return statement below
  # never hits an unbound local.
  image_filename = None

  # candidates, content and parts may each be missing on the response object;
  # treat any missing level as "no parts" instead of raising.
  candidates = response.candidates or []
  content = candidates[0].content if candidates else None
  parts = (content.parts if content is not None else None) or []

  for part in parts:
    if part.text is not None:
      text_responses.append(part.text)
    elif part.inline_data is not None:
      image = Image.open(BytesIO(part.inline_data.data))

      # Start from a random suffix, then bump it until the name has not been
      # used in this session; otherwise a repeated draw would overwrite an
      # earlier image and list the same file twice.
      random_number = random.randint(0, 1000)
      image_filename = f'gemini-native-image_{random_number}.png'
      while image_filename in image_filenames:
        random_number += 1
        image_filename = f'gemini-native-image_{random_number}.png'

      image_filenames.append(image_filename)
      image.save(image_filename)
      image.show()
  return response, image_filename


In [None]:
# Request five images for cards_prompt. generate_image() returns a
# (response, image_filename) tuple, so index 1 is the saved file name.
for i in range(5):
  imagen = generate_image(cards_prompt)
  image_filename = imagen[1]
  print(f'Image {i+1} saved to {image_filename}')

Image 1 saved to gemini-native-image_440.png


In [None]:
# Download every image whose file name was recorded in image_filenames,
# one files.download() call per file.

for filename in image_filenames:
  files.download(filename)