# Google Gemini with Pydantic for Structured Output

Gemini models can generate structured output in JSON format, which is useful for applications that require a specific data schema. You can define this schema with Pydantic and pass it to the Gemini API so that the model returns data in the expected structure, for example when extracting information from text or images.

You can learn more about structured output with Gemini here:
[https://ai.google.dev/gemini-api/docs/structured-output](https://ai.google.dev/gemini-api/docs/structured-output)

In [None]:
%pip install google-genai pydantic

In [11]:
import os
from google import genai
from pydantic import BaseModel

# Create the Gemini API client. The key is read from the GEMINI_API_KEY
# environment variable; "xxx" is only a placeholder fallback that is used
# when the variable is unset.
client = genai.Client(
    api_key=os.getenv("GEMINI_API_KEY", "xxx"),
)


# Define Pydantic schemas 
class Ingredient(BaseModel):
    # One ingredient entry of a recipe. All three fields are plain strings;
    # the sample output below shows free-form quantities such as '2 1/4' and
    # an empty unit for countable items such as eggs.
    # (Documented with comments rather than a docstring so the JSON schema
    # generated from this model stays unchanged.)
    name: str
    quantity: str
    unit: str

class Recipe(BaseModel):
    # A named recipe together with the list of ingredients it needs.
    # (Documented with comments rather than a docstring so the JSON schema
    # generated from this model stays unchanged.)
    recipe_name: str
    ingredients: list[Ingredient]


# Ask the model for several cookie recipes, constraining the reply to JSON
# that matches a list of Recipe objects.
response = client.models.generate_content(
    config={
        "response_mime_type": "application/json",
        "response_schema": list[Recipe],
    },
    contents="List a few popular cookie recipes.",
    model="gemini-2.0-flash-lite",
)

# Read the reply as Python objects instead of raw JSON text.
# NOTE(review): `parsed` is annotated here as list[Recipe]; confirm against the
# SDK whether it can be None when the reply cannot be decoded.
recipes: list[Recipe] = response.parsed

# Last expression of the cell, so the notebook displays the parsed recipes.
recipes

[Recipe(recipe_name='Chocolate Chip Cookies', ingredients=[Ingredient(name='butter', quantity='1', unit='cup'), Ingredient(name='sugar', quantity='1', unit='cup'), Ingredient(name='brown sugar', quantity='1', unit='cup'), Ingredient(name='eggs', quantity='2', unit=''), Ingredient(name='vanilla extract', quantity='1', unit='tsp'), Ingredient(name='baking soda', quantity='1', unit='tsp'), Ingredient(name='salt', quantity='1', unit='tsp'), Ingredient(name='flour', quantity='3', unit='cups'), Ingredient(name='chocolate chips', quantity='2', unit='cups')]),
 Recipe(recipe_name='Sugar Cookies', ingredients=[Ingredient(name='butter', quantity='1', unit='cup'), Ingredient(name='sugar', quantity='1', unit='cup'), Ingredient(name='eggs', quantity='1', unit=''), Ingredient(name='vanilla extract', quantity='1', unit='tsp'), Ingredient(name='flour', quantity='2 1/4', unit='cups'), Ingredient(name='baking powder', quantity='1', unit='tsp'), Ingredient(name='salt', quantity='1/2', unit='tsp')]),
 Rec

## Using JSON Schema for Structured Output

Besides Pydantic, you can also define the response schema using JSON Schema directly. This provides a flexible way to specify the expected structure of the JSON output. The following example demonstrates various features of JSON Schema, including nested objects, arrays, enums, default values, union types (`anyOf`), and reusable definitions (`$defs` referenced via `$ref`).

In [None]:
# JSON Schema, written as a plain Python dict, describing a single
# product-catalog item. Reusable sub-schemas are declared under "$defs" at the
# bottom and pulled in through "$ref" pointers by the properties that use them.
# NOTE(review): confirm which of these keywords (pattern, default, minItems,
# format, ...) the target model/SDK actually enforces rather than just accepts.
my_schema = {
    "type": "object",
    "title": "ProductCatalog",
    "description": "Schema for a product catalog item.",
    "properties": {
        # 5 to 10 characters, upper-case letters and digits only.
        "product_id": {
            "type": "string",
            "description": "Unique identifier for the product.",
            "pattern": "^[A-Z0-9]{5,10}$"
        },
        "product_name": {
            "type": "string",
            "description": "Name of the product."
        },
        # Numeric price bounded to the range 0.01 .. 10000.00.
        "price": {
            "type": "number",
            "description": "Price of the product.",
            "minimum": 0.01,
            "maximum": 10000.00
        },
        # The default is the Python bool True because this is a Python dict,
        # not JSON text; it serializes to JSON `true`.
        "in_stock": {
            "type": "boolean",
            "description": "Availability status of the product.",
            "default": True
        },
        # Between one and five string keywords.
        "tags": {
            "type": "array",
            "description": "Keywords for the product.",
            "items": {
                "type": "string"
            },
            "minItems": 1,
            "maxItems": 5
        },
        # Closed set of allowed category names.
        "category": {
            "type": "string",
            "description": "Product category.",
            "enum": ["Electronics", "Apparel", "Home Goods", "Books"]
        },
        # Union type: the value must match the "dimensions" sub-schema or the
        # "weight" sub-schema defined under "$defs".
        "dimensions_or_weight": {
            "anyOf": [
                { "$ref": "#/$defs/dimensions" },
                { "$ref": "#/$defs/weight" }
            ],
            "description": "Either physical dimensions or weight of the product."
        },
        # Nested object described by the "supplierInfo" sub-schema.
        "supplier": {
            "$ref": "#/$defs/supplierInfo"
        }
    },
    # Only these four properties are mandatory; every other property is optional.
    "required": ["product_id", "product_name", "price", "category"],
    # Shared sub-schemas referenced from "properties" above.
    "$defs": {
        # Physical size; all three measurements are required.
        "dimensions": {
            "type": "object",
            "title": "Dimensions",
            "properties": {
                "length": {"type": "number", "description": "Length in cm."},
                "width": {"type": "number", "description": "Width in cm."},
                "height": {"type": "number", "description": "Height in cm."}
            },
            "required": ["length", "width", "height"]
        },
        # Weight as a numeric value plus a unit restricted to "kg" or "lb".
        "weight": {
            "type": "object",
            "title": "Weight",
            "properties": {
                "value": {"type": "number", "description": "Weight value."},
                "unit": {"type": "string", "enum": ["kg", "lb"], "default": "kg"}
            },
            "required": ["value", "unit"]
        },
        # Supplier details; only the name is required, the email is optional.
        "supplierInfo": {
            "type": "object",
            "title": "SupplierInformation",
            "description": "Information about the product supplier.",
            "properties": {
                "supplier_name": {"type": "string"},
                "contact_email": {"type": "string", "format": "email"}
            },
            "required": ["supplier_name"]
        }
    }
}

# You can optionally print the schema to verify its structure
# import json
# print(json.dumps(my_schema, indent=2))

In [None]:
import os
from google import genai

# Ensure the client is initialized.
# If running this cell independently, you might need to uncomment the next line
# or ensure GEMINI_API_KEY is set in your environment.
# client = genai.Client(api_key=os.getenv("GEMINI_API_KEY")) 

# If the client is already defined in a previous cell (e.g. from Pydantic example), 
# this code will assume it's available.

prompt_json = "Generate a sample product listing for a new high-tech drone, be creative with the description and features."

# Default to None so the name is always bound, even if the request below fails.
response_json_schema = None

try:
    # Request JSON output constrained by `my_schema` (the dict defined in the
    # previous cell) instead of a Pydantic model.
    # NOTE(review): this cell targets 'gemini-1.5-flash' while the Pydantic
    # example uses a different model; confirm this model is still available.
    response_json_schema = client.models.generate_content(
        config={
            "response_mime_type": "application/json",
            "response_schema": my_schema,
        },
        contents=prompt_json,
        model="gemini-1.5-flash",
    )
except Exception as err:
    # Best-effort tutorial cell: report the failure and carry on with None.
    print(f"An error occurred: {err}")

In [None]:
import json

# Display the structured result of the JSON-schema request.
# `response_json_schema` is None when the request in the previous cell failed.
if response_json_schema is None:
    print("No response to display as the API call might have failed.")
else:
    # The .parsed attribute holds the Python object decoded from the JSON
    # response (a dict for this schema).
    parsed_product_data = response_json_schema.parsed
    if parsed_product_data is None:
        # The reply could not be decoded (for example the model returned text
        # that is not valid JSON). Show the raw text instead of printing a
        # misleading "null".
        print("The response could not be parsed as JSON. Raw response text:")
        print(response_json_schema.text)
    else:
        print(json.dumps(parsed_product_data, indent=2))

# You can also directly access parts of the parsed data if needed:
# if response_json_schema and response_json_schema.parsed:
#     print(f"Product Name: {response_json_schema.parsed.get('product_name')}")