# Lecture - Pydantic to structure output from LLMs

In [2]:
from dotenv import load_dotenv
from google import genai
import os

# Load environment variables first; load_dotenv() is presumably what makes
# GEMINI_API_KEY available from a local .env file -- confirm for your setup.
load_dotenv()

# The key is read from the environment so it never appears in the notebook.
client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY"))

# Minimal smoke test: one short prompt, plain-text reply.
response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents="Explain how AI works in a few words",
)
print(response.text)

AI learns patterns from data to make decisions or predictions.


In [4]:
# Ask the model for a small library as JSON.
# The prompt alone cannot guarantee a bare JSON reply (the model may still wrap
# it in ```json fences, which would break JSON parsing later), so JSON output
# is also requested through the API config.
response = client.models.generate_content(
    model="gemini-2.5-flash",
    contents="""You are a helpful assistant. I need you to create a JSON object representing a library.
    The library's name should be 'Coolu Libraru' and have the fields name and books that contains a list of book.
    Each book should have a 'title', 'author', and 'year' field. Make sure the output is a single, valid JSON object. Give me 10 books.
    Remove ```json and ``` """,
    config={"response_mime_type": "application/json"},
)

# Show the raw string exactly as returned (newlines appear as \n escapes).
response.text

'{\n  "name": "Coolu Libraru",\n  "books": [\n    {\n      "title": "The Silent Planet",\n      "author": "C.S. Lewis",\n      "year": 1938\n    },\n    {\n      "title": "Dune",\n      "author": "Frank Herbert",\n      "year": 1965\n    },\n    {\n      "title": "Foundation",\n      "author": "Isaac Asimov",\n      "year": 1951\n    },\n    {\n      "title": "1984",\n      "author": "George Orwell",\n      "year": 1949\n    },\n    {\n      "title": "Brave New World",\n      "author": "Aldous Huxley",\n      "year": 1932\n    },\n    {\n      "title": "Neuromancer",\n      "author": "William Gibson",\n      "year": 1984\n    },\n    {\n      "title": "Do Androids Dream of Electric Sheep?",\n      "author": "Philip K. Dick",\n      "year": 1968\n    },\n    {\n      "title": "The Hitchhiker\'s Guide to the Galaxy",\n      "author": "Douglas Adams",\n      "year": 1979\n    },\n    {\n      "title": "Ender\'s Game",\n      "author": "Orson Scott Card",\n      "year": 1985\n    },\n    {

In [5]:
# print() renders the \n escapes as real line breaks, so the JSON is readable.
print(response.text)

{
  "name": "Coolu Libraru",
  "books": [
    {
      "title": "The Silent Planet",
      "author": "C.S. Lewis",
      "year": 1938
    },
    {
      "title": "Dune",
      "author": "Frank Herbert",
      "year": 1965
    },
    {
      "title": "Foundation",
      "author": "Isaac Asimov",
      "year": 1951
    },
    {
      "title": "1984",
      "author": "George Orwell",
      "year": 1949
    },
    {
      "title": "Brave New World",
      "author": "Aldous Huxley",
      "year": 1932
    },
    {
      "title": "Neuromancer",
      "author": "William Gibson",
      "year": 1984
    },
    {
      "title": "Do Androids Dream of Electric Sheep?",
      "author": "Philip K. Dick",
      "year": 1968
    },
    {
      "title": "The Hitchhiker's Guide to the Galaxy",
      "author": "Douglas Adams",
      "year": 1979
    },
    {
      "title": "Ender's Game",
      "author": "Orson Scott Card",
      "year": 1985
    },
    {
      "title": "Slaughterhouse-Five",
      "autho

In [6]:
from pydantic import BaseModel, Field
from typing import List
from datetime import datetime

# Schema for one book entry in the LLM reply.
# `year` must be greater than 1000 and no later than the current calendar year.
# The upper bound is evaluated once, when this class is defined.
class Book(BaseModel):
    title: str
    author: str
    # `le` rather than `lt`: a book published in the current year is still valid.
    year: int = Field(gt=1000, le=datetime.now().year)

# Top-level schema for the LLM reply: a named library holding Book entries.
class Library(BaseModel):
    name: str
    # Each element is validated as a Book.
    books: List[Book]


# Parse the raw JSON text from the model and validate it against Library
# (and, through `books`, against Book) in a single step.
library = Library.model_validate_json(response.text)
library

Library(name='Coolu Libraru', books=[Book(title='The Silent Planet', author='C.S. Lewis', year=1938), Book(title='Dune', author='Frank Herbert', year=1965), Book(title='Foundation', author='Isaac Asimov', year=1951), Book(title='1984', author='George Orwell', year=1949), Book(title='Brave New World', author='Aldous Huxley', year=1932), Book(title='Neuromancer', author='William Gibson', year=1984), Book(title='Do Androids Dream of Electric Sheep?', author='Philip K. Dick', year=1968), Book(title="The Hitchhiker's Guide to the Galaxy", author='Douglas Adams', year=1979), Book(title="Ender's Game", author='Orson Scott Card', year=1985), Book(title='Slaughterhouse-Five', author='Kurt Vonnegut', year=1969)])

In [7]:
# Peek at the instance attributes: field names mapped to their validated values.
vars(library)

{'name': 'Coolu Libraru',
 'books': [Book(title='The Silent Planet', author='C.S. Lewis', year=1938),
  Book(title='Dune', author='Frank Herbert', year=1965),
  Book(title='Foundation', author='Isaac Asimov', year=1951),
  Book(title='1984', author='George Orwell', year=1949),
  Book(title='Brave New World', author='Aldous Huxley', year=1932),
  Book(title='Neuromancer', author='William Gibson', year=1984),
  Book(title='Do Androids Dream of Electric Sheep?', author='Philip K. Dick', year=1968),
  Book(title="The Hitchhiker's Guide to the Galaxy", author='Douglas Adams', year=1979),
  Book(title="Ender's Game", author='Orson Scott Card', year=1985),
  Book(title='Slaughterhouse-Five', author='Kurt Vonnegut', year=1969)]}

In [8]:
# The parsed result is an instance of the Library class defined in this notebook.
type(library)

__main__.Library

In [9]:
# Library subclasses BaseModel, so the parsed object is a Pydantic model instance.
isinstance(library, BaseModel)

True

In [10]:
# Validated fields are available as ordinary attributes.
library.name

'Coolu Libraru'

In [11]:
# The nested list holds Book instances rather than raw dicts.
library.books

[Book(title='The Silent Planet', author='C.S. Lewis', year=1938),
 Book(title='Dune', author='Frank Herbert', year=1965),
 Book(title='Foundation', author='Isaac Asimov', year=1951),
 Book(title='1984', author='George Orwell', year=1949),
 Book(title='Brave New World', author='Aldous Huxley', year=1932),
 Book(title='Neuromancer', author='William Gibson', year=1984),
 Book(title='Do Androids Dream of Electric Sheep?', author='Philip K. Dick', year=1968),
 Book(title="The Hitchhiker's Guide to the Galaxy", author='Douglas Adams', year=1979),
 Book(title="Ender's Game", author='Orson Scott Card', year=1985),
 Book(title='Slaughterhouse-Five', author='Kurt Vonnegut', year=1969)]

`extract titles into a list`

In [12]:
# Pull just the title attribute off every validated Book.
titles = [entry.title for entry in library.books]
titles

['The Silent Planet',
 'Dune',
 'Foundation',
 '1984',
 'Brave New World',
 'Neuromancer',
 'Do Androids Dream of Electric Sheep?',
 "The Hitchhiker's Guide to the Galaxy",
 "Ender's Game",
 'Slaughterhouse-Five']

`extract titles and years of books published after a certain year`

In [13]:
# Keep (title, year) pairs only for books published strictly after the cutoff.
cutoff_year = 1950
newer_books = [
    (entry.title, entry.year)
    for entry in library.books
    if entry.year > cutoff_year
]
newer_books

[('Dune', 1965),
 ('Foundation', 1951),
 ('Neuromancer', 1984),
 ('Do Androids Dream of Electric Sheep?', 1968),
 ("The Hitchhiker's Guide to the Galaxy", 1979),
 ("Ender's Game", 1985),
 ('Slaughterhouse-Five', 1969)]

In [14]:
# Convert the model, including the nested Book objects, into plain dicts and lists.
library.model_dump()

{'name': 'Coolu Libraru',
 'books': [{'title': 'The Silent Planet',
   'author': 'C.S. Lewis',
   'year': 1938},
  {'title': 'Dune', 'author': 'Frank Herbert', 'year': 1965},
  {'title': 'Foundation', 'author': 'Isaac Asimov', 'year': 1951},
  {'title': '1984', 'author': 'George Orwell', 'year': 1949},
  {'title': 'Brave New World', 'author': 'Aldous Huxley', 'year': 1932},
  {'title': 'Neuromancer', 'author': 'William Gibson', 'year': 1984},
  {'title': 'Do Androids Dream of Electric Sheep?',
   'author': 'Philip K. Dick',
   'year': 1968},
  {'title': "The Hitchhiker's Guide to the Galaxy",
   'author': 'Douglas Adams',
   'year': 1979},
  {'title': "Ender's Game", 'author': 'Orson Scott Card', 'year': 1985},
  {'title': 'Slaughterhouse-Five', 'author': 'Kurt Vonnegut', 'year': 1969}]}

`write the library to a JSON file`

In [15]:
# Persist the validated library as JSON text.
# The encoding is pinned to UTF-8 so the file does not depend on the platform's
# default locale encoding; titles or author names may contain non-ASCII characters.
with open("library.json", "w", encoding="utf-8") as json_file:
    json_file.write(library.model_dump_json())

`create pandas dataframe`

In [16]:
import pandas as pd

# Walk the validated books once, filling one column list per field.
titles, years, authors = [], [], []
for entry in library.books:
    titles.append(entry.title)
    years.append(entry.year)
    authors.append(entry.author)

# Column order follows the dict: title, year, author.
pd.DataFrame({"title": titles, "year": years, "author": authors})

Unnamed: 0,title,year,author
0,The Silent Planet,1938,C.S. Lewis
1,Dune,1965,Frank Herbert
2,Foundation,1951,Isaac Asimov
3,1984,1949,George Orwell
4,Brave New World,1932,Aldous Huxley
5,Neuromancer,1984,William Gibson
6,Do Androids Dream of Electric Sheep?,1968,Philip K. Dick
7,The Hitchhiker's Guide to the Galaxy,1979,Douglas Adams
8,Ender's Game,1985,Orson Scott Card
9,Slaughterhouse-Five,1969,Kurt Vonnegut
