## Using JSON mode to avoid having to specify a grammar

OpenAI supports a JSON mode.

In [1]:
#%pip install --upgrade --quiet openai

Note: you may need to restart the kernel to use updated packages.


In [1]:
MODEL_ID = "gpt-4o-mini"

import os
from dotenv import load_dotenv
load_dotenv("../keys.env")
assert os.environ["OPENAI_API_KEY"][:2] == "sk",\
       "Please specify the OPENAI_API_KEY access token in keys.env file"

In [2]:
from openai import OpenAI
client = OpenAI()

## Zero-shot generation

Specifying JSON as the response format

In [6]:
def parse_book_info(paragraph: str) -> str:
    system_prompt = """
    You will be given a short paragraph about a book.
    Extract the author, title, and publication year of the book.
    Return the result as JSON with the keys author, title, and year.
    If any piece of information is not found, fill the spot with NULL
    """
    
    input_message = [
        {"role": "developer", "content": system_prompt},
        {"role": "user", "content": paragraph}   
    ]

    response = client.chat.completions.create(
        model=MODEL_ID,
        messages=input_message,
        response_format={"type": "json_object"}
    )
    return response.choices[0].message.content


result = parse_book_info("""
Love in the Time of Cholera (Spanish: El amor en los tiempos del cólera) is a novel written in Spanish
by Colombian Nobel Prize-winning author Gabriel García Márquez and published in 1985.
""")
print(result)

{
    "author": "Gabriel García Márquez",
    "title": "Love in the Time of Cholera",
    "year": 1985
}


In [7]:
result = parse_book_info("""
The Tirukkural (Tamil: திருக்குறள், lit. 'sacred verses')
is a classic Tamil language text whose authorship is traditionally attributed to Valluvar,
also known in full as Thiruvalluvar. The text has been dated variously from 300 BCE to 5th century CE. 
The traditional accounts describe it as the last work of the third Sangam, but linguistic analysis
suggests a later date of 450 to 500 CE and that it was composed after the Sangam period.
""")
print(result)

{
    "author": "Valluvar",
    "title": "The Tirukkural",
    "year": "NULL"
}
