In [1]:
#!pip install llama-index

In [2]:
from pydantic import BaseModel
from typing import List

from llama_index.core.program import LLMTextCompletionProgram

#### Define output schema

In [4]:
class Song(BaseModel):
    """Data model for a song."""

    # Song title as free text.
    title: str
    # Track duration in seconds; pydantic validates/coerces this to an int.
    length_seconds: int


class Album(BaseModel):
    """Data model for an album."""

    # Album title.
    name: str
    # Name of the artist or group credited with the album.
    artist: str
    # Tracks on the album; each entry is validated as a `Song`.
    songs: List[Song]

#### Define LLM pydantic program

In [5]:
from llama_index.core.program import LLMTextCompletionProgram

In [6]:
# Prompt template; `{movie_name}` is supplied as a keyword argument when the
# program is called. The trailing backslashes are line continuations inside the
# string, so the whole prompt is a single line of text.
prompt_template_str = """\
Generate an example album, with an artist and a list of songs. \
Using the movie {movie_name} as inspiration.\
"""

In [7]:
# Build a text-completion program whose result is returned as an `Album`.
program = LLMTextCompletionProgram.from_defaults(
    prompt_template_str=prompt_template_str,
    output_cls=Album,
    verbose=True,
)

#### Run program to get structured output.

In [16]:
# Fill `{movie_name}` in the template and run the program. The bare `output`
# on the last line makes the notebook display the resulting object.
output = program(movie_name="The Shining")
output

Album(name='Redrum', artist='The Overlook Hotel', songs=[Song(title="Here's Johnny", length_seconds=240), Song(title='All Work and No Play', length_seconds=180), Song(title='Room 237', length_seconds=300), Song(title='The Shining', length_seconds=360), Song(title='Forever and Ever and Ever', length_seconds=420)])

#### Initialize with Pydantic Output Parser

The program above is equivalent to one built by defining a `PydanticOutputParser` for `Album` and passing it as `output_parser`, instead of passing `output_cls` directly.

In [17]:
from llama_index.core.output_parsers import PydanticOutputParser

# Same prompt as before, but the target schema is supplied through an explicit
# PydanticOutputParser rather than the `output_cls` argument.
program = LLMTextCompletionProgram.from_defaults(
    prompt_template_str=prompt_template_str,
    output_parser=PydanticOutputParser(output_cls=Album),
    verbose=True,
)

In [20]:

# Run the parser-based program with a different movie and display the result.
output = program(movie_name="Jo Jeeta wohi sikandar")
output

Album(name='Jo Jeeta Wohi Sikandar', artist='Various Artists', songs=[Song(title='Pehla Nasha', length_seconds=240), Song(title='Yahaan Ke Hum Sikandar', length_seconds=210), Song(title='Rooth Ke Humse', length_seconds=195), Song(title='Shehar Ki Pariyon', length_seconds=220), Song(title='Naam Hai Mera Fonseca', length_seconds=180)])

#### Define a Custom Output Parser

Sometimes you may want to parse the raw LLM output your own way, for example from a custom plain-text format into a structured object such as `Album`.

In [21]:
from llama_index.core.output_parsers import ChainableOutputParser

In [22]:
class CustomAlbumOutputParser(ChainableOutputParser):
    """Parse a plain-text, comma-separated album listing into an `Album`.

    Expected layout of the LLM output (blank lines are ignored)::

        <album_name>, <album_artist>
        <song_title>, <song_length_seconds>
        ...

    The first non-blank line is the album header; every following non-blank
    line describes one song.
    """

    def __init__(self, verbose: bool = False):
        """Create the parser.

        Args:
            verbose: When True, `parse` prints the raw output before parsing.
        """
        self.verbose = verbose

    def parse(self, output: str) -> Album:
        """Convert raw LLM text into an `Album`.

        Args:
            output: Raw completion text in the layout described on the class.

        Returns:
            The parsed `Album`, with one `Song` per song line.

        Raises:
            ValueError: If the output contains no non-blank lines, or a line
                has no comma separating its two fields. Validation errors
                raised by the `Song` / `Album` models (e.g. a non-numeric
                song length) propagate unchanged.
        """
        if self.verbose:
            print(f"> Raw output: {output}")

        # LLM output frequently ends with a newline or contains blank
        # separator lines; drop them instead of failing on an empty line.
        # splitlines() also copes with "\r\n" line endings.
        lines = [line.strip() for line in output.splitlines() if line.strip()]
        if not lines:
            raise ValueError("Cannot parse an album from empty output.")

        header, *song_lines = lines

        # Split the header on the first comma only, so any further commas stay
        # in the artist field instead of breaking the two-value unpack.
        name, sep, artist = header.partition(",")
        if not sep:
            raise ValueError(
                f"Expected '<album_name>, <album_artist>' but got: {header!r}"
            )

        songs = []
        for line in song_lines:
            # Split on the LAST comma: the length is always the final field,
            # while the title itself may legitimately contain commas.
            title, sep, length_seconds = line.rpartition(",")
            if not sep:
                raise ValueError(
                    "Expected '<song_title>, <song_length_seconds>' "
                    f"but got: {line!r}"
                )
            # The length is passed through as stripped text; the Song model
            # performs the integer validation, as in the original code.
            songs.append(
                Song(title=title.strip(), length_seconds=length_seconds.strip())
            )

        return Album(name=name.strip(), artist=artist.strip(), songs=songs)

In [23]:
# NOTE: this rebinds `prompt_template_str` from the earlier cell. After running
# this cell, re-run the earlier definition before re-running the earlier
# programs, otherwise they will pick up this template instead.
# The added instructions describe the line format that
# CustomAlbumOutputParser.parse expects: a header line, then one line per song.
prompt_template_str = """\
Generate an example album, with an artist and a list of songs. \
Using the movie {movie_name} as inspiration.\

Return answer in following format.
The first line is:
<album_name>, <album_artist>
Every subsequent line is a song with format:
<song_title>, <song_length_seconds>

"""

In [24]:
# Build the program around the custom line-based parser; `output_cls` is still
# passed alongside it, as in the original cell.
program = LLMTextCompletionProgram.from_defaults(
    prompt_template_str=prompt_template_str,
    output_cls=Album,
    output_parser=CustomAlbumOutputParser(verbose=True),
    verbose=True,
)

In [25]:
# The parser was created with verbose=True, so its `parse` method prints the
# raw LLM text ("> Raw output: ...") before converting it.
output = program(movie_name="The Dark Knight")

> Raw output: Gotham's Reckoning, The Joker
Chaos Unleashed, 180
The Dark Knight Rises, 210
Gotham's Savior, 195
Harvey's Descent, 160
The Bat Signal, 175
The Clown Prince of Crime, 190
The Caped Crusader, 200
Gotham City Under Siege, 185
The Dark Knight's Triumph, 220
