# Structured generation and extraction

In [1]:
import json
from rich.pretty import pprint
from phi.agent import Agent
from phi.model.ollama import Ollama

In [2]:
OLLAMA_HOST="localhost"

### Structured Generation

In [3]:
from typing import List
from pydantic import BaseModel, Field

We'll define our structured object with a simple class derived from pydantic's BaseModel:

In [4]:
class MovieScript(BaseModel):
    setting: str = Field(..., description="Provide a nice setting for a blockbuster movie.")
    ending: str = Field(..., description="Ending of the movie. If not available, provide a happy ending.")
    genre: str = Field(
        ..., description="Genre of the movie. If not available, select action, thriller or romantic comedy."
    )
    name: str = Field(..., description="Give a name to this movie")
    characters: List[str] = Field(..., description="Name of characters for this movie.")
    storyline: str = Field(..., description="3 sentence storyline for the movie. Make it exciting!")


Now we define the agent and give it an instruction. The response_model should be our MovieScript class:

In [5]:
# Agent that uses JSON mode
movie_agent = Agent(
    #model=Ollama(id="smollm2"), # how smol can you go?
    model=Ollama(id="hermes3:8b-llama3.1-q8_0", host=OLLAMA_HOST),
    description="You write movie scripts.",
    markdown=False,
    instructions=["Only output JSON, no json_object tag."],
    response_model=MovieScript,
)

We don't need function-calling or tools for this. This will all done with prompting:

In [6]:
pprint(movie_agent.get_system_message().content)

In [7]:
from pprint import pprint
# Get the response in a variable
res = movie_agent.run("A tech thriller in Darmstadt")
pprint(res.content)

#movie_agent.print_response("A tech thriller in Darmstadt")

MovieScript(setting='Darmstadt, Germany', ending='The protagonist manages to expose the corrupt tech company and bring them to justice.', genre='tech thriller', name='Cyber Conspiracy', characters=['Lena Hoffman - Cybersecurity Expert', 'Max Schneider - Journalist', 'Oliver Meyer - CEO of Tech Giant'], storyline="In Darmstadt, a brilliant cybersecurity expert discovers a sinister conspiracy within the city's largest tech company. As she works to expose their illicit activities, she forms an unlikely alliance with a tenacious journalist. Together, they must risk everything to prevent the company from unleashing a dangerous new technology on the world.")


### Structured extraction

In [8]:
extract_agent = Agent(
    #model=Ollama(id="sroecker/nuextract-tiny-v1.5", host=OLLAMA_HOST),
    model=Ollama(id="iodose/nuextract-v1.5", host=OLLAMA_HOST, options={"temperature": 0}),
    markdown=False,
    structured_outputs=True
)

We'll have to define a simple helper function that produces the template needed for NuExtract models:

In [9]:
def predict_nuextract(input_text):
    json_template = """
    {
        "Model": {
            "Name": "",
            "Number of parameters": "",
            "Number of max token": "",
            "Architecture": []
        },
        "Usage": {
            "Use case": [],
            "Licence": ""
        }
    }
    """
    template = f"""<|input|>\n### Template:\n{json_template}\n### Text:\n{input_text}\n\n<|output|>"""

    return template

In [10]:
example_text = """We introduce Mistral 7B, a 7–billion-parameter language model engineered for
superior performance and efficiency. Mistral 7B outperforms the best open 13B
model (Llama 2) across all evaluated benchmarks, and the best released 34B
model (Llama 1) in reasoning, mathematics, and code generation. Our model
leverages grouped-query attention (GQA) for faster inference, coupled with sliding
window attention (SWA) to effectively handle sequences of arbitrary length with a
reduced inference cost. We also provide a model fine-tuned to follow instructions,
Mistral 7B – Instruct, that surpasses Llama 2 13B – chat model both on human and
automated benchmarks. Our models are released under the Apache 2.0 license.
Code: <https://github.com/mistralai/mistral-src>
Webpage: <https://mistral.ai/news/announcing-mistral-7b/>"""

Let's have a look at the exact prompt that will be fed to the model

In [11]:
print(predict_nuextract(example_text))

<|input|>
### Template:

    {
        "Model": {
            "Name": "",
            "Number of parameters": "",
            "Number of max token": "",
            "Architecture": []
        },
        "Usage": {
            "Use case": [],
            "Licence": ""
        }
    }
    
### Text:
We introduce Mistral 7B, a 7–billion-parameter language model engineered for
superior performance and efficiency. Mistral 7B outperforms the best open 13B
model (Llama 2) across all evaluated benchmarks, and the best released 34B
model (Llama 1) in reasoning, mathematics, and code generation. Our model
leverages grouped-query attention (GQA) for faster inference, coupled with sliding
window attention (SWA) to effectively handle sequences of arbitrary length with a
reduced inference cost. We also provide a model fine-tuned to follow instructions,
Mistral 7B – Instruct, that surpasses Llama 2 13B – chat model both on human and
automated benchmarks. Our models are released under the Apache 2.0 l

In [14]:
#extract_agent.print_response(predict_nuextract(example_text)) # this doesn't work in notebooks

In [15]:
result = extract_agent.run(predict_nuextract(example_text))
pprint(json.loads(result.content))

{'Model': {'Architecture': ['grouped-query attention (GQA)',
                            'sliding window attention (SWA)'],
           'Name': 'Mistral 7B',
           'Number of max token': '',
           'Number of parameters': '7 billion'},
 'Usage': {'Licence': 'Apache 2.0', 'Use case': []}}
