# Google Colab Version: [Open this notebook in Google Colab](https://colab.research.google.com/github/starfishdata/starfish/blob/main/examples/structured_llm.ipynb)


### Dependencies 

In [8]:
## Fix for Jupyter Notebook only — do NOT use in production
## Enables async code execution in notebooks, but may cause sync/async issues
## For production, please run in standard .py files without this workaround
## See: https://github.com/erdewit/nest_asyncio for more details
import nest_asyncio
nest_asyncio.apply()


from starfish import StructuredLLM
from starfish.common.env_loader import load_env_file ## Load environment variables from .env file

## Run the loader once, up front, before any of the model cells below execute
load_env_file()

### Structured LLM - Single
#### 1. Model provider LLM Call

In [9]:
first_llm = StructuredLLM(
    model_name="openai/gpt-4o-mini",
    prompt="Facts about city {{city_name}}.",
    output_schema=[{"name": "question", "type": "str"}, {"name": "answer", "type": "str"}],
    model_kwargs={"temperature": 0.7},
)

first_response = await first_llm.run(city_name="New York")
first_response.data

[{'question': 'What is the nickname of New York City?',
  'answer': 'The Big Apple'}]

In [10]:
# Render the messages built from the template for these inputs, then print
# them so the line breaks in the constructed prompt are shown as-is.
printable_prompt = first_llm.render_prompt_printable(city_name="New York", num_records=5)
print(printable_prompt)


üìù CONSTRUCTED MESSAGES:

Role: user
Content:
Facts about city New York.


You are asked to generate exactly 5 records and please return the data in the following JSON format:
[
    {
    "question": ""  //  (required),
    "answer": ""  //  (required)
    }
    ...
]

Required fields: question, answer


End of prompt



#### 2. Custom OpenAI-Compatible Model Provider LLM Call

In [11]:
first_llm = StructuredLLM(
    model_name="hyperbolic/deepseek-ai/DeepSeek-V3-0324",
    prompt="Facts about city {{city_name}}.",
    output_schema=[{"name": "question", "type": "str"}, {"name": "answer", "type": "str"}],
    model_kwargs={"temperature": 0.7},
)

first_response = await first_llm.run(city_name="New York", num_records=5)
first_response.data

[{'question': 'What is the nickname of New York City?',
  'answer': 'The Big Apple'},
 {'question': 'Which famous park is located in the center of Manhattan?',
  'answer': 'Central Park'},
 {'question': 'What is the name of the tallest building in New York City?',
  'answer': 'One World Trade Center'},
 {'question': 'Which iconic statue is located in New York Harbor?',
  'answer': 'Statue of Liberty'},
 {'question': 'What is the name of the famous theater district in NYC?',
  'answer': 'Broadway'}]

#### 3. Local LLM

In [12]:
### Local model
first_llm = StructuredLLM(
    model_name="ollama/gemma3:1b",
    prompt="Facts about city {{city_name}}.",
    output_schema=[{"name": "question", "type": "str"}, {"name": "answer", "type": "str"}],
    model_kwargs={"temperature": 0.7},
)

first_response = await first_llm.run(city_name="New York", num_records=5)
first_response.data

[32m2025-04-16 22:34:40[0m | [1mINFO    [0m | [36mstarfish.llm.proxy.litellm_adapter[0m | [34mlitellm_adapter.py:94[0m | [1mEnsuring Ollama model gemma3:1b is ready...[0m
[32m2025-04-16 22:34:40[0m | [1mINFO    [0m | [36mstarfish.llm.backend.ollama_adapter[0m | [34mollama_adapter.py:63[0m | [1mStarting Ollama server...[0m
[32m2025-04-16 22:34:41[0m | [1mINFO    [0m | [36mstarfish.llm.backend.ollama_adapter[0m | [34mollama_adapter.py:79[0m | [1mOllama server started successfully[0m
[32m2025-04-16 22:34:41[0m | [1mINFO    [0m | [36mstarfish.llm.backend.ollama_adapter[0m | [34mollama_adapter.py:129[0m | [1mFound model gemma3:1b[0m
[32m2025-04-16 22:34:41[0m | [1mINFO    [0m | [36mstarfish.llm.backend.ollama_adapter[0m | [34mollama_adapter.py:232[0m | [1mModel gemma3:1b is already available[0m
[32m2025-04-16 22:34:41[0m | [1mINFO    [0m | [36mstarfish.llm.proxy.litellm_adapter[0m | [34mlitellm_adapter.py:103[0m | [1mModel gemma3:1b

[{'question': 'What is the population of New York City?',
  'answer': 'As of 2023, the population of New York City is approximately 8.8 million people.'}]

In [13]:
### Clean it up
### Stop the Ollama server that was started while running the local-model cell.
### The awaited call is the cell's last expression, so its return value is
### displayed as the cell output (True in the recorded run).
from starfish.llm.backend.ollama_adapter import stop_ollama_server

await stop_ollama_server()

[32m2025-04-16 22:34:45[0m | [1mINFO    [0m | [36mstarfish.llm.backend.ollama_adapter[0m | [34mollama_adapter.py:254[0m | [1mStopping Ollama server...[0m
[32m2025-04-16 22:34:46[0m | [1mINFO    [0m | [36mstarfish.llm.backend.ollama_adapter[0m | [34mollama_adapter.py:305[0m | [1mOllama server stopped successfully[0m


True

### Structured LLM - Workflow
#### 1. Two LLMs

In [14]:
from starfish import StructuredLLM
from starfish.llm.utils import merge_structured_outputs

first_llm = StructuredLLM(
    model_name="openai/gpt-4o-mini",
    prompt="Facts about city {{city_name}}.",
    output_schema=[{"name": "question", "type": "str"}, {"name": "answer", "type": "str"}],
)

first_response = await first_llm.run(city_name="New York", num_records=5)


second_llm = StructuredLLM(
    model_name="openai/gpt-4o-mini",
    prompt="""You will be given a list of question and answer pairs,
please rate each individually about its accuracy, funny and conciseness.
rating are from 1 to 10, 1 being the worst and 10 being the best.
lets also rank them among themself so from 1 being the best.
Here is question and answer pairs: {{QnA_pairs}}""",
    output_schema=[
        {"name": "accuracy", "type": "int"},
        {"name": "funny", "type": "int"},
        {"name": "conciseness", "type": "int"},
        {"name": "rank", "type": "int"},
    ],
    model_kwargs={"temperature": 1},
)

second_response = await second_llm.run(QnA_pairs=first_response.data)

### Merge result:
merge_structured_outputs(first_response.data, second_response.data)

[{'question': 'What is the population of New York City?',
  'answer': 'As of 2023, New York City has an estimated population of over 8.6 million people.',
  'accuracy': 10,
  'funny': 2,
  'conciseness': 9,
  'rank': 1},
 {'question': 'What is the most famous park in New York City?',
  'answer': 'Central Park is the most famous park in New York City, spanning 843 acres in the heart of Manhattan.',
  'accuracy': 10,
  'funny': 2,
  'conciseness': 9,
  'rank': 2},
 {'question': 'What is the significance of Times Square?',
  'answer': "Times Square is known as 'The Crossroads of the World' and is famous for its bright lights, Broadway theaters, and New Year's Eve ball drop.",
  'accuracy': 10,
  'funny': 3,
  'conciseness': 9,
  'rank': 3},
 {'question': 'What iconic statue can be found in New York Harbor?',
  'answer': 'The Statue of Liberty, a symbol of freedom and democracy, is located on Liberty Island in New York Harbor.',
  'accuracy': 10,
  'funny': 2,
  'conciseness': 9,
  'rank':