In [1]:
import nest_asyncio

# Patch asyncio so an event loop can be re-entered from inside the notebook's
# already-running loop.
# NOTE(review): presumably required because `generate_facts.run(...)` further
# down is called without `await` and must drive async work itself — confirm.
nest_asyncio.apply()

In [2]:
from starfish import StructuredLLM, data_factory

In [3]:
facts_generator = StructuredLLM(
    model_name="openai/gpt-4o-mini",
    prompt="""Generate facts about {{city_name}} on {{topic}}""",
    output_schema=[{"name": "question", "type": "str"}, {"name": "answer", "type": "str"}],
    model_kwargs={"temperature": 0.7},
)
response = await facts_generator.run(city_name="San Francisco", topic="history")
response.data

[{'question': "What significant event in 1906 had a profound impact on San Francisco's development?",
  'answer': 'The San Francisco earthquake of 1906, followed by devastating fires, destroyed a large portion of the city, leading to extensive rebuilding and modernization efforts.'}]

In [4]:
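# NOTE(review): sketch of a higher-level one-call API, kept for reference only.
# `data_market` is not imported or defined in any visible cell, so this is not
# runnable as written. The cells below appear to perform the same steps by hand
# (prepare topics, then generate facts per topic).
# data_market.run(name = 'generator_with_topic',
#                    user_instructions = """generate facts about san francisco""",
#                    num_records = 100,
#                    topics = [{'history': 10, 'culture': 10, 'food': 10}])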

In [10]:
# Free-text instruction passed both to topic preparation and to every
# per-record generation call later in the notebook.
user_instructions = "generate facts about san francisco"

# Total number of records the topic plan should cover.
num_records = 100

# Explicit per-topic quotas: one single-entry dict per topic, mapping the topic
# name to the number of records requested for it. The quotas add up to 60,
# fewer than `num_records`; in the recorded run the remaining 40 slots were
# filled with additional generated topics.
topics = [
    {"history": 40},
    {"culture": 10},
    {"food": 10},
]

In [11]:
from starfish.components import prepare_topic

topic_list = await prepare_topic(num_records=num_records, topics=topics, user_instructions=user_instructions, records_per_topic=10)

In [12]:
from collections import Counter

# Tally how many planned records fall under each topic name.
Counter(entry["topic"] for entry in topic_list)

Counter({'history': 40,
         'culture': 10,
         'food': 10,
         'Architecture and Landmarks': 10,
         'Transportation Systems': 10,
         'Natural Geography and Parks': 10,
         'Technology and Innovation': 10})

In [13]:
# Invariant: the topic plan must contain exactly one entry per requested record.
# Fail loudly here rather than discovering a short or oversized plan after the
# (slow, billable) generation step.
assert len(topic_list) == num_records, (
    f"expected {num_records} topic entries, got {len(topic_list)}"
)
len(topic_list)

100

In [19]:
@data_factory(max_concurrency=5)
async def generate_facts(user_instructions: str, topic: str):
    """Generate question/answer facts for a single topic.

    Args:
        user_instructions: Free-text instruction placed at the start of the prompt.
        topic: Topic name appended to the prompt after "on".

    Returns:
        The `data` attribute of the LLM response (a list of question/answer
        dicts in the single-call example earlier in this notebook).
    """
    print(f"Generating facts for {topic}...")
    # A fresh generator is built on every call, exactly as the per-record job
    # requires; the configuration mirrors the single-call example above.
    llm = StructuredLLM(
        model_name="openai/gpt-4o-mini",
        prompt="{{user_instructions}} on {{topic}}",
        output_schema=[
            {"name": "question", "type": "str"},
            {"name": "answer", "type": "str"},
        ],
        model_kwargs={"temperature": 0.7},
    )
    result = await llm.run(user_instructions=user_instructions, topic=topic)
    return result.data


# Fan the job out over the topic plan: each entry of `topic_list` carries a
# "topic" key (presumably passed through as the `topic` argument), while
# `user_instructions` is the same for every record.
facts_data = generate_facts.run(
    data=topic_list,
    user_instructions=user_instructions,
)

[32m2025-04-12 09:53:59[0m | [1mINFO    [0m | [36mstarfish.utils.data_factory[0m | [34mdata_factory.py:180[0m | [1m
2. Creating master job...[0m
Generating facts for history...
Generating facts for history...
Generating facts for history...
Generating facts for history...
Generating facts for history...
Generating facts for history...
Generating facts for history...
Generating facts for history...
Generating facts for history...
Generating facts for history...
[32m2025-04-12 09:54:02[0m | [1mINFO    [0m | [36mstarfish.utils.data_factory[0m | [34mdata_factory.py:258[0m | [1mMaster job 82f03acc-275b-40f1-b106-4656f4ab48fd as completed[0m


KeyboardInterrupt: 

In [20]:
# Re-run the master job created by the interrupted `generate_facts.run(...)`
# cell; the id below is the one printed in that cell's log output.
# NOTE(review): in the recorded run this call raised
# `AttributeError: 'NoneType' object has no attribute 'items'` — presumably the
# interrupted job left no usable saved state; confirm against starfish's
# data_factory implementation.
# NOTE(review): the id is specific to one local run, so this cell will not
# reproduce on a fresh kernel, and its return value is not assigned to anything.
generate_facts.re_run(master_job_id="82f03acc-275b-40f1-b106-4656f4ab48fd")

[32m2025-04-12 09:54:18[0m | [1mINFO    [0m | [36mstarfish.utils.data_factory[0m | [34mdata_factory.py:258[0m | [1mMaster job 82f03acc-275b-40f1-b106-4656f4ab48fd as completed[0m


AttributeError: 'NoneType' object has no attribute 'items'

In [10]:
# Number of records collected in `facts_data`.
# NOTE(review): this cell's execution count (10) is lower than those of the
# cells above it (19, 20), and the `generate_facts.run(...)` cell shown above
# ended in KeyboardInterrupt, so the value displayed here presumably comes from
# an earlier, uninterrupted run — restart the kernel and run all cells to confirm.
len(facts_data)

100