In [1]:
import os
import json
from openai import OpenAI
from dotenv import load_dotenv

from typing import Optional
from pydantic import BaseModel, Field
from datetime import date

# Run python-dotenv first, then read the OpenAI settings from the environment.
load_dotenv()

OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
OPENAI_API_BASE = os.environ.get("OPENAI_API_BASE")

# Shared client used for every request in this script.
client = OpenAI(
    api_key=OPENAI_API_KEY,
    base_url=OPENAI_API_BASE,
)

# class Property(BaseModel):
#     id: int
#     title: str
#     description: str
#     neighborhood_description: str
#     city: str
#     state: str
#     neighborhood: Optional[str]
#     property_type: str
#     year_built: int
#     price: float
#     bedrooms: int
#     bathrooms: int
#     area_sqft: int
#     listed_date: date

class Property(BaseModel):
    """A single real-estate listing.

    The model is passed as ``response_format`` to the chat-completion call and
    the ``properties`` section of its JSON schema is embedded in the prompt.

    Every field attaches one sample value under the ``example`` key of its
    JSON-schema entry. The sample is supplied through ``json_schema_extra``
    rather than as an arbitrary ``Field`` keyword, because extra keyword
    arguments on ``Field`` are deprecated in Pydantic v2; the generated schema
    is the same.
    """

    # Identity and free-text copy.
    id: int = Field(..., json_schema_extra={"example": 1})
    title: str = Field(
        ..., json_schema_extra={"example": "Cozy 2-Bedroom Condo in Downtown"}
    )
    description: str = Field(
        ...,
        json_schema_extra={"example": "This modern 2-bedroom, 1.5-bathroom condo..."},
    )
    neighborhood_description: str = Field(
        ...,
        json_schema_extra={"example": "Located in the heart of downtown..."},
    )

    # Location; `neighborhood` is the only field with a default (None).
    city: str = Field(..., json_schema_extra={"example": "San Francisco"})
    state: str = Field(..., json_schema_extra={"example": "CA"})
    neighborhood: Optional[str] = Field(
        None, json_schema_extra={"example": "Downtown"}
    )

    # Physical and pricing attributes.
    property_type: str = Field(..., json_schema_extra={"example": "Condo"})
    year_built: int = Field(..., json_schema_extra={"example": 2010})
    price: float = Field(..., json_schema_extra={"example": 320000})
    bedrooms: int = Field(..., json_schema_extra={"example": 2})
    # float rather than int so fractional counts such as 1.5 are accepted.
    bathrooms: float = Field(..., json_schema_extra={"example": 1.5})
    area_sqft: int = Field(..., json_schema_extra={"example": 850})
    listed_date: date = Field(..., json_schema_extra={"example": "2025-04-01"})

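# Define the schema example.
# NOTE: the hand-written dict below is commented out; the schema actually used
# is derived from the Property model further down. The dict still uses a single
# "location" key, whereas Property has separate "city" and "state" fields.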
# example_property = {
#     "id": 1,
#     "title": "Cozy 2-Bedroom Condo in Downtown",
#     "description": "This modern 2-bedroom, 1.5-bathroom condo offers an open floor plan...",
#     "neighborhood_description": "Located in the heart of downtown San Francisco...",
#     "location": "San Francisco, CA",
#     "neighborhood": "Downtown",
#     "property_type": "Condo",
#     "year_built": 2010,
#     "price": 320000,
#     "bedrooms": 2,
#     "bathrooms": 1.5,
#     "area_sqft": 850,
#     "listed_date": "2025-04-01"
# }
# Per-field entries taken from the Property model's JSON schema.
example_property = Property.model_json_schema()["properties"]


# Build the user prompt around the pretty-printed field schema.
schema_block = json.dumps(example_property, indent=4)
prompt = (
    "\n"
    "Generate a new synthetic property listing in JSON format following this exact schema:\n"
    f"{schema_block}\n"
    "\n"
    "Make sure:\n"
    "- All keys are present.\n"
    "- Values are realistic but not copied.\n"
    "- Use a new ID.\n"
    "- listed_date should be in YYYY-MM-DD format.\n"
)

# Request one listing; the Property model is supplied as the response format.
response = client.chat.completions.parse(
    model="gpt-4o-mini",  # or "gpt-4.1" if you prefer
    messages=[{"role": "user", "content": prompt}],
    temperature=0.7,
    response_format=Property,
)

# `content` is Optional in the SDK (e.g. None when the model refuses), so it
# must be checked before `.strip()`; calling it on None raises AttributeError.
message = response.choices[0].message
raw_content = message.content
output_text = raw_content.strip() if raw_content is not None else ""

if raw_content is None:
    # No text to decode: report the refusal (if any) instead of crashing.
    print("Model returned no content; refusal message:")
    print(getattr(message, "refusal", None))
    synthetic_property = None
else:
    # Decode the raw JSON text into a plain dict.
    try:
        synthetic_property = json.loads(output_text)
    except json.JSONDecodeError:
        print("Model did not return valid JSON, printing raw response:")
        print(output_text)
        synthetic_property = None

print(synthetic_property)

{'id': 2, 'title': 'Charming 3-Bedroom House with Garden', 'description': 'This charming 3-bedroom, 2-bathroom house features a spacious living area, modern kitchen, and a beautiful garden perfect for entertaining.', 'neighborhood_description': 'Nestled in a quiet residential area, this home is just a short walk from local parks and schools, making it ideal for families.', 'city': 'Austin', 'state': 'TX', 'neighborhood': 'South Austin', 'property_type': 'House', 'year_built': 2015, 'price': 425000, 'bedrooms': 3, 'bathrooms': 2, 'area_sqft': 1500, 'listed_date': '2023-11-01'}
