# test

In [None]:
from pydantic import BaseModel, Field
from typing import List

# Schema for a single joke. The Chinese `description` strings are runtime data
# attached to each field, so they are kept verbatim.
class Joke(BaseModel):
    # Opening question that sets up the joke.
    setup: str = Field(description="笑话的开头问题")
    # The punchline / answer.
    punchline: str = Field(description="笑点答案")
    # List of tags for the joke.
    tags: List[str] = Field(description="笑话的标签列表")

In [None]:
from langchain.output_parsers import PydanticOutputParser

# Build an output parser bound to the Joke schema.
parser = PydanticOutputParser(pydantic_object=Joke)
# Bare last expression so the notebook displays the parser object.
parser

In [None]:
# Ask the parser for its format-instruction text and print it for inspection.
format_instructions = parser.get_format_instructions()
print(format_instructions)

# code / sglang structured output

In [6]:
from pyexpat import model
from openai import OpenAI
import os
from typing import List, Literal
from pydantic import BaseModel, Field

client = OpenAI()

# SGLang approach: describe the expected output with Pydantic models.
# NOTE: CapitalInfo is only referenced from the commented-out "schema" line in
# the request further down this cell; the active request uses Caption.
class CapitalInfo(BaseModel):
    # City name; must match ^\w+$ (word characters only, no spaces).
    name: str = Field(..., pattern=r"^\w+$", description="Name of the capital city")
    # Population as an integer.
    population: int = Field(..., description="Population of the capital city")

# One event entry. All fields are required; the Chinese `description` strings
# are runtime data and are kept verbatim.
class Event(BaseModel):
    # Event code.
    event: str = Field(..., description="事件代码")
    # Event type: exactly one of "critical", "caution" or "suggestion".
    level: Literal["critical", "caution", "suggestion"] = Field(..., description="事件类型")
    # Analysis of the event in Chinese, at most 50 characters.
    analyze: str = Field(..., max_length=50, description="50字以内中文分析事件")
    # Short Chinese description / hint, at most 15 characters.
    message: str = Field(..., max_length=15, description="15字以内中文描述, 提示")
    # Confidence, bounded to 0.0 <= value <= 1.0.
    confidence: float = Field(..., ge=0.0, le=1.0, description="置信度，0.0-1.0")

# One notice entry. Both fields are required.
class Notice(BaseModel):
    # Short Chinese description / hint, at most 15 characters.
    notice: str = Field(..., max_length=15, description="15字以内中文描述, 提示")
    # Priority bounded to 0.0 <= value <= 1.0; larger means higher priority.
    priority: float = Field(..., ge=0.0, le=1.0, description="数值越大优先级越高")

# Top-level payload: a list of Event objects plus a list of Notice objects.
class Caption(BaseModel):
    # List of events.
    events: List[Event] = Field(..., description="事件列表")
    # List of notices.
    notices: List[Notice] = Field(..., description="通知列表")


# Send one chat-completion request, passing the JSON schema generated from the
# Caption model as the `json_schema` response format.
response = client.chat.completions.create(
    # Model name is read from the OPENAI_MODEL environment variable.
    model=os.getenv("OPENAI_MODEL"),
    # messages=[{"role": "user", "content": "Please generate the information of the capital of France in the JSON format."}],
    # Prompt (Chinese): generate some events and notices, 3 events and 2 notices.
    messages=[{"role": "user", "content": "随意生成一些事件和通知，要求事件有3个，通知有2个。"}],
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "foo",
            # "schema": CapitalInfo.model_json_schema(),
            "schema": Caption.model_json_schema(),
        },
    },
)
# Bare last expression so the notebook displays the returned object.
response

ChatCompletion(id='5c69ad0f7d874a7899870cee492f13c9', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='{\n    "events": [\n        {\n            "event": "年度技术研讨会",\n            "level": "critical",\n            "analyze": "涉及公司核心技术交流，需各部门负责人参与",\n            "message": "2024年度技术研讨会即将召开",\n            "confidence": 0.95\n        },\n        {\n            "event": "客户需求评审会",\n            "level": "critical",\n            "analyze": "针对新客户核心需求评估，需销售团队和研发团队共同参与",\n            "message": "新客户需求评审会，请销售和市场","confidence": 0.90\n        },\n        {\n            "event": "办公室设备维护",\n            "level": "critical",\n            "analyze": "办公设备定期检查和维护，确保办公环境正常运作",\n            "message": "办公室设备维护通知，请各部门于", "confidence": 0.85\n        }\n    ],\n    "notices": [\n        {\n            "notice": "项目进度更新",\n            "priority": 1\n        },\n        {\n            "notice": "员工培训安排",\n            "priority": 1\n        }\n    ]\n}', refu

In [7]:
# Print only the generated text of the first choice.
print(response.choices[0].message.content)

{
    "events": [
        {
            "event": "年度技术研讨会",
            "level": "critical",
            "analyze": "涉及公司核心技术交流，需各部门负责人参与",
            "message": "2024年度技术研讨会即将召开",
            "confidence": 0.95
        },
        {
            "event": "客户需求评审会",
            "level": "critical",
            "analyze": "针对新客户核心需求评估，需销售团队和研发团队共同参与",
            "message": "新客户需求评审会，请销售和市场","confidence": 0.90
        },
        {
            "event": "办公室设备维护",
            "level": "critical",
            "analyze": "办公设备定期检查和维护，确保办公环境正常运作",
            "message": "办公室设备维护通知，请各部门于", "confidence": 0.85
        }
    ],
    "notices": [
        {
            "notice": "项目进度更新",
            "priority": 1
        },
        {
            "notice": "员工培训安排",
            "priority": 1
        }
    ]
}


# code / langchain / with_structured_output

In [7]:
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field

# Define the Pydantic model.
# NOTE: this re-declares CapitalInfo with the same fields as the definition in
# the earlier SGLang cell; the later definition replaces the earlier binding.
class CapitalInfo(BaseModel):
    # City name; must match ^\w+$ (word characters only, no spaces).
    name: str = Field(..., pattern=r"^\w+$", description="Name of the capital city")
    # Population as an integer.
    population: int = Field(..., description="Population of the capital city")

# Create the chat model and bind structured output to it.
# NOTE: `os` is not imported in this cell; it relies on the `import os` in the
# earlier SGLang cell having been executed.
model = ChatOpenAI(model=os.getenv("OPENAI_MODEL"))
# include_raw=True is passed here, and the captured result is a dict rather than a bare CapitalInfo.
structured_model = model.with_structured_output(CapitalInfo, include_raw=True)

# Invoke directly with a plain-string prompt.
result = structured_model.invoke("Please generate the information of the capital of France in the JSON format.")
print(result)  # prints a dict with 'raw', 'parsed' and 'parsing_error' keys; the CapitalInfo instance is under 'parsed'

{'raw': AIMessage(content='{\n  "name": "Paris",\n  "population": 2148281\n}', additional_kwargs={'parsed': CapitalInfo(name='Paris', population=2148281), 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 20, 'prompt_tokens': 20, 'total_tokens': 40, 'completion_tokens_details': None, 'prompt_tokens_details': None, 'reasoning_tokens': 0}, 'model_name': 'glm4.1v-9b-thinking', 'system_fingerprint': None, 'id': '15138192bdb6479ebf1cc1777b1cfb5d', 'service_tier': None, 'finish_reason': 'stop', 'logprobs': None}, id='run--15e82cd7-a443-49f8-b72b-68b4888da187-0', usage_metadata={'input_tokens': 20, 'output_tokens': 20, 'total_tokens': 40, 'input_token_details': {}, 'output_token_details': {}}), 'parsed': CapitalInfo(name='Paris', population=2148281), 'parsing_error': None}


In [9]:
# The result is a dict; list its keys.
result.keys()

dict_keys(['raw', 'parsed', 'parsing_error'])

In [10]:
# Show the parsed CapitalInfo object stored under the 'parsed' key.
result['parsed']

CapitalInfo(name='Paris', population=2148000)

In [11]:
# Check the class of the parsed value.
type(result['parsed'])

__main__.CapitalInfo

In [13]:
from typing import List, Literal, Union
from pydantic import BaseModel, Field
from langchain_openai import ChatOpenAI
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_core.prompts import ChatPromptTemplate
import yaml
import json

# 1. Define the data models.
# One event entry. All fields are required; the Chinese `description` strings
# are runtime data and are kept verbatim.
class EventItem(BaseModel):
    # Event description.
    event: str = Field(..., description="事件描述")
    # Event type: exactly one of "critical", "caution" or "suggestion".
    type: Literal["critical", "caution", "suggestion"] = Field(..., description="事件类型")
    # Analysis of the event, at most 50 characters.
    analyze: str = Field(..., max_length=50, description="50字以内分析事件")
    # Short Chinese description / hint, at most 15 characters.
    message: str = Field(..., max_length=15, description="15字以内中文描述，提示")
    # Confidence, bounded to 0.0 <= value <= 1.0.
    confidence: float = Field(..., ge=0.0, le=1.0, description="置信度，0.0-1.0")

# Top-level payload: an object with a single `events` list of EventItem.
class EventsList(BaseModel):
    # List of events.
    events: List[EventItem] = Field(..., description="事件列表")

# 2. System-prompt factory
def create_scene_system_prompt_xxx_out(output_format: str):
    """Build the system prompt for scene analysis in the requested format.

    The prompt embeds a format template (JSON or YAML) followed by a fixed list
    of field constraints. Both templates describe the same shape: a top-level
    ``events`` key holding a list of event entries.

    Args:
        output_format: Either ``"JSON"`` or ``"YAML"`` (case-sensitive).

    Returns:
        The complete system prompt as a string.

    Raises:
        KeyError: If ``output_format`` is not one of the supported formats.
    """
    format_dict = {
        # The JSON template is wrapped in an object with an "events" key so it
        # agrees with the YAML template below instead of showing a bare array.
        "JSON": """
# Format / 输出格式 (严格遵守，不得新增或修改顶级字段)
{
   "events": [
      {
         "event": "<string>",
         "type": "<critical|caution|suggestion>",
         "analyze": "<string, 50字以内分析事件>",
         "message": "<string, 15字以内中文描述, 提示>",
         "confidence": <float, 0.0-1.0>
      },
      # ... more events
   ]
}
""",
        "YAML": """
# Format / 输出格式 (严格遵守，不得新增或修改顶级字段)
events:
  - event: <string>
    type: <critical|caution|suggestion>
    analyze: <string, 50字以内分析事件>
    message: <string, 15字以内中文描述, 提示>
    confidence: <float, 0.0-1.0>
  # ... more events
"""
    }

    # Look the template up first so an unsupported format fails before any
    # prompt text is assembled.
    format_spec = format_dict[output_format]

    system_prompt = f"""
你是一个场景分析专家，需要根据输入的场景描述生成结构化的事件列表。

请严格按照以下{output_format}格式输出：

{format_spec}

要求：
1. 严格遵守输出格式，不得新增或修改顶级字段
2. 每个事件必须包含所有必需字段
3. analyze字段必须在50字以内
4. message字段必须在15字以内
5. confidence必须是0.0-1.0之间的浮点数
6. type只能是critical、caution或suggestion之一
"""
    return system_prompt

# 3. Event generator class
class EventGenerator:
    """Generate a structured event list from a free-text scene description.

    A chat model is created once and bound to the ``EventsList`` schema via
    ``with_structured_output``; every request reuses that bound model.
    """

    def __init__(self, model_name="gpt-4o", temperature=0):
        """Create the chat model and its structured-output wrapper.

        Args:
            model_name: Model identifier passed to ``ChatOpenAI``.
            temperature: Sampling temperature passed to ``ChatOpenAI``.
        """
        self.model = ChatOpenAI(model=model_name, temperature=temperature)
        # Bind the schema once up front so each call reuses the same runnable.
        self.structured_model = self.model.with_structured_output(EventsList)

    def generate_events(self, scene_description: str, output_format: str = "JSON") -> dict:
        """Generate the structured event list for a scene.

        Args:
            scene_description: Free-text description of the scene.
            output_format: ``"JSON"`` or ``"YAML"``; selects which system
                prompt variant is sent to the model.

        Returns:
            The parsed result converted to a plain dict via ``model_dump()``.

        Raises:
            ValueError: If ``output_format`` is neither ``"JSON"`` nor ``"YAML"``.
        """
        if output_format == "JSON":
            return self._generate_json_events(scene_description)
        elif output_format == "YAML":
            return self._generate_yaml_events(scene_description)
        else:
            raise ValueError("不支持的输出格式，请选择'JSON'或'YAML'")

    def _generate_json_events(self, scene_description: str) -> dict:
        """Generate the event list using the JSON-flavoured system prompt."""
        return self._invoke_structured("JSON", scene_description)

    def _generate_yaml_events(self, scene_description: str) -> dict:
        """Generate the event list using the YAML-flavoured system prompt.

        Returns a dict; the caller decides whether to serialise it as YAML.
        """
        return self._invoke_structured("YAML", scene_description)

    def _invoke_structured(self, output_format: str, scene_description: str) -> dict:
        """Build the message pair for ``output_format`` and run the bound model.

        Messages are constructed directly rather than through a prompt
        template, so literal braces in the system prompt or in the scene text
        are sent verbatim instead of being parsed as template variables.
        """
        system_prompt = create_scene_system_prompt_xxx_out(output_format)
        messages = [
            SystemMessage(content=system_prompt),
            HumanMessage(content=f"场景描述：{scene_description}"),
        ]
        # The bound model is stored in self.structured_model; its result is
        # converted to a plain dict for the caller.
        result = self.structured_model.invoke(messages)
        return result.model_dump()

# 4. Usage example
if __name__ == "__main__":
    # Imported locally because this cell's import list does not include `os`;
    # without it the cell only runs if an earlier cell already imported it.
    import os

    generator = EventGenerator(
        # Fall back to the class default when OPENAI_MODEL is unset or empty,
        # rather than passing None as the model name.
        model_name=os.getenv("OPENAI_MODEL") or "gpt-4o",
    )
    scene = "一个工厂中，机器突然停止工作，工人试图重启但失败。"

    # Generate with the JSON-flavoured prompt and pretty-print as JSON.
    json_result = generator.generate_events(scene, "JSON")
    print("JSON格式输出:")
    print(json.dumps(json_result, indent=2, ensure_ascii=False))

    # Generate with the YAML-flavoured prompt and dump the dict as YAML,
    # keeping field order and non-ASCII characters readable.
    yaml_result = generator.generate_events(scene, "YAML")
    print("\nYAML格式输出:")
    print(yaml.dump(yaml_result, allow_unicode=True, sort_keys=False))

JSON格式输出:
{
  "events": [
    {
      "event": "机器突然停止工作",
      "type": "critical",
      "analyze": "机器停机可能影响生产进度和安全，需立即排查原因。",
      "message": "机器故障，立即检查。",
      "confidence": 0.9
    },
    {
      "event": "工人尝试重启机器失败",
      "type": "caution",
      "analyze": "重启失败表明问题复杂，可能涉及硬件或软件故障，需专业人员介入。",
      "message": "重启失败，联系技术人员。",
      "confidence": 0.8
    }
  ]
}

YAML格式输出:
events:
- event: 机器突然停止工作
  type: critical
  analyze: 机器停机可能导致生产中断和安全隐患
  message: 机器停机，需紧急处理
  confidence: 0.9
- event: 工人尝试重启机器失败
  type: caution
  analyze: 重启失败可能预示更严重故障，需进一步检查
  message: 重启失败，需排查原因
  confidence: 0.8

