In [1]:
import os

import pandas as pd
from openai import OpenAI


In [None]:
# Prefer the OPENAI_API_KEY environment variable so a real key never has to be
# saved inside the notebook; the literal below is only a fallback placeholder.
api_key = os.environ.get("OPENAI_API_KEY") or 'your api key'
client = OpenAI(api_key=api_key)  # personal API key

# Load the report table from the Excel workbook (path is relative to the
# current working directory).
dataset = pd.read_excel("./short_text_mimic.xlsx")

In [3]:
dataset

Unnamed: 0,note_id,text
0,10000032-RR-14,EXAMINATION: CHEST (PA AND LAT)\n\nINDICATION...
1,10000032-RR-23,EXAMINATION: CHEST (PA AND LAT)\n\nINDICATION...
2,10000084-RR-12,EXAMINATION: CHEST (PA AND LAT)\n\nINDICATION...
3,10000102-RR-18,CHEST PA AND LATERAL.\n\nCOMPARISON: None.\n\...
4,10000117-RR-13,EXAMINATION: CHEST (PA AND LAT)\n\nINDICATIO...
...,...,...
94,10001884-RR-85,INDICATION: ___ woman with shortness of breat...
95,10001884-RR-94,HISTORY: ___ female with chest tightness.\n\n...
96,10001884-RR-97,"INDICATION: Intermittent chest pain, evaluate..."
97,10001919-RR-22,EXAMINATION: CT CHEST W/CONTRAST\n\nINDICATIO...


### 최신 api 방식 따라하기
#### 2025년 8월 기준

In [4]:
# Take the report text stored at row label 10 as the single worked example.
radiology_report = dataset.loc[10, 'text']

In [6]:
print(radiology_report)

EXAMINATION:  CHEST (PA AND LAT)

INDICATION:  ___ with hypoxia  // ?pna, aspiration.

COMPARISON:  None

FINDINGS: 

PA and lateral views of the chest provided.   The lungs are adequately
aerated.

There is a focal consolidation at the left lung base adjacent to the lateral
hemidiaphragm. There is mild vascular engorgement. There is bilateral apical
pleural thickening.

The cardiomediastinal silhouette is remarkable for aortic arch calcifications.
The heart is top normal in size.

IMPRESSION: 

Focal consolidation at the left lung base, possibly representing aspiration or
pneumonia.

Central vascular engorgement.



In [7]:
from pydantic import BaseModel, Field
from typing import Literal
from openai import OpenAI

# Build the API client again from the key defined in an earlier cell.
client = OpenAI(api_key=api_key)  # personal API key

# Structured-output schema with one required label per condition.
# NOTE: documented with comments rather than a class docstring, since Pydantic
# would copy a docstring into the generated JSON schema as its description.
class XrayEvent(BaseModel):
    # Every field accepts only the literal strings "yes" or "no"; the Field
    # description is carried into the JSON schema for that property.
    pneumonia: Literal["yes","no"] = Field(description="Patient has pneumonia ['yes'/'no'] (strict).")
    tuberculosis: Literal["yes","no"] = Field(description="Patient has tuberculosis ['yes'/'no'] (strict).")
    interstitial_lung_disease: Literal["yes","no"] = Field(description="Patient has interstitial lung disease ['yes'/'no'] (strict).")

# Ask the model to label the example report and parse the reply into XrayEvent.
system_message = {
    "role": "system",
    "content": "다음 방사선 판독에서 폐렴, 결핵, ILD의 존재 여부만 판단해라. 각 항목은 반드시 'yes' 또는 'no'로만 출력하라. 불명확하거나 언급이 없으면 'no'로 하라.",
}
user_message = {"role": "user", "content": radiology_report}

response = client.responses.parse(
    model="gpt-4o-2024-08-06",
    input=[system_message, user_message],
    text_format=XrayEvent,
)

# Parsed result exposed by the response for the requested text_format.
structured_new = response.output_parsed

In [8]:
structured_new

XrayEvent(pneumonia='yes', tuberculosis='no', interstitial_lung_disease='no')

In [11]:
structured_new.pneumonia

'yes'

In [21]:
type(structured_new.pneumonia)

str

In [12]:
structured_new.interstitial_lung_disease

'no'

In [13]:
structured_new.tuberculosis

'no'

### Pydantic class란? 
- python의 타입힌트를 이용해서 입력데이터를 검증해주는 라이브러리
- 스키마 정의
    - 타입 제약 
    - 메타데이터 확인 가능 
    - model_json_schema, model_fields

- 장점: 코드가 더 간결해짐
- 단점: 코딩에 익숙하지 않은 분들에게는 어려울 수 있음


In [14]:
xray_class = XrayEvent

In [15]:
xray_class.model_json_schema()

{'properties': {'pneumonia': {'description': "Patient has pneumonia ['yes'/'no'] (strict).",
   'enum': ['yes', 'no'],
   'title': 'Pneumonia',
   'type': 'string'},
  'tuberculosis': {'description': "Patient has tuberculosis ['yes'/'no'] (strict).",
   'enum': ['yes', 'no'],
   'title': 'Tuberculosis',
   'type': 'string'},
  'interstitial_lung_disease': {'description': "Patient has interstitial lung disease ['yes'/'no'] (strict).",
   'enum': ['yes', 'no'],
   'title': 'Interstitial Lung Disease',
   'type': 'string'}},
 'required': ['pneumonia', 'tuberculosis', 'interstitial_lung_disease'],
 'title': 'XrayEvent',
 'type': 'object'}

In [16]:
xray_class.model_fields

{'pneumonia': FieldInfo(annotation=Literal['yes', 'no'], required=True, description="Patient has pneumonia ['yes'/'no'] (strict)."),
 'tuberculosis': FieldInfo(annotation=Literal['yes', 'no'], required=True, description="Patient has tuberculosis ['yes'/'no'] (strict)."),
 'interstitial_lung_disease': FieldInfo(annotation=Literal['yes', 'no'], required=True, description="Patient has interstitial lung disease ['yes'/'no'] (strict).")}

In [17]:
# Boolean variant of the structured-output schema: the same three conditions,
# each a required bool instead of a 'yes'/'no' string.
class XrayEventBool(BaseModel):
    pneumonia: bool = Field(description="Pneumonia present (True/False)")
    tuberculosis: bool = Field(description="Tuberculosis present (True/False)")
    interstitial_lung_disease: bool = Field(description="ILD present (True/False)")

# Label the example report again, this time parsing into the boolean schema.
# The system prompt asks for true/false so that it agrees with XrayEventBool;
# it previously demanded 'yes'/'no', which contradicts the bool-typed fields.
response_bool = client.responses.parse(
    model="gpt-4o-2024-08-06",
    input=[
        {"role": "system", "content": "다음 방사선 판독에서 폐렴, 결핵, ILD의 존재 여부만 판단해라. 각 항목은 반드시 true 또는 false로만 출력하라. 불명확하거나 언급이 없으면 false로 하라."},
        {
            "role": "user",
            "content": radiology_report,
        },
    ],
    text_format=XrayEventBool,
)

# Parsed result exposed by the response for the requested text_format.
structured_new11 = response_bool.output_parsed

In [19]:
structured_new11

XrayEventBool(pneumonia=True, tuberculosis=False, interstitial_lung_disease=False)

In [18]:
structured_new11.pneumonia

True

In [20]:
type(structured_new11.pneumonia)

bool

#### 결과를 테이블에 넣어보기

In [22]:
dataset['text'][10]

'EXAMINATION:  CHEST (PA AND LAT)\n\nINDICATION:  ___ with hypoxia  // ?pna, aspiration.\n\nCOMPARISON:  None\n\nFINDINGS: \n\nPA and lateral views of the chest provided.   The lungs are adequately\naerated.\n\nThere is a focal consolidation at the left lung base adjacent to the lateral\nhemidiaphragm. There is mild vascular engorgement. There is bilateral apical\npleural thickening.\n\nThe cardiomediastinal silhouette is remarkable for aortic arch calcifications.\nThe heart is top normal in size.\n\nIMPRESSION: \n\nFocal consolidation at the left lung base, possibly representing aspiration or\npneumonia.\n\nCentral vascular engorgement.\n'

In [23]:
dataset

Unnamed: 0,note_id,text
0,10000032-RR-14,EXAMINATION: CHEST (PA AND LAT)\n\nINDICATION...
1,10000032-RR-23,EXAMINATION: CHEST (PA AND LAT)\n\nINDICATION...
2,10000084-RR-12,EXAMINATION: CHEST (PA AND LAT)\n\nINDICATION...
3,10000102-RR-18,CHEST PA AND LATERAL.\n\nCOMPARISON: None.\n\...
4,10000117-RR-13,EXAMINATION: CHEST (PA AND LAT)\n\nINDICATIO...
...,...,...
94,10001884-RR-85,INDICATION: ___ woman with shortness of breat...
95,10001884-RR-94,HISTORY: ___ female with chest tightness.\n\n...
96,10001884-RR-97,"INDICATION: Intermittent chest pain, evaluate..."
97,10001919-RR-22,EXAMINATION: CT CHEST W/CONTRAST\n\nINDICATIO...


In [24]:
# Copy the three parsed labels for the example report into row 10, one column
# per condition (the column name matches the schema field name).
for condition in ('pneumonia', 'tuberculosis', 'interstitial_lung_disease'):
    dataset.loc[10, condition] = getattr(structured_new, condition)

In [25]:
dataset[:11]

Unnamed: 0,note_id,text,pneumonia,tuberculosis,interstitial_lung_disease
0,10000032-RR-14,EXAMINATION: CHEST (PA AND LAT)\n\nINDICATION...,,,
1,10000032-RR-23,EXAMINATION: CHEST (PA AND LAT)\n\nINDICATION...,,,
2,10000084-RR-12,EXAMINATION: CHEST (PA AND LAT)\n\nINDICATION...,,,
3,10000102-RR-18,CHEST PA AND LATERAL.\n\nCOMPARISON: None.\n\...,,,
4,10000117-RR-13,EXAMINATION: CHEST (PA AND LAT)\n\nINDICATIO...,,,
5,10000117-RR-21,"EXAMINATION: Chest radiographs, PA and latera...",,,
6,10000117-RR-8,CHEST RADIOGRAPH PERFORMED.\n\nCOMPARISON: No...,,,
7,10000473-RR-7,EXAMINATION:\nChest: Frontal and lateral view...,,,
8,10000560-RR-28,"TWO-VIEW CHEST, ___.\n\nINDICATION: Renal cel...",,,
9,10000650-RR-16,INDICATION: ___ man with fever and cough.\n\n...,,,


In [26]:
# Label the first three reports with the 'yes'/'no' schema and store each
# answer in the matching column of the table.
for i in range(3):
  report_ = dataset.loc[i, 'text']
  resp = client.responses.parse(
    model="gpt-4o-2024-08-06",
    input=[
      {"role": "system", "content": "다음 방사선 판독에서 폐렴, 결핵, ILD가 있으면 추출하라. "},
      {"role": "user", "content": report_},
    ],
    text_format=XrayEvent,
    temperature=0,
  )
  event = resp.output_parsed
  # output_parsed is None when the reply holds no parsable content (for
  # example a refusal); skip the row instead of failing on attribute access
  # and aborting the rest of the batch.
  if event is None:
    print(i, "번째:", None)
    continue
  dataset.loc[i, 'pneumonia'] = event.pneumonia
  dataset.loc[i, 'tuberculosis'] = event.tuberculosis
  dataset.loc[i, 'interstitial_lung_disease'] = event.interstitial_lung_disease
  print(i, "번째:", event.model_dump())

0 번째: {'pneumonia': 'no', 'tuberculosis': 'no', 'interstitial_lung_disease': 'no'}
1 번째: {'pneumonia': 'no', 'tuberculosis': 'no', 'interstitial_lung_disease': 'no'}
2 번째: {'pneumonia': 'no', 'tuberculosis': 'no', 'interstitial_lung_disease': 'no'}


In [27]:
dataset[:10]

Unnamed: 0,note_id,text,pneumonia,tuberculosis,interstitial_lung_disease
0,10000032-RR-14,EXAMINATION: CHEST (PA AND LAT)\n\nINDICATION...,no,no,no
1,10000032-RR-23,EXAMINATION: CHEST (PA AND LAT)\n\nINDICATION...,no,no,no
2,10000084-RR-12,EXAMINATION: CHEST (PA AND LAT)\n\nINDICATION...,no,no,no
3,10000102-RR-18,CHEST PA AND LATERAL.\n\nCOMPARISON: None.\n\...,,,
4,10000117-RR-13,EXAMINATION: CHEST (PA AND LAT)\n\nINDICATIO...,,,
5,10000117-RR-21,"EXAMINATION: Chest radiographs, PA and latera...",,,
6,10000117-RR-8,CHEST RADIOGRAPH PERFORMED.\n\nCOMPARISON: No...,,,
7,10000473-RR-7,EXAMINATION:\nChest: Frontal and lateral view...,,,
8,10000560-RR-28,"TWO-VIEW CHEST, ___.\n\nINDICATION: Renal cel...",,,
9,10000650-RR-16,INDICATION: ___ man with fever and cough.\n\n...,,,


### Boolean type 

In [28]:
# Boolean structured-output schema: three required bool fields, one per
# condition. This re-declares the same fields as the XrayEventBool class
# defined in an earlier cell of this notebook.
class XrayEventBool(BaseModel):
    pneumonia: bool = Field(description="Pneumonia present (True/False)")
    tuberculosis: bool = Field(description="Tuberculosis present (True/False)")
    interstitial_lung_disease: bool = Field(description="ILD present (True/False)")

# Label the first three reports with the boolean schema and store the answers.
# NOTE(review): this writes bools into the same columns that earlier cells
# filled with 'yes'/'no' strings, so the columns end up holding mixed types
# (row 10 appears to keep its string labels) -- confirm that is intended.
for i in range(3):
  report_ = dataset.loc[i, 'text']
  resp = client.responses.parse(
    model="gpt-4o-2024-08-06",
    input=[
      {"role": "system", "content": "다음 방사선 판독에서 폐렴, 결핵, ILD가 있으면 추출하라. "},
      {"role": "user", "content": report_},
    ],
    text_format=XrayEventBool,
    temperature=0,
  )
  event = resp.output_parsed
  # output_parsed is None when the reply holds no parsable content (for
  # example a refusal); skip the row instead of failing on attribute access
  # and aborting the rest of the batch.
  if event is None:
    print(i, "번째:", None)
    continue
  dataset.loc[i, 'pneumonia'] = event.pneumonia
  dataset.loc[i, 'tuberculosis'] = event.tuberculosis
  dataset.loc[i, 'interstitial_lung_disease'] = event.interstitial_lung_disease
  print(i, "번째:", event.model_dump())

0 번째: {'pneumonia': False, 'tuberculosis': False, 'interstitial_lung_disease': False}
1 번째: {'pneumonia': False, 'tuberculosis': False, 'interstitial_lung_disease': False}
2 번째: {'pneumonia': False, 'tuberculosis': False, 'interstitial_lung_disease': False}


In [29]:
dataset[:10]

Unnamed: 0,note_id,text,pneumonia,tuberculosis,interstitial_lung_disease
0,10000032-RR-14,EXAMINATION: CHEST (PA AND LAT)\n\nINDICATION...,False,False,False
1,10000032-RR-23,EXAMINATION: CHEST (PA AND LAT)\n\nINDICATION...,False,False,False
2,10000084-RR-12,EXAMINATION: CHEST (PA AND LAT)\n\nINDICATION...,False,False,False
3,10000102-RR-18,CHEST PA AND LATERAL.\n\nCOMPARISON: None.\n\...,,,
4,10000117-RR-13,EXAMINATION: CHEST (PA AND LAT)\n\nINDICATIO...,,,
5,10000117-RR-21,"EXAMINATION: Chest radiographs, PA and latera...",,,
6,10000117-RR-8,CHEST RADIOGRAPH PERFORMED.\n\nCOMPARISON: No...,,,
7,10000473-RR-7,EXAMINATION:\nChest: Frontal and lateral view...,,,
8,10000560-RR-28,"TWO-VIEW CHEST, ___.\n\nINDICATION: Renal cel...",,,
9,10000650-RR-16,INDICATION: ___ man with fever and cough.\n\n...,,,


### 이전과 같은 코드 

In [30]:
radiology_report

'EXAMINATION:  CHEST (PA AND LAT)\n\nINDICATION:  ___ with hypoxia  // ?pna, aspiration.\n\nCOMPARISON:  None\n\nFINDINGS: \n\nPA and lateral views of the chest provided.   The lungs are adequately\naerated.\n\nThere is a focal consolidation at the left lung base adjacent to the lateral\nhemidiaphragm. There is mild vascular engorgement. There is bilateral apical\npleural thickening.\n\nThe cardiomediastinal silhouette is remarkable for aortic arch calcifications.\nThe heart is top normal in size.\n\nIMPRESSION: \n\nFocal consolidation at the left lung base, possibly representing aspiration or\npneumonia.\n\nCentral vascular engorgement.\n'

In [35]:
# Re-create the client, then send the same example report through
# responses.create, passing the output format as a raw JSON schema dict
# instead of a Pydantic class.
client = OpenAI(
                api_key=api_key # personal API key
                )

response = client.responses.create(
  model="gpt-5",
  input=[
    {
      # Instruction message: the same Korean yes/no prompt used for the
      # first parse call in this notebook.
      "role": "developer",
      "content": [
        {
          "type": "input_text",
          "text": "다음 방사선 판독에서 폐렴, 결핵, ILD의 존재 여부만 판단해라. 각 항목은 반드시 'yes' 또는 'no'로만 출력하라. 불명확하거나 언급이 없으면 'no'로 하라."
        }
      ]
    },
    {
      # The report text to classify.
      "role": "user",
      "content": [
        {
          "type": "input_text",
          "text": radiology_report}
      ]
    }
  ],
  text={
    "format": {
      "type": "json_schema",
      "name": "xray_event",
      "strict": True,
      # Hand-written schema with the same three required 'yes'/'no' string
      # enums that XrayEvent.model_json_schema() shows, plus
      # additionalProperties set to False.
      "schema": {
        "type": "object",
        "properties": {
          "pneumonia": {
            "type": "string",
            "description": "Patient has pneumonia ['yes'/'no'] (strict).",
            "enum": [
              "yes",
              "no"
            ]
          },
          "tuberculosis": {
            "type": "string",
            "description": "Patient has tuberculosis ['yes'/'no'] (strict).",
            "enum": [
              "yes",
              "no"
            ]
          },
          "interstitial_lung_disease": {
            "type": "string",
            "description": "Patient has interstitial lung disease ['yes'/'no'] (strict).",
            "enum": [
              "yes",
              "no"
            ]
          }
        },
        "required": [
          "pneumonia",
          "tuberculosis",
          "interstitial_lung_disease"
        ],
        "additionalProperties": False
      }
    },
    "verbosity": "medium"
  },
  reasoning={
    "effort": "low"
  },
  # NOTE(review): no tools are passed, yet `include` below requests
  # web_search_call.action.sources -- presumably left over from an exported
  # snippet; confirm these options are needed.
  tools=[],
  store=True,
  include=[
    "reasoning.encrypted_content",
    "web_search_call.action.sources"
  ]
)

In [31]:
response.output_text

'{"pneumonia":"yes","tuberculosis":"no","interstitial_lung_disease":"no"}'