In [1]:
import json

# Read the raw dataset from disk and parse it as JSON
with open('multi_images.json', 'r', encoding='utf-8') as f:
    data = json.loads(f.read())

# Pretty-print the first five records, each followed by a blank line
for i, item in enumerate(data[:5]):
    print(f"Item {i+1}:")
    print(json.dumps(item, indent=2, ensure_ascii=False), end="\n\n")

Item 1:
{
  "system_prompt": "Answer the following questions by considering all images.",
  "image": [
    "soccer1.jpg",
    "soccer2.jpg"
  ],
  "conversations": [
    {
      "from": "human",
      "value": "<image><image>Which image has more people? Give details to your answer."
    },
    {
      "from": "gpt",
      "value": "There are five people in the first image, while there are eleven people in the second. Therefore, the second image has more people."
    }
  ]
}

Item 2:
{
  "system_prompt": "Given an idea and a collection of photos, Assistant produces captivating social media content, such as a Twitter post, featuring well-crafted copy.",
  "image": [
    "post1.png",
    "post2.png",
    "post3.png",
    "post4.png"
  ],
  "conversations": [
    {
      "from": "human",
      "value": "Help me write a Twitter post considering the following images.<image><image><image><image>"
    },
    {
      "from": "gpt",
      "value": "Wandering through the enchanting streets of Mil

In [7]:
import json

# Read the converted training split from disk and parse it as JSON
with open('multi_images_train.json', 'r', encoding='utf-8') as f:
    data = json.loads(f.read())

# Pretty-print the first five records, each followed by a blank line
for i, item in enumerate(data[:5]):
    print(f"Item {i+1}:")
    print(json.dumps(item, indent=2, ensure_ascii=False), end="\n\n")

Item 1:
{
  "system_prompt": "Given an idea and a collection of photos, Assistant produces captivating social media content, such as a Twitter post, featuring well-crafted copy.",
  "image": [
    "post1.png",
    "post2.png",
    "post3.png",
    "post4.png"
  ],
  "conversations": [
    {
      "from": "human",
      "value": "Help me write a Twitter post considering the following images.<image><image><image><image>"
    },
    {
      "from": "gpt",
      "value": "Wandering through the enchanting streets of Milan, where the vibrant pulse of the city is as captivating as its historical architecture. From the mesmerizing ... the gentle glow of candles offers a moment of serenity in this bustling metropolis. #MilanDiaries #TravelMoments #CityLights"
    }
  ],
  "messages": [
    {
      "role": "system",
      "content": "Given an idea and a collection of photos, Assistant produces captivating social media content, such as a Twitter post, featuring well-crafted copy."
    },
    {
  

In [8]:
import json

# Read the converted validation split from disk and parse it as JSON
with open('multi_images_val.json', 'r', encoding='utf-8') as f:
    data = json.loads(f.read())

# Pretty-print the first five records, each followed by a blank line
for i, item in enumerate(data[:5]):
    print(f"Item {i+1}:")
    print(json.dumps(item, indent=2, ensure_ascii=False), end="\n\n")

Item 1:
{
  "system_prompt": "Answer the following questions by considering all images.",
  "image": [
    "soccer1.jpg",
    "soccer2.jpg"
  ],
  "conversations": [
    {
      "from": "human",
      "value": "<image><image>Which image has more people? Give details to your answer."
    },
    {
      "from": "gpt",
      "value": "There are five people in the first image, while there are eleven people in the second. Therefore, the second image has more people."
    }
  ],
  "messages": [
    {
      "role": "system",
      "content": "Answer the following questions by considering all images."
    },
    {
      "role": "user",
      "content": [
        {
          "type": "image"
        },
        {
          "type": "image"
        },
        {
          "type": "text",
          "text": "Which image has more people? Give details to your answer."
        }
      ]
    },
    {
      "role": "assistant",
      "content": "There are five people in the first image, while there are ele

In [6]:
import json
import random
import os
from tqdm import tqdm

def transform_sample(old_sample):
    """Add a chat-style ``messages`` list to a conversation sample.

    The sample's ``conversations`` list is read pairwise: the entry at each
    even index is treated as the human turn and the following entry (if any)
    as the assistant reply. Every ``<image>`` token inside a human turn is
    replaced by an ``{"type": "image"}`` content part, and the surrounding
    text (stripped) becomes ``{"type": "text", ...}`` parts. A human turn
    without any ``<image>`` token becomes a single, unstripped text part.

    Args:
        old_sample: dict that may contain ``system_prompt`` (str), ``image``
            (list of image names) and ``conversations`` (list of dicts that
            each have a ``"value"`` string).

    Returns:
        A new dict with the original ``system_prompt``, ``image`` and
        ``conversations`` values plus the generated ``messages`` list.

    Raises:
        ValueError: if the human turns contain ``<image>`` tokens and their
            total number differs from the number of entries in ``image``.
        KeyError: if a conversation entry has no ``"value"`` key.
    """
    system_prompt = old_sample.get("system_prompt", "")
    images = old_sample.get("image", [])  # list of image names
    conversations = old_sample.get("conversations", [])

    # Validate placeholders once for the whole conversation. Comparing each
    # turn's own count against the full image list would reject samples whose
    # images are spread over several human turns, and would accept samples
    # that repeat the full placeholder count in every turn.
    num_images_needed = sum(
        turn["value"].count("<image>") for turn in conversations[::2]
    )
    if num_images_needed and num_images_needed != len(images):
        raise ValueError(f"Mismatch between <image> count ({num_images_needed}) and actual images ({len(images)}).")

    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})

    for i in range(0, len(conversations), 2):
        human_message = conversations[i]
        assistant_message = conversations[i + 1] if i + 1 < len(conversations) else None

        # Build the user content parts for this human turn
        user_content = []
        if "<image>" in human_message["value"]:
            # Split the text on the <image> token; every boundary between two
            # parts corresponds to exactly one image placeholder.
            text_parts = human_message["value"].split("<image>")

            for idx, text_part in enumerate(text_parts):
                if idx > 0:
                    user_content.append({"type": "image"})
                if text_part.strip() != "":
                    user_content.append({"type": "text", "text": text_part.strip()})
        else:
            # Text-only turn: keep the value exactly as given
            user_content.append({"type": "text", "text": human_message["value"]})

        messages.append({
            "role": "user",
            "content": user_content
        })

        # Assistant reply, when the turn has one
        if assistant_message:
            messages.append({
                "role": "assistant",
                "content": assistant_message["value"]
            })

    # Keep the original fields untouched and add the new messages field
    transformed_sample = {
        "system_prompt": system_prompt,
        "image": images,
        "conversations": conversations,
        "messages": messages
    }

    return transformed_sample

def main(
    input_path="multi_images.json",
    output_train_path="multi_images_train.json",
    output_val_path="multi_images_val.json",
    val_ratio=0.1,
    seed=None,
):
    """Convert a dataset file and write shuffled train/validation splits.

    Every item of the input JSON list is passed through ``transform_sample``;
    items that raise are reported and skipped. The surviving items are
    shuffled, split at ``int(len * (1 - val_ratio))`` and written as two
    indented UTF-8 JSON files.

    Args:
        input_path: path of the source JSON file.
        output_train_path: path the training split is written to.
        output_val_path: path the validation split is written to.
        val_ratio: fraction of the data used for validation, within [0, 1].
        seed: optional seed for a reproducible shuffle. With ``None`` the
            module-level ``random`` generator is used, as before.

    Raises:
        ValueError: if ``val_ratio`` is outside [0, 1].
    """
    if not 0 <= val_ratio <= 1:
        raise ValueError(f"val_ratio must be between 0 and 1, got {val_ratio}")

    with open(input_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    transformed_data = []
    skipped = 0
    for index, item in enumerate(tqdm(data, desc="Transforming")):
        try:
            transformed_data.append(transform_sample(item))
        except Exception as e:
            # Best-effort conversion: report which item failed and continue.
            skipped += 1
            print(f"Error processing item {index}: {e}")

    # Shuffle; a dedicated generator keeps a seeded run from touching the
    # global random state.
    if seed is None:
        random.shuffle(transformed_data)
    else:
        random.Random(seed).shuffle(transformed_data)

    # Split into train / validation
    split_idx = int(len(transformed_data) * (1 - val_ratio))
    train_data = transformed_data[:split_idx]
    val_data = transformed_data[split_idx:]

    # Save both splits
    with open(output_train_path, "w", encoding="utf-8") as f:
        json.dump(train_data, f, ensure_ascii=False, indent=2)

    with open(output_val_path, "w", encoding="utf-8") as f:
        json.dump(val_data, f, ensure_ascii=False, indent=2)

    if skipped:
        print(f"Skipped {skipped} item(s) that failed to transform.")
    print(f"✅ 변환 완료: 총 {len(transformed_data)}개 → train {len(train_data)}개 / val {len(val_data)}개")

# Run the conversion when this code is executed directly (as a script or as a notebook cell).
if __name__ == "__main__":
    main()

Transforming: 100%|██████████| 2/2 [00:00<00:00, 19831.22it/s]

✅ 변환 완료: 총 2개 → train 1개 / val 1개



