In [1]:
import os
import json
import pandas as pd

In [2]:
# Path to the dataset root folder; each entry inside it is treated as one food class.
dataset_folder = 'food_dataset/images'

In [3]:
# Walk every food-class folder and collect one row per annotated image:
# [image_name, food_class, instance_num, ingredients].
food_data = []
for food_class in os.listdir(dataset_folder):
    json_folder = os.path.join(dataset_folder, food_class, 'json')
    png_folder = os.path.join(dataset_folder, food_class, 'png')

    # Stray top-level entries (e.g. .DS_Store, desktop.ini) have no 'json'
    # sub-folder; skip them instead of crashing in os.listdir below.
    if not os.path.isdir(json_folder):
        continue
    print("Processing: ", food_class)

    # Read each JSON annotation file.
    for json_file in os.listdir(json_folder):
        # Only parse annotation files. This drops OS metadata such as
        # '._.DS_Store', 'desktop.ini', 'Thumbs.db' and AppleDouble
        # '._*.json' companions, none of which are valid JSON.
        # NOTE(review): assumes annotation files carry a '.json' extension.
        if json_file.startswith('.') or not json_file.lower().endswith('.json'):
            continue

        # 'utf-8-sig' transparently strips a leading BOM if one is present.
        with open(os.path.join(json_folder, json_file), 'r', encoding='utf-8-sig') as file:
            data = json.load(file)

        # The file is closed at this point; only the parsed dict is needed.
        instance_num = data['instance_num']
        ingredients = [ingredient['ingredient'] for ingredient in data['ingredients']]
        # The image name is derived from the class and the zero-padded instance number.
        image_name = f"{food_class}_{str(instance_num).zfill(4)}.png"

        # Keep the record only when the matching PNG image actually exists.
        if os.path.exists(os.path.join(png_folder, image_name)):
            food_data.append([image_name, food_class, instance_num, ingredients])
        else:
            print(f"Missing image: {image_name}")

Processing:  baek_sook
Processing:  baguette
Processing:  banh_mi
Processing:  BBQ
Processing:  bibimbap
Processing:  bingsu
Processing:  bulgogi
Processing:  bunza
Processing:  burger
Processing:  burrito
Processing:  cake
Processing:  cannoli
Processing:  caprese
Processing:  chicken
Processing:  chili_crab
Processing:  chocolate
Processing:  churros
Processing:  coffee_hot
Processing:  coffee_ice
Processing:  cookie
Processing:  crepe
Processing:  croissant
Processing:  croque_monsieur
Processing:  curry
Processing:  dacquoise
Processing:  dim_sum
Processing:  donut
Processing:  egg_benedict
Processing:  egg_tart
Missing image: egg_tart_0783.png
Processing:  escargot
Processing:  fish_and_chips
Missing image: fish_and_chips_0991.png
Missing image: fish_and_chips_0992.png
Missing image: fish_and_chips_0993.png
Missing image: fish_and_chips_0994.png
Missing image: fish_and_chips_0995.png
Missing image: fish_and_chips_0996.png
Missing image: fish_and_chips_0997.png
Missing image: fish_

In [4]:
# Assemble the collected rows into a table, one row per image.
column_names = ['image_name', 'food_class', 'instance_num', 'ingredients']
df = pd.DataFrame(data=food_data, columns=column_names)

# Persist the table as CSV, leaving out the positional index.
df.to_csv(path_or_buf='food_ingredient_new.csv', index=False)

In [5]:
# Reload the table written earlier and list the ingredients to label.
df = pd.read_csv('food_ingredient_new.csv')
label_ingredients = [
    'egg', 'chicken', 'shrimp', 'cheese', 'pork', 'crab',
    'cream', 'tofu', 'lobster', 'peanut', 'bread',
]

# Add one 0/1 column per target ingredient.
# NOTE(review): the membership test runs against each cell of the
# 'ingredients' column as loaded from CSV. If those cells are text (the list
# was serialised when the CSV was written), this is a substring test, so
# 'egg' would also match e.g. 'eggplant' -- confirm that is intended.
for ingredient in label_ingredients:
    has_ingredient = df['ingredients'].map(lambda cell: int(ingredient in cell))
    df[ingredient] = has_ingredient

# Save the labelled DataFrame to a new CSV file; adjust the output path to
# your environment if needed.
df.to_csv('labeled_food_ingredients.csv', index=False)

df.head()  # Show the first 5 rows of the resulting DataFrame.

Unnamed: 0,image_name,food_class,instance_num,ingredients,egg,chicken,shrimp,cheese,pork,crab,cream,tofu,lobster,peanut,bread
0,baek_sook_1000.png,baek_sook,1000,"['egg', 'chicken', 'green_onion', 'clear_soup'...",1,1,0,0,0,0,0,0,0,0,0
1,baek_sook_1001.png,baek_sook,1001,"['chicken', 'clear_soup']",0,1,0,0,0,0,0,0,0,0,0
2,baek_sook_1002.png,baek_sook,1002,"['chicken', 'clear_soup']",0,1,0,0,0,0,0,0,0,0,0
3,baek_sook_1003.png,baek_sook,1003,"['chicken', 'clear_soup']",0,1,0,0,0,0,0,0,0,0,0
4,baek_sook_1004.png,baek_sook,1004,['chicken'],0,1,0,0,0,0,0,0,0,0,0
