In [4]:
import os
import pandas as pd
import xml.etree.ElementTree as ET

def parse_xml(xml_file):
    """Collect the object class names listed in an annotation XML file.

    Only ``<object>`` elements that are direct children of the root are
    considered, and for each one only its direct ``<name>`` child is read.

    Args:
        xml_file: Path of the XML file (or an open file object) to parse.

    Returns:
        A list with one name string per object, in document order.
        Duplicates are kept. Objects whose ``<name>`` is missing or blank
        are skipped.

    Raises:
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
    """
    root = ET.parse(xml_file).getroot()
    labels = []
    for obj in root.findall('object'):
        # findtext() yields None for a missing <name> instead of raising
        # AttributeError the way ``find('name').text`` would.
        name = (obj.findtext('name') or '').strip()
        if name:
            labels.append(name)
    return labels

def encode_labels(labels, label_index):
    """Turn a list of label names into a multi-hot list of 0/1 flags.

    Args:
        labels: Iterable of label names; repeated names are allowed.
        label_index: Mapping from label name to its position in the vector.

    Returns:
        A list of ``len(label_index)`` integers where the position of every
        label present in ``labels`` is 1 and all others are 0. Names that
        are not keys of ``label_index`` are ignored.
    """
    multi_hot = len(label_index) * [0]
    # Resolve the known names to positions first, then flip those slots on.
    positions = [label_index[name] for name in labels if name in label_index]
    for position in positions:
        multi_hot[position] = 1
    return multi_hot

def main(xml_dir='./VOC2012_test/Annotations', output_csv='test_label.csv'):
    """Convert a folder of annotation XML files into a multi-label CSV.

    Each ``.xml`` file in ``xml_dir`` becomes one CSV row: the file name
    without its extension, followed by one 0/1 column per category telling
    whether that category is named in the file.

    Args:
        xml_dir: Folder holding the annotation XML files. Point it at
            another folder (e.g. ``'./VOC2012_train_val/Annotations'``) to
            process a different split.
        output_csv: Path of the CSV file to write.
    """
    # Categories and the column position assigned to each of them.
    categories = ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']
    label_index = {category: idx for idx, category in enumerate(categories)}

    # os.listdir() returns every entry in arbitrary order, so keep only the
    # XML files (skipping things like .DS_Store or notebook checkpoint
    # folders) and sort them to make the CSV row order reproducible.
    xml_files = sorted(
        name for name in os.listdir(xml_dir) if name.lower().endswith('.xml')
    )

    # Build one row per XML file.
    data = []
    for xml_file in xml_files:
        labels = parse_xml(os.path.join(xml_dir, xml_file))
        label_vector = encode_labels(labels, label_index)
        file_name = os.path.splitext(xml_file)[0]  # drop the ".xml" extension
        data.append([file_name] + label_vector)

    # Save to CSV.
    columns = ['image_name'] + categories
    df = pd.DataFrame(data, columns=columns)
    df.to_csv(output_csv, index=False)
    print('CSV file has been created successfully!')

# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()


CSV file has been created successfully!
