In [None]:
import os
import xml.etree.ElementTree as ET
from PIL import Image
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.applications import VGG16


# Dataset paths
voc_path = 'VOC2007'
images_path = os.path.join(voc_path, 'JPEGImages')
annotations_path = os.path.join(voc_path, 'Annotations')
train_list_path = os.path.join(voc_path, 'ImageSets/Main/train.txt')
test_list_path = os.path.join(voc_path, 'ImageSets/Main/val.txt')


def _read_image_ids(list_path):
    """Return the whitespace-stripped, non-empty lines of *list_path*.

    Blank lines (for example a trailing newline at the end of the file) are
    skipped so they do not turn into empty image ids.
    """
    with open(list_path, 'r') as f:
        stripped_lines = (line.strip() for line in f)
        return [image_id for image_id in stripped_lines if image_id]


# Load the training-set image list
train_images = _read_image_ids(train_list_path)
# Load the test-set image list (read from the "val" split file)
test_images = _read_image_ids(test_list_path)

# Image and annotation preprocessing
def load_and_preprocess_image(image_id, target_size=(224, 224)):
    """Load one image and the objects listed in its annotation XML.

    Args:
        image_id: File stem shared by ``<images_path>/<id>.jpg`` and
            ``<annotations_path>/<id>.xml``. May be an eager string tensor
            (what ``tf.py_function`` passes in), ``bytes`` or ``str``.
        target_size: Size handed to ``PIL.Image.resize``, i.e.
            ``(width, height)``.

    Returns:
        A tuple ``(image, boxes, labels)``:
          * ``image``: float array of shape ``(height, width, 3)`` with values
            scaled to ``[0, 1]``.
          * ``boxes``: list of ``[xmin, ymin, xmax, ymax]`` integers exactly as
            stored in the XML. They are NOT rescaled to ``target_size``.
          * ``labels``: list of class-name strings, parallel to ``boxes``.
    """
    # Unwrap tensor -> bytes -> str so the function also works outside tf.data.
    if hasattr(image_id, 'numpy'):
        image_id = image_id.numpy()
    if isinstance(image_id, bytes):
        image_id = image_id.decode('utf-8')

    # Load the image. The context manager closes the file handle, and the RGB
    # conversion guarantees three channels for grayscale/palette/RGBA files.
    image_path = os.path.join(images_path, f'{image_id}.jpg')
    with Image.open(image_path) as raw_image:
        resized = raw_image.convert('RGB').resize(target_size)
        image = np.array(resized) / 255.0  # normalise to [0, 1]

    # Parse the annotation file
    annotation_path = os.path.join(annotations_path, f'{image_id}.xml')
    root = ET.parse(annotation_path).getroot()

    # Collect the bounding box and class name of every annotated object
    boxes = []
    labels = []
    for obj in root.findall('object'):
        name = obj.find('name')
        bbox = obj.find('bbox')
        if name is None or bbox is None:
            # Skip objects that lack a class name or a bounding box.
            continue
        boxes.append([int(bbox.find(tag).text)
                      for tag in ('xmin', 'ymin', 'xmax', 'ymax')])
        labels.append(name.text)

    return image, boxes, labels

# Map each class name to an integer label (its position in class_names)
class_names = [
    'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
    'bus', 'car', 'cat', 'chair', 'cow',
    'diningtable', 'dog', 'horse', 'motorbike', 'person',
    'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
]
class_mapping = dict(zip(class_names, range(len(class_names))))

# Convert class-name labels to one-hot encoding
def labels_to_one_hot(labels):
    """One-hot encode a list of class names.

    Returns a float array of shape ``(len(labels), len(class_names))`` with a
    single 1 per row. For an empty ``labels`` the result is instead a 1-D
    all-zero array of shape ``(len(class_names),)``. A name missing from
    ``class_mapping`` raises ``KeyError``.
    """
    num_classes = len(class_names)
    if not labels:
        # No objects: a flat all-zero vector of shape [num_classes]
        return np.zeros((num_classes,))

    encoded = np.zeros((len(labels), num_classes))
    class_ids = [class_mapping[class_name] for class_name in labels]
    encoded[np.arange(len(class_ids)), class_ids] = 1
    return encoded

# Build a TensorFlow dataset
def create_dataset(image_ids, batch_size=32, image_size=(224, 224)):
    """Build a batched ``tf.data.Dataset`` of ``(image, class_vector)`` pairs.

    Args:
        image_ids: Sequence of image-id strings.
        batch_size: Number of examples per batch.
        image_size: ``(width, height)`` forwarded to
            ``load_and_preprocess_image`` as ``target_size``.

    Returns:
        A dataset whose elements are float32 image batches of shape
        ``(batch, height, width, 3)`` and float32 label batches of shape
        ``(batch, len(class_names))``. Each label row is multi-hot: entry
        ``i`` is 1 when at least one annotated object has class ``i``.
    """
    width, height = image_size

    def load_data(image_id):
        image, _boxes, labels = load_and_preprocess_image(
            image_id, target_size=(width, height))
        one_hot_labels = labels_to_one_hot(labels)
        if one_hot_labels.ndim == 2:
            # One row per object: collapse to a single fixed-length vector.
            # Squeezing only the single-object case left (N, 20) arrays for
            # multi-object images, which cannot be batched with (20,) labels.
            one_hot_labels = one_hot_labels.max(axis=0)
        # Match the dtypes declared to tf.py_function below.
        return image.astype(np.float32), one_hot_labels.astype(np.float32)

    def set_shapes(image, label):
        # tf.py_function outputs have unknown static shape; restore it so
        # downstream Keras layers can be built.
        image.set_shape((height, width, 3))
        label.set_shape((len(class_names),))
        return image, label

    dataset = tf.data.Dataset.from_tensor_slices(image_ids)
    dataset = dataset.map(
        lambda x: tf.py_function(load_data, [x], [tf.float32, tf.float32]))
    dataset = dataset.map(set_shapes)  # attach static shapes
    dataset = dataset.batch(batch_size)
    return dataset
# Build the training dataset and the test dataset from their image-id lists
train_dataset, test_dataset = (
    create_dataset(train_images),
    create_dataset(test_images),
)

# YOLO model architecture
def yolo_model(input_shape=(448, 448, 3)):
    """Build the (uncompiled) YOLO-style convolutional network.

    Every convolution uses ``padding='same'`` so that only the stride-2
    convolutions and the 2x2 max-pools reduce the spatial size. With Keras'
    default ``'valid'`` padding the 448x448 input shrinks to 2x2 before the
    last two 3x3 convolutions, which makes the model impossible to build.
    With ``'same'`` padding the default input yields a 7x7x1024 feature map
    ahead of ``Flatten``.

    Args:
        input_shape: ``(height, width, channels)`` of the input images.

    Returns:
        A ``Model`` mapping an image batch to a flat 1470-value vector per
        image (7x7x(2*5+20) for 20 classes), with linear activation.
    """
    def conv(filters, kernel_size, strides=1):
        # Shared settings for every convolution in the network.
        return Conv2D(filters, kernel_size, strides=strides,
                      padding='same', activation='relu')

    input_tensor = Input(shape=input_shape)

    x = conv(64, 7, strides=2)(input_tensor)
    x = MaxPooling2D((2, 2))(x)

    x = conv(192, 3)(x)
    x = MaxPooling2D((2, 2))(x)

    x = conv(128, 1)(x)
    x = conv(256, 3)(x)
    x = conv(256, 1)(x)
    x = conv(512, 3)(x)
    x = MaxPooling2D((2, 2))(x)

    x = conv(256, 1)(x)
    x = conv(512, 3)(x)
    x = conv(512, 1)(x)
    x = conv(1024, 3)(x)
    x = MaxPooling2D((2, 2))(x)

    x = conv(512, 1)(x)
    x = conv(1024, 3)(x)
    x = conv(1024, 3)(x)
    x = conv(1024, 3, strides=2)(x)
    x = conv(1024, 3)(x)
    x = conv(1024, 3)(x)

    x = Flatten()(x)
    x = Dense(4096, activation='relu')(x)
    output = Dense(1470, activation='linear')(x)  # 7x7x(2*5+20) for 20 classes
    return Model(inputs=input_tensor, outputs=output)

# Create the model.
# create_dataset yields 224x224x3 images and 20-element class vectors, so the
# YOLO network is built at that resolution and followed by a 20-way sigmoid
# head. Feeding the pipeline into the raw 448x448 / 1470-output network fails
# on both the input shape and the loss computation.
# NOTE(review): this trains a multi-label classifier, not a detector. Training
# the 1470-value detection output would require encoding the bounding boxes
# into grid targets in the data pipeline and a matching loss.
backbone = yolo_model(input_shape=(224, 224, 3))
class_scores = Dense(len(class_names), activation='sigmoid')(backbone.output)
model = Model(inputs=backbone.input, outputs=class_scores)

# Compile the model. A metric is required for evaluate() to return
# [loss, metric]; with no metrics it returns a single scalar and the
# two-name unpacking below would fail.
model.compile(optimizer='adam', loss='binary_crossentropy',
              metrics=['binary_accuracy'])
# Train the model, using the test dataset for validation during training
history = model.fit(train_dataset, epochs=10, validation_data=test_dataset, verbose=2)
# Evaluate the model on the test set
test_loss, test_acc = model.evaluate(test_dataset, verbose=2)
print(f'Test accuracy: {test_acc}')
