In [1]:
import os
import random
import xml.etree.ElementTree as ET
import numpy as np
import shutil
import cv2
from pathlib import Path
from tqdm import tqdm
import json
import yaml
from PIL import Image
import uuid

# 切片

In [3]:
def copy_first_n_files(source_folder, destination_folder, n=6000):
    # 获取源文件夹中的所有文件（按文件名排序）
    files = os.listdir(source_folder)
    
    # 只保留文件，不包括文件夹
    files = [f for f in files if os.path.isfile(os.path.join(source_folder, f))]
    
    # 获取前n个文件
    files_to_copy = files[:n]
    
    # 如果目标文件夹不存在，则创建
    os.makedirs(destination_folder, exist_ok=True)
    
    # 复制文件到目标文件夹
    # for file_name in files_to_copy:
    #     src_path = os.path.join(source_folder, file_name)
    #     dst_path = os.path.join(destination_folder, file_name)
    #     
    #     # 复制文件
    #     shutil.copy(src_path, dst_path)
    #     print(f"复制: {file_name}")
    
    for file_name in tqdm(files_to_copy, desc="Copying files", unit="file"):
        src_path = os.path.join(source_folder, file_name)
        dst_path = os.path.join(destination_folder, file_name)
        
        # 复制文件
        shutil.copy(src_path, dst_path)


# 示例使用
source_folder = r'E:\ipynb\datasets\extracted_images\times'  # 源文件夹路径
destination_folder = r'E:\ipynb\datasets\math_need_to_use\times'  # 目标文件夹路径

copy_first_n_files(source_folder, destination_folder, 6000)


Copying files: 100%|██████████| 3251/3251 [00:11<00:00, 278.66file/s]


# voc

In [2]:
def create_yolo_format_voc(dataset_path, output_path, train_class=None):
    # 创建输出路径
    output_images_path = os.path.join(output_path, 'JPEGImages')
    output_annotations_path = os.path.join(output_path, 'Annotations')
    output_sets_path = os.path.join(output_path, 'ImageSets', 'Main')
    
    Path(output_images_path).mkdir(parents=True, exist_ok=True)
    Path(output_annotations_path).mkdir(parents=True, exist_ok=True)
    Path(output_sets_path).mkdir(parents=True, exist_ok=True)
    
    # 假设数据集按分类存放，每个子文件夹对应一个类别
    class_names = os.listdir(dataset_path)
    class_dict = {class_name: idx for idx, class_name in enumerate(class_names)}
    
    # 如果指定了特定的训练分类，则仅处理该类别
    if train_class:
        class_dict = {train_class: class_dict.get(train_class)}
    
    # 处理每个分类
    train_files = []
    val_files = []
    
    # 在遍历每个类别时添加进度条
    for class_name, class_idx in tqdm(class_dict.items(), desc="Processing classes", unit="class"):
        class_folder = os.path.join(dataset_path, class_name)
        
        if not os.path.isdir(class_folder):
            continue
        
        # 遍历每个图片文件时添加进度条
        image_files = [f for f in os.listdir(class_folder) if f.endswith(('.jpg', '.png', '.jpeg'))]
        for image_file in tqdm(image_files, desc=f"Processing images in {class_name}", unit="image"):
            image_path = os.path.join(class_folder, image_file)
            
            # 检查是否已经处理过该文件，若是则跳过
            annotation_path = os.path.join(output_annotations_path, f'{os.path.splitext(image_file)[0]}.xml')
            if os.path.exists(annotation_path):
                continue  # 如果标注文件已存在，则跳过
            
            # 读取图像
            image = cv2.imread(image_path)
            height, width, _ = image.shape
            
            # 生成VOC标注文件 (Yolo使用相对坐标)
            annotation = ET.Element('annotation')
            ET.SubElement(annotation, 'folder').text = 'JPEGImages'
            ET.SubElement(annotation, 'filename').text = image_file
            ET.SubElement(annotation, 'path').text = image_path

            source = ET.SubElement(annotation, 'source')
            ET.SubElement(source, 'database').text = 'Unknown'
            
            size = ET.SubElement(annotation, 'size')
            ET.SubElement(size, 'width').text = str(width)
            ET.SubElement(size, 'height').text = str(height)
            ET.SubElement(size, 'depth').text = '3'  # 3通道 (RGB)
            
            object_tag = ET.SubElement(annotation, 'object')
            ET.SubElement(object_tag, 'name').text = class_name
            ET.SubElement(object_tag, 'pose').text = 'Unspecified'
            ET.SubElement(object_tag, 'truncated').text = '0'
            ET.SubElement(object_tag, 'difficult').text = '0'
            
            bndbox = ET.SubElement(object_tag, 'bndbox')
            ET.SubElement(bndbox, 'xmin').text = '0'
            ET.SubElement(bndbox, 'ymin').text = '0'
            ET.SubElement(bndbox, 'xmax').text = str(width)
            ET.SubElement(bndbox, 'ymax').text = str(height)
            
            # 将XML写入Annotations文件夹
            tree = ET.ElementTree(annotation)
            tree.write(annotation_path)
            
            # 复制图像到JPEGImages
            shutil.copy(image_path, output_images_path)
            
            # 创建训练和验证集文件 (可以自定义划分比例)
            if len(train_files) < 0.8 * len(image_files):  # 假设80%用于训练
                train_files.append(f'{os.path.splitext(image_file)[0]}')
            else:
                val_files.append(f'{os.path.splitext(image_file)[0]}')

    # 写入train.txt和val.txt
    with open(os.path.join(output_sets_path, 'train.txt'), 'w') as f:
        for item in train_files:
            f.write(f'{item}\n')
    
    with open(os.path.join(output_sets_path, 'val.txt'), 'w') as f:
        for item in val_files:
            f.write(f'{item}\n')

# 调用函数
dataset_path = r'E:\ipynb\datasets\math_need_to_use'  # 数据集所在路径
output_path = r'E:\ipynb\datasets\voc_math'       # 输出路径
train_class = "times"  # 只处理某一分类

create_yolo_format_voc(dataset_path, output_path, train_class)


Processing classes:   0%|          | 0/1 [00:00<?, ?class/s]
Processing images in times:   0%|          | 0/3251 [00:00<?, ?image/s][A
Processing images in times:   3%|▎         | 97/3251 [00:00<00:03, 964.80image/s][A
Processing images in times:   6%|▌         | 197/3251 [00:00<00:03, 982.39image/s][A
Processing images in times:  10%|▉         | 312/3251 [00:00<00:02, 1056.12image/s][A
Processing images in times:  13%|█▎        | 421/3251 [00:00<00:02, 1067.21image/s][A
Processing images in times:  16%|█▋        | 529/3251 [00:00<00:02, 1068.79image/s][A
Processing images in times:  20%|█▉        | 638/3251 [00:00<00:02, 1074.01image/s][A
Processing images in times:  23%|██▎       | 746/3251 [00:00<00:02, 1074.11image/s][A
Processing images in times:  26%|██▋       | 860/3251 [00:00<00:02, 1094.40image/s][A
Processing images in times:  30%|██▉       | 972/3251 [00:00<00:02, 1100.53image/s][A
Processing images in times:  33%|███▎      | 1083/3251 [00:01<00:01, 1092.48image/s]

# voc_annotation

In [14]:
def get_classes(classes_path):
    with open(classes_path, encoding='utf-8') as f:
        class_names = f.readlines()
    class_names = [c.strip() for c in class_names]
    return class_names, len(class_names)

#----------------------------------------------------------------------------------------------------#
#   annotation_mode用于指定该文件运行时计算的内容
#   annotation_mode为0代表整个标签处理过程，包括获得VOCdevkit/VOC2007/ImageSets里面的txt以及训练用的2007_train.txt、2007_val.txt
#   annotation_mode为1代表获得VOCdevkit/VOC2007/ImageSets里面的txt
#   annotation_mode为2代表获得训练用的2007_train.txt、2007_val.txt
#----------------------------------------------------------------------------------------------------#
annotation_mode     = 0
#-------------------------------------------------------------------#
#   必须要修改，用于生成2007_train.txt、2007_val.txt的目标信息
#   与训练和预测所用的classes_path一致即可
#   如果生成的2007_train.txt里面没有目标信息
#   那么就是因为classes没有设定正确
#   仅在annotation_mode为0和2的时候有效
#-------------------------------------------------------------------#
classes_path        = r'E:\ipynb\pyhton课设\VOC\voc_math\class.txt'
#---------------------------------------------------------------------------------------------------#
#   trainval_percent用于指定(训练集+验证集)与测试集的比例，默认情况下 (训练集+验证集):测试集 = 9:1
#   train_percent用于指定(训练集+验证集)中训练集与验证集的比例，默认情况下 训练集:验证集 = 9:1
#   仅在annotation_mode为0和1的时候有效
#----------------------------------------------------------------------------------------------------#
trainval_percent    = 0.9
train_percent       = 0.9
#-------------------------------------------------------#
#   指向VOC数据集所在的文件夹
#   默认指向根目录下的VOC数据集
#-------------------------------------------------------#
VOCdevkit_path  = r'E:\ipynb\pyhton课设\VOC\voc_math'

VOCdevkit_sets  = [('math', 'train'), ('math', 'val')]
classes, _      = get_classes(classes_path)

#-------------------------------------------------------#
#   统计目标数量
#-------------------------------------------------------#
photo_nums  = np.zeros(len(VOCdevkit_sets))
nums        = np.zeros(len(classes))
def convert_annotation(year, image_id, list_file):
    in_file = open(os.path.join(VOCdevkit_path, 'Annotations\\%s.xml'%image_id), encoding='utf-8')
    tree=ET.parse(in_file)
    root = tree.getroot()

    for obj in root.iter('object'):
        difficult = 0 
        if obj.find('difficult')!=None:
            difficult = obj.find('difficult').text
        cls = obj.find('name').text
        if cls not in classes or int(difficult)==1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (int(float(xmlbox.find('xmin').text)), int(float(xmlbox.find('ymin').text)), int(float(xmlbox.find('xmax').text)), int(float(xmlbox.find('ymax').text)))
        list_file.write(" " + ",".join([str(a) for a in b]) + ',' + str(cls_id))
        
        nums[classes.index(cls)] = nums[classes.index(cls)] + 1
        
if __name__ == "__main__":
    random.seed(0)
    if " " in os.path.abspath(VOCdevkit_path):
        raise ValueError("数据集存放的文件夹路径与图片名称中不可以存在空格，否则会影响正常的模型训练，请注意修改。")

    if annotation_mode == 0 or annotation_mode == 1:
        print("Generate txt in ImageSets.")
        xmlfilepath     = os.path.join(VOCdevkit_path, 'Annotations')
        saveBasePath    = os.path.join(VOCdevkit_path, 'ImageSets/Main')
        temp_xml        = os.listdir(xmlfilepath)
        total_xml       = []
        for xml in temp_xml:
            if xml.endswith(".xml"):
                total_xml.append(xml)

        num     = len(total_xml)  
        list_i    = range(num)  
        tv      = int(num*trainval_percent)  
        tr      = int(tv*train_percent)  
        trainval= random.sample(list_i,tv)  
        train   = random.sample(trainval,tr)  
        
        print("train and val size",tv)
        print("train size",tr)
        ftrainval   = open(os.path.join(saveBasePath,'trainval.txt'), 'w')  
        ftest       = open(os.path.join(saveBasePath,'test.txt'), 'w')  
        ftrain      = open(os.path.join(saveBasePath,'train.txt'), 'w')  
        fval        = open(os.path.join(saveBasePath,'val.txt'), 'w')  
        
        for i in list_i:  
            name=total_xml[i][:-4]+'\n'  
            if i in trainval:  
                ftrainval.write(name)  
                if i in train:  
                    ftrain.write(name)  
                else:  
                    fval.write(name)  
            else:  
                ftest.write(name)  
        
        ftrainval.close()  
        ftrain.close()  
        fval.close()  
        ftest.close()
        print("Generate txt in ImageSets done.")

    if annotation_mode == 0 or annotation_mode == 2:
        print("Generate train.txt and val.txt for train.")
        type_index = 0
        for year, image_set in VOCdevkit_sets:
            image_ids = open(os.path.join(VOCdevkit_path, 'ImageSets\\Main\\%s.txt'%image_set), encoding='utf-8').read().strip().split()
            list_file = open('_%s.txt'%image_set, 'w', encoding='utf-8')
            for image_id in image_ids:
                list_file.write('%s\\JPEGImages\\%s.jpg'%(os.path.abspath(VOCdevkit_path), image_id))

                convert_annotation(year, image_id, list_file)
                list_file.write('\n')
            photo_nums[type_index] = len(image_ids)
            type_index += 1
            list_file.close()
        print("Generate train.txt and val.txt for train done.")
        
        def printTable(List1, List2):
            for i in range(len(List1[0])):
                print("|", end=' ')
                for j in range(len(List1)):
                    print(List1[j][i].rjust(int(List2[j])), end=' ')
                    print("|", end=' ')
                print()

        str_nums = [str(int(x)) for x in nums]
        tableData = [
            classes, str_nums
        ]
        colWidths = [0]*len(tableData)
        len1 = 0
        for i in range(len(tableData)):
            for j in range(len(tableData[i])):
                if len(tableData[i][j]) > colWidths[i]:
                    colWidths[i] = len(tableData[i][j])
        printTable(tableData, colWidths)

        if photo_nums[0] <= 500:
            print("训练集数量小于500，属于较小的数据量，请注意设置较大的训练世代（Epoch）以满足足够的梯度下降次数（Step）。")

        if np.sum(nums) == 0:
            print("在数据集中并未获得任何目标，请注意修改classes_path对应自己的数据集，并且保证标签名字正确，否则训练将会没有任何效果！")


Generate txt in ImageSets.
train and val size 60176
train size 54158
Generate txt in ImageSets done.
Generate train.txt and val.txt for train.
Generate train.txt and val.txt for train done.
|     0 | 4432 | 
|     1 | 7363 | 
|     2 | 5396 | 
|     3 | 5225 | 
|     4 | 5098 | 
|     5 | 3014 | 
|     6 | 2696 | 
|     7 | 2546 | 
|     8 | 2700 | 
|     9 | 3326 | 
|     [ |  694 | 
|     ] |  711 | 
|     + | 4556 | 
|     = | 4623 | 
|     - | 5360 | 
| times | 2436 | 


# coco

In [7]:
# 数据集路径和输出文件路径
dataset_path = r"E:\ipynb\datasets\math_need_to_use"  # 替换为你的数据集路径
output_yolo_train_path = "yolo_train_dataset"
output_yolo_val_path = "yolo_val_dataset"
output_yaml_path = "math_dataset_config.yaml"

# 创建 YOLO 输出目录
os.makedirs(output_yolo_train_path, exist_ok=True)
os.makedirs(output_yolo_val_path, exist_ok=True)

# 获取类别名称并分配类别 ID
category_names = sorted(os.listdir(dataset_path))  # 按文件夹名称排序
categories = {name: idx for idx, name in enumerate(category_names)}  # 确保是字典

# 设置训练集和验证集比例（80%训练集，20%验证集）
train_ratio = 0.8

annotation_id = 0

# 遍历每个类别文件夹
print("正在处理数据集...")

for category_name in tqdm(category_names, desc="处理类别文件夹"):
    category_id = categories[category_name]
    category_folder = os.path.join(dataset_path, category_name)

    if os.path.isdir(category_folder):
        # 遍历类别文件夹中的所有图片
        image_files = [f for f in os.listdir(category_folder) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
        random.shuffle(image_files)  # 随机打乱顺序

        # 按比例划分训练集和验证集
        num_train = int(len(image_files) * train_ratio)
        train_images = image_files[:num_train]
        val_images = image_files[num_train:]
        
        # 处理训练集图片
        for image_filename in tqdm(train_images, desc=f"处理 {category_name} 训练集", leave=False):
            image_path = os.path.join(category_folder, image_filename)
        
            with Image.open(image_path) as img:
                # 复制图像，确保它是可写的
                img = img.copy()
                width, height = img.size
        
            # 获取 YOLO 格式的标注信息
            annotation = f"{category_id} 0.5 0.5 1 1\n"  # 将整个图片视为物体（标注为整张图片）
            image_name = os.path.splitext(image_filename)[0]
        
            # 将 YOLO 格式的标注信息写入 txt 文件
            annotation_file = os.path.join(output_yolo_train_path, f"{image_name}.txt")
            with open(annotation_file, 'w') as f:
                f.write(annotation)
        
            # 复制图片到目标目录
            output_image_path = os.path.join(output_yolo_train_path, image_filename)
            img.save(output_image_path)


        # 处理验证集图片
        for image_filename in tqdm(val_images, desc=f"处理 {category_name} 验证集", leave=False):
            image_path = os.path.join(category_folder, image_filename)

            with Image.open(image_path) as img:
                img = img.copy()
                width, height = img.size

            # 获取 YOLO 格式的标注信息
            annotation = f"{category_id} 0.5 0.5 1 1\n"  # 将整个图片视为物体（标注为整张图片）
            image_name = os.path.splitext(image_filename)[0]

            # 将 YOLO 格式的标注信息写入 txt 文件
            annotation_file = os.path.join(output_yolo_val_path, f"{image_name}.txt")
            with open(annotation_file, 'w') as f:
                f.write(annotation)

            # 复制图片到目标目录
            output_image_path = os.path.join(output_yolo_val_path, image_filename)
            img.save(output_image_path)

# 创建 YAML 配置文件
yaml_data = {
    
    "name": "数学符号数据集",
    "path": dataset_path,
    "nc":len(categories),
    "names": list(categories.keys()),
    "train": output_yolo_train_path,
    "val": output_yolo_val_path,

}

# 将 YAML 数据写入文件
print("正在保存 YAML 配置文件...")
with open(output_yaml_path, 'w', encoding='utf-8') as yaml_file:
    yaml.dump(yaml_data, yaml_file, allow_unicode=True)

print("YOLO 数据集和 YAML 配置文件生成成功！")


正在处理数据集...


处理类别文件夹:   0%|          | 0/16 [00:00<?, ?it/s]
处理 + 训练集:   0%|          | 0/4064 [00:00<?, ?it/s][A
处理 + 训练集:   3%|▎         | 116/4064 [00:00<00:03, 1153.76it/s][A
处理 + 训练集:   6%|▌         | 236/4064 [00:00<00:03, 1177.15it/s][A
处理 + 训练集:   9%|▉         | 378/4064 [00:00<00:02, 1284.23it/s][A
处理 + 训练集:  12%|█▏        | 507/4064 [00:00<00:02, 1268.85it/s][A
处理 + 训练集:  16%|█▌        | 634/4064 [00:00<00:02, 1253.45it/s][A
处理 + 训练集:  19%|█▉        | 777/4064 [00:00<00:02, 1312.16it/s][A
处理 + 训练集:  24%|██▎       | 964/4064 [00:00<00:02, 1492.33it/s][A
处理 + 训练集:  28%|██▊       | 1157/4064 [00:00<00:01, 1604.46it/s][A
处理 + 训练集:  32%|███▏      | 1318/4064 [00:00<00:02, 1312.41it/s][A
处理 + 训练集:  36%|███▌      | 1458/4064 [00:01<00:02, 1274.79it/s][A
处理 + 训练集:  40%|███▉      | 1623/4064 [00:01<00:01, 1374.00it/s][A
处理 + 训练集:  44%|████▍     | 1787/4064 [00:01<00:01, 1446.43it/s][A
处理 + 训练集:  48%|████▊     | 1937/4064 [00:01<00:01, 1427.14it/s][A
处理 + 训练集:  51%|█████▏    | 2083/40

正在保存 YAML 配置文件...
YOLO 数据集和 YAML 配置文件生成成功！





# json转yolo

In [3]:
import os
import json
from PIL import Image
from tqdm import tqdm

def convert_to_labelimg_format(json_data, image_width, image_height, class_mapping):
    labels = json_data["labels"]
    labeltxt_lines = []

    for label in labels:
        # 使用类别映射表，将类别名称转换为编号
        class_name = label["name"]
        class_id = class_mapping.get(class_name, -1)  # 如果没有找到该类别，则返回-1，表示未知类别
        x1, y1, x2, y2 = label["x1"], label["y1"], label["x2"], label["y2"]

        # 计算框的宽度和高度
        width = x2 - x1
        height = y2 - y1

        # 计算中心点
        x_center = (x1 + x2) / 2
        y_center = (y1 + y2) / 2

        # 归一化坐标和尺寸
        x_center_normalized = round(x_center / image_width, 6)
        y_center_normalized = round(y_center / image_height, 6)
        width_normalized = round(width / image_width, 6)
        height_normalized = round(height / image_height, 6)

        # 生成labelimg的格式，使用class_id代替class_name
        labeltxt_lines.append(f"{class_id} {x_center_normalized} {y_center_normalized} {width_normalized} {height_normalized}")

    return "\n".join(labeltxt_lines)

def get_image_size(image_path):
    """
    使用Pillow库获取图像的实际宽度和高度
    """
    with Image.open(image_path) as img:
        width, height = img.size
    return width, height

def process_folder(folder_path):
    # 获取文件夹中所有的JSON文件
    json_files = [f for f in os.listdir(folder_path) if f.endswith(".json")]
    
    # 用于存储所有的类别名称
    class_names = set()

    # 使用tqdm包对JSON文件进行进度条显示
    for filename in tqdm(json_files, desc="Processing files", unit="file"):
        # 获取同名图片文件的路径
        image_filename = filename.replace(".json", ".jpg")  # 假设图片格式为jpg，可以根据实际修改
        image_path = os.path.join(folder_path, image_filename)
        
        # 如果同名图片存在
        if os.path.exists(image_path):
            json_path = os.path.join(folder_path, filename)

            # 读取JSON文件
            with open(json_path, "r") as f:
                json_data = json.load(f)
            
            # 提取所有类别名称
            for label in json_data["labels"]:
                class_names.add(label["name"])

        else:
            print(f"Image file not found for {filename}")

    # 创建类别映射表（字典形式）
    class_mapping = {class_name: idx for idx, class_name in enumerate(sorted(class_names))}

    # 输出类别映射表到class.txt
    with open(os.path.join(folder_path, "class.txt"), "w") as class_file:
        for class_name, class_id in class_mapping.items():
            class_file.write(f"{class_id} {class_name}\n")

    # 处理文件夹中的所有JSON文件，将类别名称替换为编号
    for filename in tqdm(json_files, desc="Processing files", unit="file"):
        # 获取同名图片文件的路径
        image_filename = filename.replace(".json", ".jpg")  # 假设图片格式为jpg，可以根据实际修改
        image_path = os.path.join(folder_path, image_filename)
        
        if os.path.exists(image_path):
            json_path = os.path.join(folder_path, filename)

            # 读取JSON文件
            with open(json_path, "r") as f:
                json_data = json.load(f)
            
            # 获取图片的宽度和高度
            image_width, image_height = get_image_size(image_path)

            # 转换为labelImg格式的txt，使用类别编号
            labeltxt = convert_to_labelimg_format(json_data, image_width, image_height, class_mapping)

            # 保存为txt文件
            txt_filename = os.path.join(folder_path, filename.replace(".json", ".txt"))
            with open(txt_filename, "w") as txt_file:
                txt_file.write(labeltxt)

        else:
            print(f"Image file not found for {filename}")

# 输入文件夹路径
folder_path = r"E:\ipynb\datasets\classroom\example\classroom_train"  # 替换为你的文件夹路径

# 处理文件夹中的所有JSON文件
process_folder(folder_path)


Processing files: 100%|██████████| 8884/8884 [00:03<00:00, 2939.86file/s]
Processing files: 100%|██████████| 8884/8884 [00:12<00:00, 704.65file/s]


In [10]:
from ultralytics import YOLO
model = YOLO("yolov10n.pt")
results = model.train(data = r"E:\ipynb\pyhton课设\dataset\math_dataset_config.yaml", epochs=50, imgsz=640)

New https://pypi.org/project/ultralytics/8.3.32 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.29  Python-3.10.11 torch-2.5.1+cu124 CUDA:0 (NVIDIA GeForce RTX 3060 Laptop GPU, 6144MiB)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=yolov10n.pt, data=E:\ipynb\pyhton\dataset\math_dataset_config.yaml, epochs=50, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train5, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=No

100%|██████████| 5.35M/5.35M [00:00<00:00, 9.75MB/s]


[34m[1mAMP: [0mchecks passed 


[34m[1mtrain: [0mScanning E:\ipynb\pyhton课设\dataset\yolo_train_dataset... 54738 images, 0 backgrounds, 0 corrupt: 100%|██████████| 54738/54738 [00:19<00:00, 2863.93it/s]


[34m[1mtrain: [0mNew cache created: E:\ipynb\pyhton\dataset\yolo_train_dataset.cache


[34m[1mval: [0mScanning E:\ipynb\pyhton课设\dataset\yolo_val_dataset... 13778 images, 0 backgrounds, 0 corrupt: 100%|██████████| 13778/13778 [00:06<00:00, 2117.75it/s]


[34m[1mval: [0mNew cache created: E:\ipynb\pyhton\dataset\yolo_val_dataset.cache
Plotting labels to runs\detect\train5\labels.jpg... 
Unable to allocate 3.81 MiB for an array with shape (1000, 1000, 4) and data type uint8
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m SGD(lr=0.01, momentum=0.9) with parameter groups 95 weight(decay=0.0), 108 weight(decay=0.0005), 107 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mruns\detect\train5[0m
Starting training for 50 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


  0%|          | 0/3422 [00:00<?, ?it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 76.00 MiB. GPU 0 has a total capacity of 6.00 GiB of which 4.83 GiB is free. Of the allocated memory 139.10 MiB is allocated by PyTorch, and 18.90 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)