In [1]:
import pandas as pd
import os
import shutil

### 训练集

In [2]:
# Convert the training split into a folder-per-label layout.
# Each image listed in the label CSV is copied into one sub-folder of
# `target_folder` for every recognised label it carries, so an image with
# several labels ends up in several folders.

# Path configuration.
# Forward slashes are used everywhere: the earlier 'train\images' literal
# depended on the invalid escape sequence '\i' and is not a directory
# separator outside Windows, so no source image could be found there.
source_folder = 'train/images'  # folder holding the original images
target_folder = '../dataset/train'  # root folder of the converted dataset
label_file = 'train/train_label.csv'  # CSV read for the `images` / `labels` columns

# Load the label table
labels_df = pd.read_csv(label_file)

# Label name -> name of the sub-folder that receives images with that label
label_to_folder = {
    "complex": "100000",
    "frog_eye_leaf_spot": "010000",
    "healthy": "001000",
    "powdery_mildew": "000100",
    "rust": "000010",
    "scab": "000001"
}

# Make sure the output root exists (no error if it is already there)
os.makedirs(target_folder, exist_ok=True)

# Walk the label table row by row
for index, row in labels_df.iterrows():
    image_name = row['images']
    # `labels` holds whitespace-separated label names
    image_labels = row['labels'].split()
    # The source path does not depend on the label, so build it once per image
    src_path = os.path.join(source_folder, image_name)

    # Copy the image into the folder of every label it carries
    for label in image_labels:
        folder_name = label_to_folder.get(label)
        if not folder_name:
            # Labels that are missing from the mapping are skipped
            continue

        # Create the label folder on first use
        label_folder = os.path.join(target_folder, folder_name)
        os.makedirs(label_folder, exist_ok=True)

        dst_path = os.path.join(label_folder, image_name)
        shutil.copy(src_path, dst_path)

print("数据集转换完成！")

数据集转换完成！


In [3]:
# Build a one-hot label table for the training split and save it as CSV.

# Input label CSV and destination of the encoded table
original_label_file = 'train/train_label.csv'
output_file = 'train/OneHot-train_label.csv'

original_labels_df = pd.read_csv(original_label_file)

# Label name -> name of its indicator column
labels = dict(
    complex="label_complex",
    frog_eye_leaf_spot="label_frog_eye_leaf_spot",
    healthy="label_healthy",
    powdery_mildew="label_powdery_mildew",
    rust="label_rust",
    scab="label_scab",
)

# Start from the image names with every indicator column set to 0
indicator_columns = {column: 0 for column in labels.values()}
one_hot_encoded = pd.DataFrame({
    'images': original_labels_df['images'],
    **indicator_columns,
})

# Flip the indicator to 1 for each recognised label found on a row
for row_id, record in original_labels_df.iterrows():
    for token in record['labels'].split():
        column = labels.get(token)
        if column is not None:
            one_hot_encoded.at[row_id, column] = 1

# Write the encoded table without the index column
one_hot_encoded.to_csv(output_file, index=False)

print("One-hot 编码文件已生成！")


One-hot 编码文件已生成！


### 验证集

In [4]:
# Convert the validation split into a folder-per-label layout: every image
# is copied into one sub-folder per recognised label it carries.

# Image folder, output root and label CSV for this split
source_folder = 'val/images'
target_folder = '../dataset/val'
label_file = 'val/val_label.csv'

labels_df = pd.read_csv(label_file)

# Label name -> name of the sub-folder that receives images with that label
label_to_folder = dict(
    complex="100000",
    frog_eye_leaf_spot="010000",
    healthy="001000",
    powdery_mildew="000100",
    rust="000010",
    scab="000001",
)

# Create the output root when it is missing
if not os.path.exists(target_folder):
    os.makedirs(target_folder)

for _, record in labels_df.iterrows():
    file_name = record['images']

    # `labels` holds whitespace-separated label names
    for tag in record['labels'].split():
        subfolder = label_to_folder.get(tag)
        if not subfolder:
            # Labels that are missing from the mapping are skipped
            continue

        # Create the label folder on first use
        destination_dir = os.path.join(target_folder, subfolder)
        if not os.path.exists(destination_dir):
            os.makedirs(destination_dir)

        # Copy the image from the source folder into the label folder
        shutil.copy(
            os.path.join(source_folder, file_name),
            os.path.join(destination_dir, file_name),
        )

print("数据集转换完成！")


数据集转换完成！


In [5]:
# Build a one-hot label table for the validation split and save it as CSV.

# Input label CSV and destination of the encoded table
original_label_file = 'val/val_label.csv'
output_file = 'val/OneHot-val_label.csv'

original_labels_df = pd.read_csv(original_label_file)

# Label name -> name of its indicator column
labels = dict(
    complex="label_complex",
    frog_eye_leaf_spot="label_frog_eye_leaf_spot",
    healthy="label_healthy",
    powdery_mildew="label_powdery_mildew",
    rust="label_rust",
    scab="label_scab",
)

# Start from the image names with every indicator column set to 0
indicator_columns = {column: 0 for column in labels.values()}
one_hot_encoded = pd.DataFrame({
    'images': original_labels_df['images'],
    **indicator_columns,
})

# Flip the indicator to 1 for each recognised label found on a row
for row_id, record in original_labels_df.iterrows():
    for token in record['labels'].split():
        column = labels.get(token)
        if column is not None:
            one_hot_encoded.at[row_id, column] = 1

# Write the encoded table without the index column
one_hot_encoded.to_csv(output_file, index=False)

print("One-hot 编码文件已生成！")


One-hot 编码文件已生成！


### 测试集转换

In [6]:
# Convert the test split into a folder-per-label layout: every image is
# copied into one sub-folder per recognised label it carries.

# Image folder, output root and label CSV for this split
source_folder = 'test/images'
target_folder = '../dataset/test'
label_file = 'test/test_label.csv'

labels_df = pd.read_csv(label_file)

# Label name -> name of the sub-folder that receives images with that label
label_to_folder = dict(
    complex="100000",
    frog_eye_leaf_spot="010000",
    healthy="001000",
    powdery_mildew="000100",
    rust="000010",
    scab="000001",
)

# Create the output root when it is missing
if not os.path.exists(target_folder):
    os.makedirs(target_folder)

for _, record in labels_df.iterrows():
    file_name = record['images']

    # `labels` holds whitespace-separated label names
    for tag in record['labels'].split():
        subfolder = label_to_folder.get(tag)
        if not subfolder:
            # Labels that are missing from the mapping are skipped
            continue

        # Create the label folder on first use
        destination_dir = os.path.join(target_folder, subfolder)
        if not os.path.exists(destination_dir):
            os.makedirs(destination_dir)

        # Copy the image from the source folder into the label folder
        shutil.copy(
            os.path.join(source_folder, file_name),
            os.path.join(destination_dir, file_name),
        )

print("数据集转换完成！")


数据集转换完成！


In [7]:
# Build a one-hot label table for the test split and save it as CSV.

# Input label CSV and destination of the encoded table
original_label_file = 'test/test_label.csv'
output_file = 'test/OneHot-test_label.csv'

original_labels_df = pd.read_csv(original_label_file)

# Label name -> name of its indicator column
labels = dict(
    complex="label_complex",
    frog_eye_leaf_spot="label_frog_eye_leaf_spot",
    healthy="label_healthy",
    powdery_mildew="label_powdery_mildew",
    rust="label_rust",
    scab="label_scab",
)

# Start from the image names with every indicator column set to 0
indicator_columns = {column: 0 for column in labels.values()}
one_hot_encoded = pd.DataFrame({
    'images': original_labels_df['images'],
    **indicator_columns,
})

# Flip the indicator to 1 for each recognised label found on a row
for row_id, record in original_labels_df.iterrows():
    for token in record['labels'].split():
        column = labels.get(token)
        if column is not None:
            one_hot_encoded.at[row_id, column] = 1

# Write the encoded table without the index column
one_hot_encoded.to_csv(output_file, index=False)

print("One-hot 编码文件已生成！")


One-hot 编码文件已生成！
