In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, random_split
from tqdm import tqdm
import os
import shutil
from PIL import Image

# 设置设备
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 数据预处理
data_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# 加载数据集
dataset = datasets.ImageFolder(root='/home/ubuntu/IndustrialDigitDatasetGenerator/classification_dataset', transform=data_transforms)

# 划分训练集和验证集
train_size = int(0.7 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# 数据加载器
# train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=4)
# val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False, num_workers=4)

# 打印数据集大小
print(f'训练集大小: {train_size}')
print(f'验证集大小: {val_size}')

# # 打印具体分类的大小
# train_class_counts = {class_name: 0 for class_name in dataset.classes}
# val_class_counts = {class_name: 0 for class_name in dataset.classes}

# for _, label in train_dataset:
#     train_class_counts[dataset.classes[label]] += 1

# for _, label in val_dataset:
#     val_class_counts[dataset.classes[label]] += 1

# print("训练集分类大小:")
# for class_name, count in train_class_counts.items():
#     print(f'{class_name}: {count}')

# print("验证集分类大小:")
# for class_name, count in val_class_counts.items():
#     print(f'{class_name}: {count}')



  from .autonotebook import tqdm as notebook_tqdm


训练集大小: 14006
验证集大小: 6003


In [2]:
# 加载预训练的 ResNet50 模型
# model = models.resnet50(pretrained=True)
# model = models.resnet18(pretrained=True)
model = models.efficientnet_b0(pretrained=True)
# model = models.mnasnet1_0(weights=models.MNASNet1_0_Weights.DEFAULT)
# model = models.maxvit_t(weights=models.MaxVit_T_Weights.DEFAULT)

# 打印模型的输入向量的大小
# print(f"模型的输入向量大小: {model.fc.in_features}")

# 打印模型的细节
# print(model)



In [4]:
def train(model, train_dataset, val_dataset, num_epochs=10, batch_size=64, learning_rate=0.001, save_interval=2, device='cuda'):
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    
    # 数据加载器
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
    
    best_acc = 0.0
    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}/{num_epochs}')
        
        # 训练阶段
        model.train()
        running_loss = 0.0
        running_corrects = 0
        
        train_loader_tqdm = tqdm(train_loader, desc="训练进度", leave=False)
        
        for inputs, labels in train_loader_tqdm:
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            _, preds = torch.max(outputs, 1)
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)
            train_loader_tqdm.set_postfix({'Loss': loss.item()})
        
        epoch_loss = running_loss / len(train_dataset)
        epoch_acc = running_corrects.double() / len(train_dataset)
        print(f'训练损失: {epoch_loss:.4f} 训练准确率: {epoch_acc:.4f}')
        
        # 验证阶段
        model.eval()
        val_running_loss = 0.0
        val_running_corrects = 0
        
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                
                _, preds = torch.max(outputs, 1)
                val_running_loss += loss.item() * inputs.size(0)
                val_running_corrects += torch.sum(preds == labels.data)
        
        val_loss = val_running_loss / len(val_dataset)
        val_acc = val_running_corrects.double() / len(val_dataset)
        print(f'验证损失: {val_loss:.4f} 验证准确率: {val_acc:.4f}')
        
        # 保存最佳模型
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), 'best_model.pth')
            print('已保存最佳模型：best_model.pth')
        
        # 每隔 save_interval 轮保存一次模型
        if (epoch + 1) % save_interval == 0:
            checkpoint_path = f'checkpoint_epoch_{epoch+1}.pth'
            torch.save(model.state_dict(), checkpoint_path)
            print(f'已保存模型到 {checkpoint_path}')

In [5]:
# 修改最后的全连接层
if hasattr(model, 'fc'):
    num_ftrs = model.fc.in_features
    model.fc = nn.Linear(num_ftrs, 10)  # 10 类分类
elif hasattr(model, 'Linear'):
    num_ftrs = model.Linear.in_features
    model.Linear = nn.Linear(num_ftrs, 10)  # 10 类分类

# num_ftrs = model.fc.in_features
# model.fc = nn.Linear(num_ftrs, 10)  # 10 类分类

train(model, 
      train_dataset, 
      val_dataset, 
      num_epochs=5, 
      batch_size=256, 
      learning_rate=0.001, 
      save_interval=2, 
      device=device)


Epoch 1/5


                                                                      

训练损失: 0.6249 训练准确率: 0.8812




验证损失: 0.0413 验证准确率: 0.9872
已保存最佳模型：best_model.pth
Epoch 2/5


                                                                      

训练损失: 0.0244 训练准确率: 0.9917




验证损失: 0.0325 验证准确率: 0.9900
已保存最佳模型：best_model.pth
已保存模型到 checkpoint_epoch_2.pth
Epoch 3/5


                                                                      

训练损失: 0.0160 训练准确率: 0.9941




验证损失: 0.0247 验证准确率: 0.9922
已保存最佳模型：best_model.pth
Epoch 4/5


                                                                      

训练损失: 0.0132 训练准确率: 0.9951




验证损失: 0.0293 验证准确率: 0.9920
已保存模型到 checkpoint_epoch_4.pth
Epoch 5/5


                                                                       

训练损失: 0.0104 训练准确率: 0.9971




验证损失: 0.0356 验证准确率: 0.9902


In [6]:
import time

# 设置设备
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 加载模型
model.load_state_dict(torch.load('best_model.pth'))  # 加载训练好的模型权重
model.eval()
model = model.to(device)
# 数据预处理（与训练时相同）
data_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])
# 类别名称列表（根据你的实际类别名称修改）
class_names = ['0', '1', '2', '3', '4','5', '6', '7', '8', '9']
# 测试图像文件夹路径
test_dir = '/home/ubuntu/IndustrialDigitDatasetGenerator/sigle_num'  # 替换为你的测试图像文件夹路径
# 结果保存文件夹路径
output_dir = '/home/ubuntu/IndustrialDigitDatasetGenerator/sigle_num_result'  # 替换为你想要保存预测结果的文件夹路径

if not os.path.exists(output_dir):
    os.makedirs(output_dir)

with torch.no_grad():
    for img_name in os.listdir(test_dir):
        img_path = os.path.join(test_dir, img_name)
        # 打开图像并预处理
        image = Image.open(img_path).convert('RGB')
        input_tensor = data_transforms(image).unsqueeze(0).to(device)
        
        # 开始计时
        start_time = time.time()
        
        # 模型预测
        outputs = model(input_tensor)
        _, preds = torch.max(outputs, 1)
        print(preds)
        predicted_class = class_names[preds.item()]
        
        # 结束计时
        end_time = time.time()
        processing_time = end_time - start_time
        
        # 新的文件名，包含预测结果
        new_filename = f"{predicted_class}_{img_name}"
        output_path = os.path.join(output_dir, new_filename)
        
        # 将图像复制到结果文件夹，并重命名
        shutil.copy(img_path, output_path)
        
        print(f"已处理：{img_name} -> {new_filename}，处理时间：{processing_time*1000:.2f} ms")


tensor([2], device='cuda:0')
已处理：02.bmp -> 2_02.bmp，处理时间：48.19 ms
tensor([7], device='cuda:0')
已处理：1 - 副本.bmp -> 7_1 - 副本.bmp，处理时间：9.26 ms
tensor([3], device='cuda:0')
已处理：num.bmp -> 3_num.bmp，处理时间：9.75 ms
tensor([7], device='cuda:0')
已处理：17.bmp -> 7_17.bmp，处理时间：9.16 ms
tensor([2], device='cuda:0')
已处理：11.bmp -> 2_11.bmp，处理时间：8.88 ms
tensor([9], device='cuda:0')
已处理：09.bmp -> 9_09.bmp，处理时间：9.01 ms


  model.load_state_dict(torch.load('best_model.pth'))  # 加载训练好的模型权重


tensor([8], device='cuda:0')
已处理：08.bmp -> 8_08.bmp，处理时间：11.06 ms
tensor([7], device='cuda:0')
已处理：15_待识别数字_0.jpg -> 7_15_待识别数字_0.jpg，处理时间：9.49 ms
tensor([4], device='cuda:0')
已处理：04.bmp -> 4_04.bmp，处理时间：13.02 ms
tensor([2], device='cuda:0')
已处理：1.bmp -> 2_1.bmp，处理时间：9.01 ms
tensor([3], device='cuda:0')
已处理：33.bmp -> 3_33.bmp，处理时间：13.56 ms
tensor([1], device='cuda:0')
已处理：11 (2).bmp -> 1_11 (2).bmp，处理时间：9.32 ms
tensor([1], device='cuda:0')
已处理：01.bmp -> 1_01.bmp，处理时间：9.30 ms
tensor([2], device='cuda:0')
已处理：2.jpg -> 2_2.jpg，处理时间：11.41 ms
tensor([0], device='cuda:0')
已处理：10.bmp -> 0_10.bmp，处理时间：9.03 ms
tensor([2], device='cuda:0')
已处理：12.bmp -> 2_12.bmp，处理时间：9.43 ms
tensor([5], device='cuda:0')
已处理：process12000.png -> 5_process12000.png，处理时间：11.62 ms
tensor([4], device='cuda:0')
已处理：10 - 副本.bmp -> 4_10 - 副本.bmp，处理时间：10.03 ms
tensor([2], device='cuda:0')
已处理：15_待识别数字_1.jpg -> 2_15_待识别数字_1.jpg，处理时间：9.51 ms
tensor([9], device='cuda:0')
已处理：9.jpg -> 9_9.jpg，处理时间：9.11 ms
tensor([1], device='

In [7]:
import importlib
import model_evaluation

# 重新加载模块
importlib.reload(model_evaluation)

# 重新导入 ModelEvaluator 类
from model_evaluation import ModelEvaluator
# 设置设备
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 加载模型（需要你的模型定义）
model.load_state_dict(torch.load('best_model.pth'))
model = model.to(device)

# 类别名称
class_names = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

# 路径设置
test_dir = '/home/ubuntu/IndustrialDigitDatasetGenerator/classification_dataset_test'
output_dir = '/home/ubuntu/IndustrialDigitDatasetGenerator/'

# 创建评估器
evaluator = ModelEvaluator(model, device, class_names, test_dir, output_dir)

# 运行评估
print("开始模型评估...")
evaluator.evaluate_model()

# 生成混淆矩阵
# print("生成混淆矩阵...")
evaluator.plot_confusion_matrix()

# 打印分类报告
evaluator.print_classification_report()

# 生成示例图片网格
# print("生成示例图片...")
success_grid = evaluator.create_example_grid(evaluator.success_examples, True)
failed_grid = evaluator.create_example_grid(evaluator.failed_examples, False)

import cv2
if success_grid is not None:
    cv2.imwrite(os.path.join(output_dir, 'success_examples.png'), 
                cv2.cvtColor(success_grid, cv2.COLOR_RGB2BGR))
if failed_grid is not None:
    cv2.imwrite(os.path.join(output_dir, 'failed_examples.png'), 
                cv2.cvtColor(failed_grid, cv2.COLOR_RGB2BGR))

    print(f"评估完成！结果保存在: {output_dir}")

  model.load_state_dict(torch.load('best_model.pth'))


开始模型评估...

分类报告:
              precision    recall  f1-score   support

           0       0.94      0.94      0.94        32
           1       0.79      1.00      0.88        78
           2       1.00      0.94      0.97        36
           3       1.00      0.78      0.88        36
           4       1.00      0.79      0.88        29
           5       1.00      0.97      0.98        30
           6       1.00      1.00      1.00        18
           7       0.93      0.81      0.87        16
           8       1.00      0.85      0.92        13
           9       1.00      1.00      1.00        10

    accuracy                           0.92       298
   macro avg       0.97      0.91      0.93       298
weighted avg       0.93      0.92      0.92       298

评估完成！结果保存在: /home/ubuntu/IndustrialDigitDatasetGenerator/
