In [1]:
#调用的库
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
import os
from PIL import Image
from torch.utils.data import Dataset
import torch.nn.functional as F


In [2]:
# Select the compute device: use the GPU when CUDA is available, otherwise fall
# back to the CPU so the notebook still runs on machines without a GPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
# Preprocessing configuration: loader batch size and the target image size.
batch_sizes = 64
height = width = 64

# Per-image pipeline; the steps run in the order listed.
preprocess = transforms.Compose(
    [
        transforms.Resize((height, width)),           # resize to (height, width)
        transforms.Grayscale(num_output_channels=1),  # reduce to a single channel
        transforms.ToTensor(),                        # convert the image to a tensor
        transforms.Normalize(mean=[0.5], std=[0.5]),  # normalise: (x - 0.5) / 0.5
    ]
)

In [4]:
# Custom collate function: converts the PIL images of a batch into tensors.
def custom_collate_fn(batch):
    """Collate ``(image, label)`` samples without stacking them.

    Args:
        batch: Iterable of ``(image, label)`` pairs as produced by the dataset.

    Returns:
        A pair ``(images, labels)`` where ``images`` is a list holding one
        tensor per sample and ``labels`` is a tuple of the sample labels, in
        the same order.
    """
    to_tensor = transforms.ToTensor()
    pil_images, targets = zip(*batch)
    return [to_tensor(picture) for picture in pil_images], targets

class TravelDatasets(Dataset):
    """Image-classification dataset read from a flat folder of ``.jpg``/``.txt`` pairs.

    Every ``name.jpg`` found directly in ``root_dir`` is paired with
    ``name.txt`` from the same folder. The content of the text file, with
    surrounding whitespace stripped, is the class label of that image.

    Args:
        root_dir: Folder holding the image files and their label files.
        transform: Optional callable applied to the loaded PIL image.
        label_to_int: Optional label-text -> class-id mapping to reuse, e.g.
            the mapping of the training set when building a test set, so both
            datasets yield the same id for the same label. When omitted, ids
            ``0 .. n_classes - 1`` are assigned to the sorted unique labels
            found in ``root_dir``.

    Raises:
        KeyError: If ``label_to_int`` is given and a label file contains a
            label that the mapping does not know.
    """

    def __init__(self, root_dir, transform=None, label_to_int=None):
        self.root_dir = root_dir
        self.transform = transform
        self.image_paths = []  # paths of the image files
        self.labels = []       # paths of the matching label files (same order)

        # Sort the listing: os.listdir() returns entries in arbitrary order,
        # which would make the sample order differ between runs and machines.
        for filename in sorted(os.listdir(root_dir)):
            if filename.endswith(".jpg"):
                # Swap only the extension; str.replace() would also rewrite a
                # ".jpg" that happens to occur in the middle of the name.
                stem = os.path.splitext(filename)[0]
                self.image_paths.append(os.path.join(root_dir, filename))
                self.labels.append(os.path.join(root_dir, stem + ".txt"))

        # Read every label file once up front instead of on every __getitem__.
        self._label_texts = [self._read_label(path) for path in self.labels]

        if label_to_int is None:
            # Contiguous ids over the sorted unique labels: ids always lie in
            # [0, n_classes) and do not depend on the file order.
            unique_labels = sorted(set(self._label_texts))
            self.label_to_int = {text: idx for idx, text in enumerate(unique_labels)}
        else:
            self.label_to_int = dict(label_to_int)
            unknown = sorted(set(self._label_texts) - set(self.label_to_int))
            if unknown:
                raise KeyError(
                    f"labels missing from the provided label_to_int mapping: {unknown}"
                )

    @staticmethod
    def _read_label(label_path):
        """Return the UTF-8 text of a label file without surrounding whitespace."""
        with open(label_path, 'r', encoding='utf-8') as file:
            # Strip so that "X" and "X\n" are treated as the same class.
            return file.read().strip()

    def __len__(self):
        """Return the number of image/label pairs found in ``root_dir``."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Return ``(image, class_id)`` for sample ``idx``.

        The image is the PIL image opened from disk, passed through
        ``transform`` when one was given; ``class_id`` is the integer that
        ``label_to_int`` assigns to the sample's label text.
        """
        image = Image.open(self.image_paths[idx])

        if self.transform is not None:
            image = self.transform(image)

        return image, self.label_to_int[self._label_texts[idx]]

# Build the training dataset without a transform, so samples stay PIL images.
root_dir = r'D:\Ai_Project_Tool\Jupyter_notebook\汉字识别GPU版本\汉字数据集\精简版1.0'
custom_dataset = TravelDatasets(root_dir, transform=None)

# Loader over shuffled batches of 4; custom_collate_fn converts each PIL image
# to a tensor and returns the images as a list.
data_loader = DataLoader(
    custom_dataset,
    batch_size=4,
    shuffle=True,
    collate_fn=custom_collate_fn,
)

# Build the test dataset, with the preprocessing pipeline applied.
# NOTE(review): each TravelDatasets instance builds its own label_to_int, so
# train and test class ids are only comparable if both mappings agree - verify.
test_root_dir = r'D:\Ai_Project_Tool\Jupyter_notebook\汉字识别GPU版本\汉字数据集\精简版1.0test'
default_label_to_int = {}  # intended default mapping (unknown labels -> -1); not used in the code shown here
test_dataset = TravelDatasets(test_root_dir, transform=preprocess)

# Loader over the test dataset (default collation).
test_loader = DataLoader(
    test_dataset,
    batch_size=4,
    shuffle=True,
)

# Peek at a single batch to check what the loader yields.
for images, labels in data_loader:
    print("Label:", labels)  # class ids of the batch
    print("Image:", images)  # image tensors of the batch
    break  # one batch is enough

Label: (48, 97, 43, 90)
Image: [tensor([[[1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         ...,
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.]]]), tensor([[[1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         ...,
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.],
         [1., 1., 1.,  ..., 1., 1., 1.]]]), tensor([[[1.0000, 1.0000, 0.9922,  ..., 1.0000, 1.0000, 1.0000],
         [0.9961, 0.9882, 0.9882,  ..., 1.0000, 1.0000, 1.0000],
         [0.9961, 1.0000, 0.9922,  ..., 1.0000, 1.0000, 1.0000],
         ...,
         [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
         [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
         [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000]]]), tensor([[[1., 1., 1.,  ..., 1., 1.,

In [5]:
# Rebuild the training dataset, this time with the preprocessing pipeline applied.
custom_dataset = TravelDatasets(root_dir=root_dir, transform=preprocess)

# Replace the loader with the one used for training: shuffled batches of
# `batch_sizes` samples with the default collation.
data_loader = DataLoader(
    dataset=custom_dataset,
    shuffle=True,
    batch_size=batch_sizes,
)

In [6]:
# Network definition
class HandwritingRecognizer_chinese(nn.Module):
    """Small CNN classifier for single-channel images.

    Layers, in order: 3x3 convolution (1 -> 32 channels, padding 1),
    LeakyReLU(0.1), 2x2 max-pooling with stride 2, flatten, linear layer to
    128 features, LeakyReLU(0.1), linear layer to ``num_classes`` logits.

    Args:
        num_classes: Number of output logits (one per class).
        input_size: ``(height, width)`` of the input images. The padded
            convolution keeps the spatial size and the pooling halves it, so
            the first linear layer takes ``32 * (height // 2) * (width // 2)``
            features. The default ``(64, 64)`` gives ``32 * 32 * 32``.

    Raises:
        ValueError: If ``height`` or ``width`` is smaller than 2, which would
            leave nothing after the 2x2 pooling.
    """

    def __init__(self, num_classes, input_size=(64, 64)):
        super(HandwritingRecognizer_chinese, self).__init__()

        height, width = input_size
        if height < 2 or width < 2:
            raise ValueError(
                f"input_size must be at least (2, 2), got {tuple(input_size)}"
            )

        # Convolution 1: 1 input channel, 32 output channels, 3x3 kernel, padding 1.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)

        # Max-pooling: 2x2 window, stride 2 (halves height and width).
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Fully connected 1: flattened pooled feature map -> 128 features.
        self.fc1 = nn.Linear(32 * (height // 2) * (width // 2), 128)

        # Fully connected 2: 128 features -> num_classes logits.
        self.fc2 = nn.Linear(128, num_classes)

        # Leaky ReLU activation; 0.1 is the negative slope and can be tuned.
        self.leaky_relu = nn.LeakyReLU(0.1)

    def forward(self, x):
        """Return the class logits, shape ``(batch, num_classes)``, for a batch of images."""
        # Convolution + Leaky ReLU, then max-pooling.
        x = self.pool(self.leaky_relu(self.conv1(x)))

        # Flatten everything except the batch dimension.
        x = torch.flatten(x, start_dim=1)

        # Hidden fully connected layer with Leaky ReLU.
        x = self.leaky_relu(self.fc1(x))

        # Output layer: raw logits, no activation.
        x = self.fc2(x)

        return x

In [7]:
# Instantiate the network and move it to the selected device.
# NOTE(review): num_classes must be larger than every class id the dataset
# yields - confirm it matches the data.
num_classes = 100  # number of distinct handwritten characters in the dataset
model = HandwritingRecognizer_chinese(num_classes)
model.to(device)

HandwritingRecognizer_chinese(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=32768, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=100, bias=True)
  (leaky_relu): LeakyReLU(negative_slope=0.1)
)

In [8]:
# Loss function (cross-entropy on the logits) and optimiser (Adam over all
# model parameters, learning rate 1e-3).
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [13]:
# Train the model.
# NOTE(review): the saved output of this cell reports 250 epochs, so it was
# produced with a different num_epochs than the value set here.
num_epochs = 100
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0.0  # sum of the per-batch losses of this epoch

    for images, labels in data_loader:
        images = images.to(device, dtype=torch.float32)
        # Convert the whole label batch in one call instead of rebuilding it
        # element by element in Python; works for a tensor or a plain sequence.
        labels = torch.as_tensor(labels, dtype=torch.long, device=device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

    # Mean of the per-batch losses over the epoch.
    average_loss = epoch_loss / len(data_loader)
    print(f"Epoch [{epoch + 1}/{num_epochs}] Average Loss: {average_loss:.4f}")

Epoch [1/250] Average Loss: 0.0039
Epoch [2/250] Average Loss: 0.0038
Epoch [3/250] Average Loss: 0.0038
Epoch [4/250] Average Loss: 0.0038
Epoch [5/250] Average Loss: 0.0038
Epoch [6/250] Average Loss: 0.0037
Epoch [7/250] Average Loss: 0.0037
Epoch [8/250] Average Loss: 0.0036
Epoch [9/250] Average Loss: 0.0037
Epoch [10/250] Average Loss: 0.0035
Epoch [11/250] Average Loss: 0.0036
Epoch [12/250] Average Loss: 0.0035
Epoch [13/250] Average Loss: 0.0034
Epoch [14/250] Average Loss: 0.0035
Epoch [15/250] Average Loss: 0.0034
Epoch [16/250] Average Loss: 0.0033
Epoch [17/250] Average Loss: 0.0033
Epoch [18/250] Average Loss: 0.0033
Epoch [19/250] Average Loss: 0.0032
Epoch [20/250] Average Loss: 0.0032
Epoch [21/250] Average Loss: 0.0032
Epoch [22/250] Average Loss: 0.0032
Epoch [23/250] Average Loss: 0.0031
Epoch [24/250] Average Loss: 0.0031
Epoch [25/250] Average Loss: 0.0030
Epoch [26/250] Average Loss: 0.0031
Epoch [27/250] Average Loss: 0.0030
Epoch [28/250] Average Loss: 0.0030
E

Epoch [227/250] Average Loss: 0.0009
Epoch [228/250] Average Loss: 0.0008
Epoch [229/250] Average Loss: 0.0008
Epoch [230/250] Average Loss: 0.0008
Epoch [231/250] Average Loss: 0.0008
Epoch [232/250] Average Loss: 0.0008
Epoch [233/250] Average Loss: 0.0008
Epoch [234/250] Average Loss: 0.0008
Epoch [235/250] Average Loss: 0.0008
Epoch [236/250] Average Loss: 0.0008
Epoch [237/250] Average Loss: 0.0008
Epoch [238/250] Average Loss: 0.0008
Epoch [239/250] Average Loss: 0.0008
Epoch [240/250] Average Loss: 0.0008
Epoch [241/250] Average Loss: 0.0008
Epoch [242/250] Average Loss: 0.0008
Epoch [243/250] Average Loss: 0.0008
Epoch [244/250] Average Loss: 0.0008
Epoch [245/250] Average Loss: 0.0008
Epoch [246/250] Average Loss: 0.0008
Epoch [247/250] Average Loss: 0.0008
Epoch [248/250] Average Loss: 0.0008
Epoch [249/250] Average Loss: 0.0008
Epoch [250/250] Average Loss: 0.0008


In [14]:
# Save the model: only its state_dict is written to disk.
torch.save(obj=model.state_dict(), f="handwriting_recognizer_chinese_beta1.0.pth")