In [31]:
import torch
from torch.utils.data import Dataset
import numpy as np
import os
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torchvision.transforms import ToTensor

# Class id of every supported character: the ten digits come first (ids 0-9),
# followed by the letters a, e, i, m, n, x (ids 10-15) and the symbols '-' and ':'
# (ids 16-17). A character's id is simply its position in the alphabet string.
char_to_index = {symbol: position for position, symbol in enumerate('0123456789aeimnx-:')}

class CharacterDataset(Dataset):
    """Dataset that pairs character images with integer class ids.

    Args:
        images: indexable collection of image arrays accepted by ``Image.fromarray``.
        labels: indexable collection of character labels, parallel to ``images``;
            every entry must be a key of ``char_to_index``.
        transform: optional callable applied to the grayscale PIL image.
    """

    def __init__(self, images, labels, transform=None):
        self.images, self.labels, self.transform = images, labels, transform

    def __len__(self):
        """Return the number of stored images."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, class_id)`` for the sample at ``idx``."""
        # An index may arrive as a tensor; unwrap it to plain Python first.
        position = idx.tolist() if torch.is_tensor(idx) else idx

        pixels = self.images[position]
        class_id = char_to_index[self.labels[position]]

        # Go through a PIL image so the transform can be applied; 'L' means grayscale.
        picture = Image.fromarray(pixels).convert('L')
        if self.transform:
            picture = self.transform(picture)

        return picture, class_id

# Assumes the characters have already been segmented and labelled.
# char_images holds every segmented character image; labels holds the matching
# character label at the same position.
char_images, labels = [], []


In [32]:
# Rebuild both lists from scratch so that re-running this cell does not append
# duplicate samples to whatever an earlier run left behind.
char_images = []
labels = []

# Sorted for a reproducible sample order; os.listdir() returns entries in arbitrary order.
for f in sorted(os.listdir('train_data')):
    path = os.path.join('train_data', f)
    if not os.path.isfile(path):
        # Skip sub-directories (e.g. notebook checkpoint folders); Image.open cannot read them.
        continue
    # The context manager closes the file handle once the pixels are copied into the array.
    with Image.open(path) as image:
        char_images.append(np.array(image))
    # The label is the part of the file name before the first dot.
    labels.append(f.split('.')[0])

In [33]:
# Build the dataset
transform = transforms.Compose([
    # Resize to the input size the model expects
    transforms.Resize(size=(28, 28)),
    transforms.ToTensor(),
    # Single-channel normalisation with mean 0.5 and std 0.5
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

dataset = CharacterDataset(images=char_images, labels=labels, transform=transform)

# The DataLoader batches the samples and shuffles them
data_loader = DataLoader(dataset=dataset, batch_size=64, shuffle=True)

## 开始进行模型训练

In [36]:
# 2. Define the model
class Net(nn.Module):
    """Small CNN for single-channel 28x28 character images.

    Two 3x3 convolutions, one 2x2 max-pool, and two fully connected layers;
    the forward pass returns log-probabilities over the classes.

    Args:
        num_classes: size of the output layer. The default of 18 matches the
            character set used in this notebook (10 digits, 6 letters, 2 symbols).
    """

    def __init__(self, num_classes=18):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.5)
        # 9216 = 64 channels * 12 * 12: a 28x28 input shrinks to 26x26 and then 24x24
        # through the two unpadded 3x3 convolutions, and the 2x2 max-pool halves it to 12x12.
        self.fc1 = nn.Linear(9216, 128)
        # Output layer: one logit per class
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return per-class log-probabilities for a batch ``x`` of shape (N, 1, 28, 28)."""
        x = self.conv1(x)
        x = nn.functional.relu(x)
        x = self.conv2(x)
        x = nn.functional.relu(x)
        x = nn.functional.max_pool2d(x, 2)
        x = self.dropout1(x)
        # Keep the batch dimension, flatten everything else for the linear layers
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = nn.functional.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        # Log-probabilities, so the result pairs with nll_loss
        output = nn.functional.log_softmax(x, dim=1)
        return output

# 3. Train the model
# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = Net()
model = model.to(device)
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

def train(model, device, train_loader, optimizer, epoch):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: network that returns log-probabilities.
        device: device each batch is moved to before the forward pass.
        train_loader: iterable yielding ``(data, target)`` batches.
        optimizer: optimizer that updates ``model``'s parameters.
        epoch: current epoch number; accepted but not used by the body.
    """
    model.train()
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        # Clear gradients left over from the previous batch
        optimizer.zero_grad()
        batch_loss = nn.functional.nll_loss(model(inputs), targets)
        batch_loss.backward()
        optimizer.step()

# 4. 评估模型
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += nn.functional.nll_loss(output, target, reduction='sum').item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))


In [37]:
# Create the data loaders. Both names refer to the same DataLoader object, so the
# "Test set" figures printed below are measured on the training samples.
train_loader = test_loader = data_loader
# Start training: 10 epochs, with an evaluation pass after each one
for epoch in range(1, 11):
    train(model=model, device=device, train_loader=train_loader, optimizer=optimizer, epoch=epoch)
    test(model=model, device=device, test_loader=test_loader)


Test set: Average loss: 2.8038, Accuracy: 1/18 (6%)


Test set: Average loss: 2.6542, Accuracy: 4/18 (22%)


Test set: Average loss: 2.4904, Accuracy: 9/18 (50%)


Test set: Average loss: 2.2808, Accuracy: 12/18 (67%)


Test set: Average loss: 2.0079, Accuracy: 13/18 (72%)


Test set: Average loss: 1.6876, Accuracy: 15/18 (83%)


Test set: Average loss: 1.3476, Accuracy: 17/18 (94%)


Test set: Average loss: 1.0225, Accuracy: 18/18 (100%)


Test set: Average loss: 0.7450, Accuracy: 18/18 (100%)


Test set: Average loss: 0.5315, Accuracy: 18/18 (100%)



## 保存模型

In [38]:
# Persist only the learned parameters (the state dict), not the whole model object
torch.save(obj=model.state_dict(), f='model.pth')

In [39]:
new_model = Net()
# new_model lives on the CPU, so map the checkpoint tensors there as well; without
# map_location a checkpoint saved from a CUDA model cannot be loaded on a CPU-only machine.
new_model.load_state_dict(torch.load('model.pth', map_location='cpu'))
# Switch to inference mode (disables dropout); as the last expression this also displays the model
new_model.eval()

Net(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (dropout1): Dropout(p=0.25, inplace=False)
  (dropout2): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=9216, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=18, bias=True)
)

## 预测

In [40]:
import torch
from torchvision import transforms

# Define the same preprocessing that was used for training
transform = transforms.Compose([
    transforms.Resize(size=(28, 28)),
    transforms.ToTensor(),
    # Single-channel normalisation with mean 0.5 and std 0.5
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

In [43]:
# Load each image in 'data' and run the prediction
# NOTE(review): the original text of this cell was corrupted (overlapping fragments).
# The loop header and the two inference lines inside torch.no_grad() are reconstructed
# from the surviving fragments and the printed integer labels; confirm them before
# running, because this cell renames files on disk.
for f in os.listdir('data'):
    image = Image.open(os.path.join('data', f))
    char_image = np.array(image)
    char_image = Image.fromarray(char_image).convert('L')
    char_image = transform(char_image)
    # Add a leading batch dimension so a single image can be fed to the model
    char_image = char_image.unsqueeze(0)

    with torch.no_grad():
        output = new_model(char_image)
        predicted_label = output.argmax(dim=1).item()
        print(f'Predicted label: {predicted_label}')

    # Rename the file to its predicted label
    os.rename(os.path.join('data', f), os.path.join('data', f'{predicted_label}.png'))

Predicted label: 8
Predicted label: 9
Predicted label: 14
Predicted label: 15
Predicted label: 17
Predicted label: 16
Predicted label: 12
Predicted label: 13
Predicted label: 11
Predicted label: 10
Predicted label: 4
Predicted label: 5
Predicted label: 7
Predicted label: 6
Predicted label: 2
Predicted label: 3
Predicted label: 1
Predicted label: 0
