### 识别手写数字算法

In [2]:
import torch
import torchvision
import torchvision.transforms as transforms

# Preprocessing applied to every image
transform = transforms.Compose(
    [
        transforms.ToTensor(),                 # convert the image to a Tensor
        transforms.Normalize((0.5,), (0.5,)),  # normalize to [-1, 1]
    ]
)

# Download and load the training split, then the test split
trainset, testset = (
    torchvision.datasets.MNIST(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Batch loaders: only the training loader shuffles
trainloader, testloader = (
    torch.utils.data.DataLoader(dataset, batch_size=64, shuffle=reshuffle, num_workers=2)
    for dataset, reshuffle in ((trainset, True), (testset, False))
)

# Inspect the datasets
print("Training set size:", len(trainset))
print("Testing set size:", len(testset))

# Inspect the DataLoader: fetch the first batch only
images, labels = next(iter(trainloader))
print("Batch shape:", images.shape)
print("Labels shape:", labels.shape)

Training set size: 60000
Testing set size: 10000
Batch shape: torch.Size([64, 1, 28, 28])
Labels shape: torch.Size([64])


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# Preprocessing applied to every image
transform = transforms.Compose(
    [
        transforms.ToTensor(),                 # convert the image to a Tensor
        transforms.Normalize((0.5,), (0.5,)),  # normalize to [-1, 1]
    ]
)

# Download and load the training split, then the test split
trainset, testset = (
    torchvision.datasets.MNIST(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Batch loaders: only the training loader shuffles
trainloader, testloader = (
    torch.utils.data.DataLoader(dataset, batch_size=64, shuffle=reshuffle, num_workers=2)
    for dataset, reshuffle in ((trainset, True), (testset, False))
)

# 定义卷积神经网络模型
class Net(nn.Module):
    """Small convolutional classifier producing 10 class scores per image.

    Layout: two 3x3 convolutions (1 -> 32 -> 64 channels, padding 1), each
    followed by ReLU and 2x2 max-pooling, then two fully connected layers
    (64*7*7 -> 128 -> 10) with dropout (p=0.5) between them.

    The network is sized for 1-channel 28x28 inputs: the two poolings reduce
    28x28 to the 7x7 feature map that ``fc1`` expects.
    """

    def __init__(self):
        super().__init__()
        # First convolution
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        # Second convolution
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        # Pooling (shared by both convolution stages)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        # Fully connected layers
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        """Return raw class scores of shape (batch, 10) for input ``x``.

        Args:
            x: Image batch of shape (batch, 1, H, W).

        Raises:
            RuntimeError: If the pooled feature map does not have the
                64*7*7 elements per sample that ``fc1`` expects.
        """
        # First convolution + pooling
        x = self.pool(self.relu(self.conv1(x)))
        # Second convolution + pooling
        x = self.pool(self.relu(self.conv2(x)))
        # Flatten per sample. Unlike ``view(-1, 64 * 7 * 7)`` this keeps the
        # batch dimension fixed, so a wrongly sized input fails in fc1 with a
        # shape error instead of being silently re-split into extra rows.
        x = torch.flatten(x, 1)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# Instantiate the model
net = Net()

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

# Train the model
num_epochs = 10
for epoch in range(num_epochs):
    # Training mode keeps Dropout active while the weights are being updated.
    net.train()
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        # Backpropagation and parameter update
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Mean loss over the batches of this epoch
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss / (i+1):.4f}")

# Evaluate the model
# Evaluation mode disables Dropout; without it half of the fc1 activations are
# randomly zeroed on every test batch, making the accuracy noisy and too low.
net.eval()
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = net(images)
        # Predicted class = index of the highest score in each row
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy of the network on the 10000 test images: {100 * correct / total:.2f}%")


In [48]:
# Print the name, shape and values of every trainable parameter
for name, param in net.named_parameters():
    if not param.requires_grad:
        continue  # skip parameters that are not being trained
    print(
        f"Layer: {name}",
        f"Shape: {param.data.shape}",
        f"Weights: {param.data}\n",
        sep="\n",
    )

Layer: conv1.weight
Shape: torch.Size([32, 1, 3, 3])
Weights: tensor([[[[-0.1766, -0.2352,  0.0445],
          [-0.3913, -0.1076,  0.3257],
          [-0.1492,  0.3113,  0.2624]]],


        [[[ 0.0468,  0.1832, -0.2399],
          [ 0.0366,  0.0669, -0.1865],
          [ 0.0260, -0.1794, -0.0126]]],


        [[[ 0.0405,  0.0462,  0.4592],
          [ 0.3414,  0.3049, -0.1502],
          [-0.3780, -0.5000, -0.3352]]],


        [[[ 0.1693, -0.0236,  0.0198],
          [ 0.3172, -0.2540, -0.0958],
          [-0.1598, -0.1140,  0.1568]]],


        [[[-0.1536, -0.2270, -0.2156],
          [ 0.1956, -0.0897, -0.2720],
          [ 0.3450,  0.0517, -0.0759]]],


        [[[ 0.0064,  0.2910,  0.2376],
          [ 0.0507,  0.0218, -0.0645],
          [-0.2449, -0.2841,  0.0963]]],


        [[[ 0.4135,  0.3929, -0.0891],
          [-0.1040,  0.1426,  0.3841],
          [-0.2231,  0.0866, -0.0713]]],


        [[[-0.0350,  0.0225, -0.3128],
          [-0.0656, -0.2292,  0.2660],
          [-0

In [136]:
from PIL import Image
import matplotlib.pyplot as plt

def predict_image(image_path, model, transform):
    """Predict the class index of a single image file with ``model``.

    Args:
        image_path: Path of the image file to classify.
        model: A ``torch.nn.Module`` classifier; its output is read as one
            score per class along dimension 1.
        transform: Callable that turns the grayscale PIL image into a tensor
            without the batch dimension.

    Returns:
        int: Index of the highest-scoring class.

    Note:
        The image is not resized here, so it must already have the spatial
        size the model expects.
    """
    # Load the picture as a single-channel (grayscale) image.
    image = Image.open(image_path).convert('L')
    # Convert to a tensor and add the batch dimension.
    image = transform(image).unsqueeze(0)

    # Inference must run in eval mode; otherwise layers such as Dropout stay
    # stochastic and the same image can yield different predictions.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            output = model(image)
            _, predicted = torch.max(output, 1)
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)

    return predicted.item()

# Image file to classify
image_path = 'D:\\tmp\\2.jpg'

# Run the prediction and report the result
predicted_digit = predict_image(image_path=image_path, model=net, transform=transform)
print(f"Predicted digit: {predicted_digit}")

Predicted digit: 8
