### 识别手写数字算法

In [2]:
import torch
import torchvision
import torchvision.transforms as transforms

# 定义数据变换
transform = transforms.Compose([
    transforms.ToTensor(),  # 将图像转换为 Tensor
    transforms.Normalize((0.5,), (0.5,))  # 归一化到 [-1, 1]
])

# 下载和加载训练集
trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True, num_workers=2)

# 下载和加载测试集
testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False, num_workers=2)

# 检查数据集
print("Training set size:", len(trainset))
print("Testing set size:", len(testset))

# 检查 DataLoader
for images, labels in trainloader:
    print("Batch shape:", images.shape)
    print("Labels shape:", labels.shape)
    break  # 只检查第一个批次

Training set size: 60000
Testing set size: 10000
Batch shape: torch.Size([64, 1, 28, 28])
Labels shape: torch.Size([64])


In [2]:
import torch
print(torch.__version__)
print(torch.version.cuda)
print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0))


2.5.1
12.4
True
NVIDIA GeForce GTX 1660 SUPER


In [1]:
import torch

# 检查是否有可用的 GPU
if torch.cuda.is_available():
    print("CUDA is available!")
else:
    print("CUDA is not available.")

CUDA is available!


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# 定义数据变换
transform = transforms.Compose([
    transforms.ToTensor(),  # 将图像转换为 Tensor
    transforms.Normalize((0.5,), (0.5,))  # 归一化到 [-1, 1]
])

# 使用gpu设备
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# 下载和加载训练集
trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True, num_workers=2)

# 下载和加载测试集
testset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False, num_workers=2)
# 定义卷积神经网络模型
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # 一次卷积
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        # 第二次卷积
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        # 池化
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        # 线性化
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        # 一次卷积、池化
        x = self.pool(self.relu(self.conv1(x)))
        # 第二次卷积、池化
        x = self.pool(self.relu(self.conv2(x)))
        x = x.view(-1, 64 * 7 * 7)  # 展平特征图
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x


# 初始化模型
net = Net()
net.to(device)

# 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)

# 训练模型
num_epochs = 10
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        # 将数据移动到设备
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        # 反向传播
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss / (i+1):.4f}")
# 评估模型
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        # 将数据移动到设备
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy of the network on the 10000 test images: {100 * correct / total:.2f}%")


Using device: cuda
Epoch [1/10], Loss: 0.3131
Epoch [2/10], Loss: 0.1147
Epoch [3/10], Loss: 0.0846
Epoch [4/10], Loss: 0.0694
Epoch [5/10], Loss: 0.0578
Epoch [6/10], Loss: 0.0511
Epoch [7/10], Loss: 0.0449
Epoch [8/10], Loss: 0.0365
Epoch [9/10], Loss: 0.0348
Epoch [10/10], Loss: 0.0290
Accuracy of the network on the 10000 test images: 98.63%


In [3]:
# 获取每一层的参数
for name, param in net.named_parameters():
    if param.requires_grad:
        print(f"Layer: {name}")
        print(f"Shape: {param.data.shape}")
        print(f"Weights: {param.data}\n")

Layer: conv1.weight
Shape: torch.Size([32, 1, 3, 3])
Weights: tensor([[[[-1.5241e-01,  1.9503e-01,  2.9506e-01],
          [-3.4260e-01,  2.2655e-01, -3.2656e-03],
          [ 2.4507e-01,  3.7953e-01, -2.2848e-01]]],


        [[[-1.9374e-01, -3.6419e-01, -3.7965e-01],
          [ 2.2273e-01, -2.7766e-01,  2.3187e-01],
          [ 2.0008e-01,  5.8296e-01, -7.4937e-02]]],


        [[[-3.1235e-01, -4.6888e-01, -2.5889e-01],
          [-3.3509e-01,  1.5747e-01, -1.4213e-01],
          [ 4.8283e-01,  4.0216e-01,  3.5739e-01]]],


        [[[ 3.8435e-01,  2.2883e-01, -3.8671e-01],
          [-2.5230e-01,  6.9204e-01,  3.5845e-01],
          [-5.2684e-01, -3.5736e-01,  3.7743e-01]]],


        [[[ 2.0163e-01, -1.0509e-01,  4.4767e-01],
          [-1.8327e-01, -8.2945e-03,  8.3378e-02],
          [-2.3038e-01, -1.0218e-01,  3.8530e-02]]],


        [[[-5.0703e-01, -7.5556e-02,  2.2841e-01],
          [-2.5348e-01,  5.9550e-01,  1.6105e-01],
          [ 3.1421e-01,  3.2434e-01, -6.4775e-01]]]

In [4]:
from PIL import Image
import matplotlib.pyplot as plt


# 使用gpu设备
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

def predict_image(image_path, model, transform):
    # 加载图片
    image = Image.open(image_path).convert('L')  # 转换为灰度图像
    image = transform(image).unsqueeze(0)  # 转换为 Tensor 并增加 batch 维度
    image = image.to(device)

    # 使用模型进行预测
    with torch.no_grad():
        model.to(device)
        output = model(image)
        _, predicted = torch.max(output.data, 1)
    
    return predicted.item()

# 指定图片路径
image_path = 'D:\code\python-learning\mnist\generic\\7.png'

# 进行预测
result = []
for i in range(100):
    predicted_digit = predict_image(image_path, net, transform)
    print(f"Predicted digit: {predicted_digit}")
    result.append(predicted_digit)
value_to_count = 7
count = result.count(value_to_count)
print(f"The value {value_to_count} appears {count} times in the list.") 

Using device: cuda
Predicted digit: 8
Predicted digit: 8
Predicted digit: 5
Predicted digit: 8
Predicted digit: 8
Predicted digit: 7
Predicted digit: 5
Predicted digit: 8
Predicted digit: 8
Predicted digit: 8
Predicted digit: 8
Predicted digit: 8
Predicted digit: 8
Predicted digit: 8
Predicted digit: 7
Predicted digit: 8
Predicted digit: 8
Predicted digit: 8
Predicted digit: 8
Predicted digit: 8
Predicted digit: 8
Predicted digit: 8
Predicted digit: 8
Predicted digit: 8
Predicted digit: 8
Predicted digit: 8
Predicted digit: 8
Predicted digit: 8
Predicted digit: 8
Predicted digit: 9
Predicted digit: 8
Predicted digit: 8
Predicted digit: 0
Predicted digit: 8
Predicted digit: 8
Predicted digit: 8
Predicted digit: 9
Predicted digit: 8
Predicted digit: 8
Predicted digit: 8
Predicted digit: 8
Predicted digit: 8
Predicted digit: 8
Predicted digit: 5
Predicted digit: 8
Predicted digit: 8
Predicted digit: 8
Predicted digit: 9
Predicted digit: 3
Predicted digit: 3
Predicted digit: 8
Predicted di