# 测试数据集

In [58]:
# Load pickled data
import pickle

# TODO: Fill this in based on where you saved the training and testing data

training_file = "./data/train.p"
validation_file = "./data/valid.p"
testing_file = "./data/test.p"

with open(training_file, mode='rb') as f:
    train = pickle.load(f)
with open(validation_file, mode='rb') as f:
    valid = pickle.load(f)
with open(testing_file, mode='rb') as f:
    test = pickle.load(f)
    
X_train, y_train = train['features'], train['labels']
X_valid, y_valid = valid['features'], valid['labels']
X_test, y_test = test['features'], test['labels']

In [59]:
X_train.shape

(34799, 32, 32, 3)

In [60]:
y_train.shape

(34799,)

In [61]:
y_train[20:]

array([41, 41, 41, ..., 25, 25, 25], dtype=uint8)

## 准备数据集

In [62]:
import torch
import numpy as np
from torch.utils.data import Dataset, DataLoader
# import matplotlib.pyplot as plt

In [63]:
class traffic_sign_dataset(Dataset):
    '''
    numpy to Dataset
    '''
    def __init__(self, X, y):
        self.len = len(X)
        self.features = torch.tensor(torch.from_numpy(X), dtype=torch.float32)
        self.target = torch.tensor(torch.from_numpy(y), dtype=torch.long)
        
    def __getitem__(self, index):
        # 网络需要(batch, 3, n, n)形式，需要调整一下
        sample = self.features[index].permute(2, 0, 1)
        return sample, self.target[index]

    def __len__(self):
        return self.len

原始数据转换为Dataset

In [64]:
train_set = traffic_sign_dataset(X = X_train, y = y_train)
valid_set = traffic_sign_dataset(X = X_valid, y = y_valid)
test_set = traffic_sign_dataset(X = X_test, y = y_test)
print(len(train_set))
print(len(valid_set))
print(len(test_set))

  import sys
  


34799
4410
12630


Dataset转换为DataLoader

In [65]:
BATCH_SIZE = 16
train_set_iter = DataLoader(dataset=train_set,
                            batch_size=BATCH_SIZE,
                            shuffle=True)
valid_set_iter = DataLoader(valid_set, 
                            batch_size=BATCH_SIZE,
                            shuffle=True)
print('开始训练')
# for batch_index, (x, y) in enumerate(train_set_iter):
#     print('batch_index:', batch_index)
#     print(x.shape)
#     print(y.shape)

test_set_iter = DataLoader(dataset=test_set,
                            batch_size=BATCH_SIZE,
                            shuffle=True)
print('开始测试')
# for batch_index, (x, y) in enumerate(test_set_iter):
#     print('batch_index:', batch_index)
#     print(x.shape)
#     print(y.shape)

开始训练
开始测试


## 模型创建

In [66]:
import torch.nn as nn
import torch.optim as optim

In [67]:
# 定义LeNet模型
class LeNet(nn.Module):
    def __init__(self, num_classes=10):
        super(LeNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        self.fc1 = nn.Linear(16*5*5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        x = torch.nn.functional.relu(self.conv1(x))
        x = torch.nn.functional.max_pool2d(x, 2)
        x = torch.nn.functional.relu(self.conv2(x))
        x = torch.nn.functional.max_pool2d(x, 2)
        x = x.view(-1, 16*5*5)
        x = torch.nn.functional.relu(self.fc1(x))
        x = torch.nn.functional.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [68]:
# 创建LeNet模型实例
lenet_model = LeNet(num_classes=43)  # 这里有43个路牌类别

## 训练

In [69]:
# 定义损失函数和优化器
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(lenet_model.parameters(), lr=0.001, momentum=0.9)

In [70]:
# 训练模型
num_epochs = 10
for epoch in range(num_epochs):
    lenet_model.train()  # 设置为训练模式
    for inputs, labels in train_set_iter:
        optimizer.zero_grad()
        outputs = lenet_model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    
    # 在每个epoch结束后评估模型在验证集上的性能
    lenet_model.eval()  # 设置为评估模式
    total_correct = 0
    total_samples = 0
    with torch.no_grad():
        for inputs, labels in valid_set_iter:
            outputs = lenet_model(inputs)
            _, predicted = torch.max(outputs, 1)
            total_correct += (predicted == labels).sum().item()
            total_samples += labels.size(0)
    
    accuracy = total_correct / total_samples
    print(f'Epoch [{epoch+1}/{num_epochs}], Validation Accuracy: {accuracy:.4f}')

print('Training Finished!')

Epoch [1/10], Validation Accuracy: 0.7952
Epoch [2/10], Validation Accuracy: 0.8558
Epoch [3/10], Validation Accuracy: 0.8172
Epoch [4/10], Validation Accuracy: 0.8819
Epoch [5/10], Validation Accuracy: 0.8565
Epoch [6/10], Validation Accuracy: 0.8771
Epoch [7/10], Validation Accuracy: 0.8272
Epoch [8/10], Validation Accuracy: 0.8562
Epoch [9/10], Validation Accuracy: 0.6141
Epoch [10/10], Validation Accuracy: 0.7875
Training Finished!


## 测试

In [71]:
lenet_model.eval()  # 设置为评估模式
total_correct = 0
total_samples = 0
with torch.no_grad():
    for inputs, labels in test_set_iter:
        outputs = lenet_model(inputs)
        _, predicted = torch.max(outputs, 1)               # 找到类别
        total_correct += (predicted == labels).sum().item()
        total_samples += labels.size(0)

accuracy = total_correct / total_samples
print(f'Test Accuracy: {accuracy:.4f}')

Test Accuracy: 0.7704
