# CNN 手寫數字辨識實做
此專項將使pytorch套件進行訓練與辨識
該程式可以分為以下步驟
1. 事前準備＆資料處理
2. 模型建構
3. 訓練參數設定
4. 訓練
5. 測試

## 事前準備&資料處理
在訓練前，要先載入AI套件（簡化資料處理與訓練過程）此部份使用"pytorch"進行訓練與資料處理

我們須載入以下套件

1. torch ->pytorch核心
2. torch.nn ->AI模型套件（原torch內已存在此處為方便後續引用因此特別提出）
3. torch.optim ->AI模型優化套件（原torch內已存在此處為方便後續引用因此特別提出）
4. torch.utils.data -> 訓練資料處里套件（原torch內已存在此處為方便後續引用因此特別提出）
5. torchvision ->pytorch影像處里套件



In [1]:
# 載入ai套件-pytorch
import torch
#載入torch.nn的類別(class)，並在此程式中以nn替代
import torch.nn as nn
#載入torch.optim的類別(class)，並在此程式中以optim替代
import torch.optim as optim
#載入torch.utils的類別(class)，並在此程式中以optim替代
import torch.utils.data as dset
#pytorch影像處里套件
from torchvision import datasets, transforms

查看目前設備使否有可用之GPU

另外可以使用-！nvidia-smi查看GPU參數

In [2]:
!nvidia-smi

Sun Aug 28 07:27:48 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   52C    P8     9W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [3]:
# 查看目前可否使用GPU
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
print('GPU State:', device)

GPU State: cuda:0


In [4]:
# 宣告資料處理方式
# 1.轉換格式成pytorch格式
# 2.標準化
transform = transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5,), (0.5,)),])

下載手寫數字辨識資料集

並使用DataLoader將資料載入，並切分batch_size

In [32]:
# 下載資料
trainSet = datasets.MNIST(root='MNIST', download=True, train=True, transform=transform)
testSet = datasets.MNIST(root='MNIST', download=True, train=False, transform=transform)
trainLoader = dset.DataLoader(trainSet, batch_size=64, shuffle=True)
testLoader = dset.DataLoader(testSet, batch_size=64, shuffle=False)

## 模型建構

建立模型，使用nn.sequential建立AI模型
本章提供2種模型供測試

1.使用全連接層

2.cnn+全連接層



In [33]:
# Model
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.main = nn.Sequential(
            nn.Linear(in_features=784, out_features=128),
            nn.Linear(in_features=128, out_features=64),
            nn.Linear(in_features=64, out_features=10),
            nn.LogSoftmax(dim=1)
        )

    def forward(self, input):
        return self.main(input)


net = Net().to(device)
print(net)

Net(
  (main): Sequential(
    (0): Linear(in_features=784, out_features=128, bias=True)
    (1): Linear(in_features=128, out_features=64, bias=True)
    (2): Linear(in_features=64, out_features=10, bias=True)
    (3): LogSoftmax(dim=1)
  )
)


In [55]:
# Model
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Sequential(         
            nn.Conv2d(
                in_channels=1,              
                out_channels=16,            
                kernel_size=3,              
                stride=1,                   
                padding=1,                  
            ),                              
            nn.ReLU(),                      
            nn.MaxPool2d(kernel_size=2),    
        )
        self.conv2 = nn.Sequential(         
            nn.Conv2d(
                in_channels=16,              
                out_channels=32,            
                kernel_size=3,              
                stride=1,                   
                padding=1,                  
            ),     
            nn.ReLU(),                      
            nn.MaxPool2d(kernel_size=2),                
        )
        self.conv3 = nn.Sequential(         
            nn.Conv2d(
                in_channels=32,              
                out_channels=64,            
                kernel_size=3,              
                stride=1,                   
                padding=1,                  
            ),     
            nn.ReLU(),                      
            nn.MaxPool2d(kernel_size=2),                
        )
        # fully connected layer, output 10 classes
        self.out = nn.Sequential(         
            nn.Linear(576, 10),     
            nn.LogSoftmax(dim=1)
        )
        
    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = x.view(x.size(0), -1)       
        output = self.out(x)
        return output  
net = Net().to(device)
print(net)

Net(
  (conv1): Sequential(
    (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv3): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (out): Sequential(
    (0): Linear(in_features=576, out_features=10, bias=True)
    (1): LogSoftmax(dim=1)
  )
)


## 訓練


*   **注意，使用兩種不同模型時，程式有部分需進行修改**
* **使用CNN時，請將"inputs = inputs.view(inputs.shape[0], -1)"進行註解或刪除**





In [56]:
# Train
# Parameters
#宣告要使用資料集訓練幾次
epochs = 3
#設定學習率(不建議設定超過0.5，太大會影像辨識成功率，太小會學太慢)
lr = 0.002
#設定誤差計算方式(大多數分類問題使用交叉熵損失)
criterion = nn.CrossEntropyLoss()
#設定模型權重優化策略&方式
optimizer = optim.Adam(net.parameters(), lr=lr)
#開始進行訓練
for epoch in range(epochs):
    running_loss = 0.0
    #
    for times, data in enumerate(trainLoader):
        #從trainLoader中拿取訓練資料與真實值，並將其放入GPU中("to(device)"")
        inputs, labels = data[0].to(device), data[1].to(device)
        #使用CNN時請用"#"將下列程式註解
        #inputs = inputs.view(inputs.shape[0], -1)

        # Zero the parameter gradients
        #初始化優化器的梯度
        optimizer.zero_grad()
        #將圖輸入進模型中，進行預測
        outputs = net(inputs)
        #計算與真實值間的誤差
        loss = criterion(outputs, labels)
        #反向傳播梯度&更新權重
        loss.backward()
        optimizer.step()

        # Print statistics
        #誤差加總(方便訓練人員進行觀測)&顯示
        running_loss += loss.item()
        if times % 100 == 99 or times+1 == len(trainLoader):
            print('[%d/%d, %d/%d] loss: %.3f' % (epoch+1, epochs, times+1, len(trainLoader), running_loss/2000))

print('Training Finished.')

[1/3, 100/938] loss: 0.037
[1/3, 200/938] loss: 0.045
[1/3, 300/938] loss: 0.051
[1/3, 400/938] loss: 0.056
[1/3, 500/938] loss: 0.060
[1/3, 600/938] loss: 0.064
[1/3, 700/938] loss: 0.067
[1/3, 800/938] loss: 0.070
[1/3, 900/938] loss: 0.073
[1/3, 938/938] loss: 0.074
[2/3, 100/938] loss: 0.003
[2/3, 200/938] loss: 0.005
[2/3, 300/938] loss: 0.008
[2/3, 400/938] loss: 0.009
[2/3, 500/938] loss: 0.011
[2/3, 600/938] loss: 0.014
[2/3, 700/938] loss: 0.016
[2/3, 800/938] loss: 0.018
[2/3, 900/938] loss: 0.020
[2/3, 938/938] loss: 0.021
[3/3, 100/938] loss: 0.002
[3/3, 200/938] loss: 0.003
[3/3, 300/938] loss: 0.005
[3/3, 400/938] loss: 0.006
[3/3, 500/938] loss: 0.008
[3/3, 600/938] loss: 0.010
[3/3, 700/938] loss: 0.012
[3/3, 800/938] loss: 0.013
[3/3, 900/938] loss: 0.015
[3/3, 938/938] loss: 0.016
Training Finished.


## 測試

*   **注意，使用兩種不同模型時，程式有部分需進行修改**
* **使用CNN時，請將"inputs = inputs.view(inputs.shape[0], -1)"進行註解或刪除**

In [58]:
#測試整體準確度
#創建計數器，紀錄測試總數數與正確判斷數
correct = 0
total = 0

#宣告以下程式不計算梯度(加快速度&避免不小心動到權重)
with torch.no_grad():
    for data in testLoader:
        #從testLoader中拿取訓練資料與真實值，並將其放入GPU中("to(device)"")
        inputs, labels = data[0].to(device), data[1].to(device)
        #使用CNN時請用"#"將下列程式註解，其用意是將圖像轉換成1維資訊
        #inputs = inputs.view(inputs.shape[0], -1)
        #將圖輸入進模型中，進行預測
        outputs = net(inputs)
        #將所預測之最大結果是為預測類別
        _, predicted = torch.max(outputs.data, 1)
        #計算總數&正確數
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
#顯示正確數/總數比率
print('Accuracy of the network on the 10000 test images: %d %%' % (100*correct / total))

#測試個別準確度
#創建計數器，紀錄個數字出現次數與正確判斷數
class_correct = [0 for i in range(10)]
class_total = [0 for i in range(10)]
#宣告以下程式不計算梯度
with torch.no_grad():
    for data in testLoader:
        #從testLoader中拿取訓練資料與真實值，並將其放入GPU中("to(device)"")
        inputs, labels = data[0].to(device), data[1].to(device)
        #使用CNN時請用"#"將下列程式註解
        #inputs = inputs.view(inputs.shape[0], -1)
        #將圖輸入進模型中，進行預測
        outputs = net(inputs)
        #將所預測之最大結果是為預測類別
        _, predicted = torch.max(outputs, 1)
        #計算正確數
        c = (predicted == labels).squeeze()
        #統計每一類別正確數與數量
        for i in range(10):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1

#顯示各類別正確數/總數比率
for i in range(10):
    print('Accuracy of %d: %3f' % (i, (class_correct[i]/class_total[i])))

Accuracy of the network on the 10000 test images: 98 %
Accuracy of 0: 0.986842
Accuracy of 1: 0.983784
Accuracy of 2: 0.982558
Accuracy of 3: 0.967949
Accuracy of 4: 0.988701
Accuracy of 5: 0.984127
Accuracy of 6: 0.992188
Accuracy of 7: 0.993902
Accuracy of 8: 0.986014
Accuracy of 9: 0.976048
