# 文字認識モデルの学習


In [1]:
import pandas as pd
from PIL import Image
import torch
import torchvision
from torchvision import transforms as transforms
import numpy as np
from torch.utils.data import TensorDataset,DataLoader

# 全結合層と活性化関数
from torch import nn
from torch.nn import functional as F

# 損失関数と最適化関数
from torch import optim

import matplotlib.pyplot as plt

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

  warn(


/pictures にあるinformation.csvから画像名と画像のラベルのセットを読み込む

In [2]:
df = pd.read_csv("pictures/information.csv")
df = df[["filename","label"]]
filelist=df.values.tolist()


画像の前処理を行う
この際にオーグメンテーションをして学習データを増やす

In [4]:
#リストの初期化
images = []
labels = []


#リサイズ後の画像のサイズを決めておく
common_size = (28, 28)

#オーグメンテーションの手順 
transform_augment = transforms.Compose([
    transforms.Resize(common_size),
    #transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.Grayscale(),
    transforms.ToTensor(),
])

# グレースケール化
transform_normal = transforms.Compose([
    transforms.Resize(common_size),
    transforms.Grayscale(),
    transforms.ToTensor(),
])

# 画像の読み込み
for filename, label in filelist:
    if filename[-4:] == ".png":
        p = Image.open('./pictures/' + filename)
    else:
        p = Image.open('./pictures/' + filename + ".png")
    
    # オーギュメンテーションをしてリストに追加
    for i in range(3):
        p_augmented = transform_augment(p)
        images.append(p_augmented)
        labels.append(torch.tensor(label, dtype=torch.int64))


    
    #元画像もリストに追加
    p_gray = transform_normal(p)
    images.append(p_gray)
    labels.append(torch.tensor(label, dtype=torch.int64))

images = torch.stack(images, dim=0)
labels = torch.stack(labels, dim=0)

# データセットの作成
dataset = TensorDataset(images, labels)

# 学習データテストデータに分ける
r = 0.9
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [int(r*len(dataset)), len(dataset) - int(r*len(dataset))])

#データローダーの準備
batch_size = 27
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=len(test_dataset), shuffle=False)

In [5]:
train_dataset[0]

(tensor([[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.1961, 0.5020, 0.5020, 0.5020, 0.5020, 0.4235,
           0.0000, 0.0000, 0.0000, 0.0000, 

認識モデルの定義

In [6]:
class MNISTModel(nn.Module):
    def __init__(self):
        super(MNISTModel, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = nn.functional.relu(x)
        x = self.conv2(x)
        x = nn.functional.relu(x)
        x = nn.functional.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = nn.functional.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        output = nn.functional.log_softmax(x, dim=1)
        return output

モデルの学習を行う

In [7]:
from tqdm import tqdm

model = MNISTModel()

criterion = nn.NLLLoss()

optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

for epoch in range(100):
    total_loss = 0
    for images, labels in tqdm(train_loader):
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # loss は平均を取って表示する
    print(f'Epoch: {epoch + 1}, Loss: {total_loss / len(train_loader)}')

# Torch Script形式で保存
#model_scripted = torch.jit.script(model)
#model_scripted.save('DSST_model/DSST_model.pth')
torch.save(model.state_dict(), 'letter_recognition_model.pth')

100%|██████████| 5/5 [00:00<00:00, 73.04it/s]


Epoch: 1, Loss: 2.302913951873779


100%|██████████| 5/5 [00:00<00:00, 100.17it/s]


Epoch: 2, Loss: 2.2963420867919924


100%|██████████| 5/5 [00:00<00:00, 104.08it/s]


Epoch: 3, Loss: 2.2746722221374513


100%|██████████| 5/5 [00:00<00:00, 76.44it/s]


Epoch: 4, Loss: 2.2603461742401123


100%|██████████| 5/5 [00:00<00:00, 99.00it/s]


Epoch: 5, Loss: 2.247250127792358


100%|██████████| 5/5 [00:00<00:00, 73.49it/s]


Epoch: 6, Loss: 2.226385259628296


100%|██████████| 5/5 [00:00<00:00, 87.61it/s]


Epoch: 7, Loss: 2.2175230503082277


100%|██████████| 5/5 [00:00<00:00, 91.93it/s]


Epoch: 8, Loss: 2.193064546585083


100%|██████████| 5/5 [00:00<00:00, 99.11it/s]


Epoch: 9, Loss: 2.177097272872925


100%|██████████| 5/5 [00:00<00:00, 102.05it/s]


Epoch: 10, Loss: 2.1458945751190184


100%|██████████| 5/5 [00:00<00:00, 71.51it/s]


Epoch: 11, Loss: 2.1211651802062987


100%|██████████| 5/5 [00:00<00:00, 104.63it/s]


Epoch: 12, Loss: 2.0720028400421144


100%|██████████| 5/5 [00:00<00:00, 101.00it/s]


Epoch: 13, Loss: 2.037067699432373


100%|██████████| 5/5 [00:00<00:00, 75.78it/s]


Epoch: 14, Loss: 2.008179450035095


100%|██████████| 5/5 [00:00<00:00, 99.32it/s]


Epoch: 15, Loss: 1.9484083890914916


100%|██████████| 5/5 [00:00<00:00, 99.90it/s]


Epoch: 16, Loss: 1.8827815055847168


100%|██████████| 5/5 [00:00<00:00, 94.85it/s]


Epoch: 17, Loss: 1.8201515197753906


100%|██████████| 5/5 [00:00<00:00, 101.35it/s]


Epoch: 18, Loss: 1.7230286836624145


100%|██████████| 5/5 [00:00<00:00, 99.61it/s]


Epoch: 19, Loss: 1.6635619878768921


100%|██████████| 5/5 [00:00<00:00, 83.63it/s]


Epoch: 20, Loss: 1.5735529899597167


100%|██████████| 5/5 [00:00<00:00, 136.81it/s]


Epoch: 21, Loss: 1.4596830129623413


100%|██████████| 5/5 [00:00<00:00, 100.07it/s]


Epoch: 22, Loss: 1.3288496494293214


100%|██████████| 5/5 [00:00<00:00, 95.30it/s]


Epoch: 23, Loss: 1.2453060388565063


100%|██████████| 5/5 [00:00<00:00, 103.72it/s]


Epoch: 24, Loss: 1.0967896223068236


100%|██████████| 5/5 [00:00<00:00, 98.10it/s]


Epoch: 25, Loss: 1.0940465569496154


100%|██████████| 5/5 [00:00<00:00, 100.73it/s]


Epoch: 26, Loss: 0.9892951369285583


100%|██████████| 5/5 [00:00<00:00, 101.29it/s]


Epoch: 27, Loss: 0.8214579105377198


100%|██████████| 5/5 [00:00<00:00, 99.27it/s]


Epoch: 28, Loss: 0.8046620965003968


100%|██████████| 5/5 [00:00<00:00, 100.35it/s]


Epoch: 29, Loss: 0.7666269659996032


100%|██████████| 5/5 [00:00<00:00, 100.54it/s]


Epoch: 30, Loss: 0.6695594668388367


100%|██████████| 5/5 [00:00<00:00, 140.01it/s]


Epoch: 31, Loss: 0.6576108038425446


100%|██████████| 5/5 [00:00<00:00, 97.14it/s]


Epoch: 32, Loss: 0.5938882172107697


100%|██████████| 5/5 [00:00<00:00, 96.91it/s]


Epoch: 33, Loss: 0.4875186741352081


100%|██████████| 5/5 [00:00<00:00, 101.84it/s]


Epoch: 34, Loss: 0.46526511907577517


100%|██████████| 5/5 [00:00<00:00, 99.82it/s]


Epoch: 35, Loss: 0.39862269163131714


100%|██████████| 5/5 [00:00<00:00, 95.64it/s]


Epoch: 36, Loss: 0.33447914123535155


100%|██████████| 5/5 [00:00<00:00, 137.55it/s]


Epoch: 37, Loss: 0.292542564868927


100%|██████████| 5/5 [00:00<00:00, 100.08it/s]


Epoch: 38, Loss: 0.2924980491399765


100%|██████████| 5/5 [00:00<00:00, 99.97it/s]


Epoch: 39, Loss: 0.3085373491048813


100%|██████████| 5/5 [00:00<00:00, 100.25it/s]


Epoch: 40, Loss: 0.2494054913520813


100%|██████████| 5/5 [00:00<00:00, 100.06it/s]


Epoch: 41, Loss: 0.2207823008298874


100%|██████████| 5/5 [00:00<00:00, 99.02it/s]


Epoch: 42, Loss: 0.19370177686214446


100%|██████████| 5/5 [00:00<00:00, 96.42it/s]


Epoch: 43, Loss: 0.2156069755554199


100%|██████████| 5/5 [00:00<00:00, 101.09it/s]


Epoch: 44, Loss: 0.21627711951732637


100%|██████████| 5/5 [00:00<00:00, 100.49it/s]


Epoch: 45, Loss: 0.16864478439092637


100%|██████████| 5/5 [00:00<00:00, 100.14it/s]


Epoch: 46, Loss: 0.17818254381418228


100%|██████████| 5/5 [00:00<00:00, 140.51it/s]


Epoch: 47, Loss: 0.1671994924545288


100%|██████████| 5/5 [00:00<00:00, 100.02it/s]


Epoch: 48, Loss: 0.12117039114236831


100%|██████████| 5/5 [00:00<00:00, 100.25it/s]


Epoch: 49, Loss: 0.14500208497047423


100%|██████████| 5/5 [00:00<00:00, 97.30it/s]


Epoch: 50, Loss: 0.16469761580228806


100%|██████████| 5/5 [00:00<00:00, 98.65it/s]


Epoch: 51, Loss: 0.15335605591535567


100%|██████████| 5/5 [00:00<00:00, 102.12it/s]


Epoch: 52, Loss: 0.11272610351443291


100%|██████████| 5/5 [00:00<00:00, 100.99it/s]


Epoch: 53, Loss: 0.12302820682525635


100%|██████████| 5/5 [00:00<00:00, 99.75it/s]


Epoch: 54, Loss: 0.09758355021476746


100%|██████████| 5/5 [00:00<00:00, 99.76it/s]


Epoch: 55, Loss: 0.08605529591441155


100%|██████████| 5/5 [00:00<00:00, 100.57it/s]


Epoch: 56, Loss: 0.12094819843769074


100%|██████████| 5/5 [00:00<00:00, 102.11it/s]


Epoch: 57, Loss: 0.10274459272623063


100%|██████████| 5/5 [00:00<00:00, 99.81it/s]


Epoch: 58, Loss: 0.10201260074973106


100%|██████████| 5/5 [00:00<00:00, 101.59it/s]


Epoch: 59, Loss: 0.07270846739411355


100%|██████████| 5/5 [00:00<00:00, 99.66it/s]


Epoch: 60, Loss: 0.08238690048456192


100%|██████████| 5/5 [00:00<00:00, 97.35it/s]


Epoch: 61, Loss: 0.07931499630212784


100%|██████████| 5/5 [00:00<00:00, 102.65it/s]


Epoch: 62, Loss: 0.07955485321581364


100%|██████████| 5/5 [00:00<00:00, 99.69it/s]


Epoch: 63, Loss: 0.0659985177218914


100%|██████████| 5/5 [00:00<00:00, 99.71it/s]


Epoch: 64, Loss: 0.07186243683099747


100%|██████████| 5/5 [00:00<00:00, 135.90it/s]


Epoch: 65, Loss: 0.07322002351284027


100%|██████████| 5/5 [00:00<00:00, 100.16it/s]


Epoch: 66, Loss: 0.06299299746751785


100%|██████████| 5/5 [00:00<00:00, 96.80it/s]


Epoch: 67, Loss: 0.05521068871021271


100%|██████████| 5/5 [00:00<00:00, 97.16it/s]


Epoch: 68, Loss: 0.0693807139992714


100%|██████████| 5/5 [00:00<00:00, 106.83it/s]


Epoch: 69, Loss: 0.059299233555793765


100%|██████████| 5/5 [00:00<00:00, 100.71it/s]


Epoch: 70, Loss: 0.05647378191351891


100%|██████████| 5/5 [00:00<00:00, 102.09it/s]


Epoch: 71, Loss: 0.037501109018921855


100%|██████████| 5/5 [00:00<00:00, 100.27it/s]


Epoch: 72, Loss: 0.04300767555832863


100%|██████████| 5/5 [00:00<00:00, 99.47it/s]


Epoch: 73, Loss: 0.05514207594096661


100%|██████████| 5/5 [00:00<00:00, 97.09it/s]


Epoch: 74, Loss: 0.05041320249438286


100%|██████████| 5/5 [00:00<00:00, 100.87it/s]


Epoch: 75, Loss: 0.037297696247696875


100%|██████████| 5/5 [00:00<00:00, 101.75it/s]


Epoch: 76, Loss: 0.04105065502226353


100%|██████████| 5/5 [00:00<00:00, 100.51it/s]


Epoch: 77, Loss: 0.028303782641887664


100%|██████████| 5/5 [00:00<00:00, 97.95it/s]


Epoch: 78, Loss: 0.036428584903478625


100%|██████████| 5/5 [00:00<00:00, 98.34it/s]


Epoch: 79, Loss: 0.061772701889276506


100%|██████████| 5/5 [00:00<00:00, 101.45it/s]


Epoch: 80, Loss: 0.048012275621294974


100%|██████████| 5/5 [00:00<00:00, 102.17it/s]


Epoch: 81, Loss: 0.042245395667850974


100%|██████████| 5/5 [00:00<00:00, 99.99it/s]


Epoch: 82, Loss: 0.037216370925307274


100%|██████████| 5/5 [00:00<00:00, 99.87it/s]


Epoch: 83, Loss: 0.05102623328566551


100%|██████████| 5/5 [00:00<00:00, 102.53it/s]


Epoch: 84, Loss: 0.04076467752456665


100%|██████████| 5/5 [00:00<00:00, 100.76it/s]


Epoch: 85, Loss: 0.036664316430687904


100%|██████████| 5/5 [00:00<00:00, 97.83it/s]


Epoch: 86, Loss: 0.027798908203840254


100%|██████████| 5/5 [00:00<00:00, 100.78it/s]


Epoch: 87, Loss: 0.033318524807691575


100%|██████████| 5/5 [00:00<00:00, 99.73it/s]


Epoch: 88, Loss: 0.030842720344662667


100%|██████████| 5/5 [00:00<00:00, 77.96it/s]


Epoch: 89, Loss: 0.047069551050662996


100%|██████████| 5/5 [00:00<00:00, 99.91it/s]


Epoch: 90, Loss: 0.0214884327724576


100%|██████████| 5/5 [00:00<00:00, 99.64it/s]


Epoch: 91, Loss: 0.0328838262706995


100%|██████████| 5/5 [00:00<00:00, 99.83it/s]


Epoch: 92, Loss: 0.031884912960231306


100%|██████████| 5/5 [00:00<00:00, 100.04it/s]


Epoch: 93, Loss: 0.03470580205321312


100%|██████████| 5/5 [00:00<00:00, 99.60it/s]


Epoch: 94, Loss: 0.01866409331560135


100%|██████████| 5/5 [00:00<00:00, 97.19it/s]


Epoch: 95, Loss: 0.038257437758147717


100%|██████████| 5/5 [00:00<00:00, 100.99it/s]


Epoch: 96, Loss: 0.038994062971323726


100%|██████████| 5/5 [00:00<00:00, 100.63it/s]


Epoch: 97, Loss: 0.024892658367753027


100%|██████████| 5/5 [00:00<00:00, 76.08it/s]


Epoch: 98, Loss: 0.03810674846172333


100%|██████████| 5/5 [00:00<00:00, 99.44it/s]


Epoch: 99, Loss: 0.02875652089715004


100%|██████████| 5/5 [00:00<00:00, 99.52it/s]

Epoch: 100, Loss: 0.021084033139050006





モデルの精度を評価する

In [8]:
correct = 0
total = 0

# Torch Script形式で読み込み
test_model = MNISTModel()
test_model.load_state_dict(torch.load('letter_recognition_model.pth', map_location=device))
test_model.eval()
#model.eval()
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy: {100 * correct / total}%")


Accuracy: 100.0%
