# 1. Загрузка и анализ данных

In [2]:
import torch
import torchvision

import pandas as pd
import numpy as np
import random
import time
import csv
import os
from collections import Counter

import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
BATCH_SIZE = 128
IMG_SIZE = 224

transforms = torchvision.transforms.Compose([
    torchvision.transforms.Grayscale(3),
    torchvision.transforms.Resize((IMG_SIZE, IMG_SIZE)),
    torchvision.transforms.ToTensor()
])

train_dataset = torchvision.datasets.EMNIST(root='data', split='letters', train=True,
                                            transform=transforms, download=True)
test_dataset = torchvision.datasets.EMNIST(root='data', split='letters', train=False,
                                           transform=transforms, download=True)

train = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE)
test = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE)

len(train_dataset), len(test_dataset)

(124800, 20800)

In [4]:
Counter([label for _, label in train_dataset]), len(train_dataset.classes)

(Counter({23: 4800,
          7: 4800,
          16: 4800,
          15: 4800,
          17: 4800,
          13: 4800,
          11: 4800,
          22: 4800,
          24: 4800,
          10: 4800,
          14: 4800,
          18: 4800,
          21: 4800,
          26: 4800,
          19: 4800,
          5: 4800,
          2: 4800,
          25: 4800,
          9: 4800,
          12: 4800,
          1: 4800,
          8: 4800,
          4: 4800,
          3: 4800,
          20: 4800,
          6: 4800}),
 27)

In [5]:
Counter([label for _, label in test_dataset]), len(test_dataset.classes)

(Counter({1: 800,
          2: 800,
          3: 800,
          4: 800,
          5: 800,
          6: 800,
          7: 800,
          8: 800,
          9: 800,
          10: 800,
          11: 800,
          12: 800,
          13: 800,
          14: 800,
          15: 800,
          16: 800,
          17: 800,
          18: 800,
          19: 800,
          20: 800,
          21: 800,
          22: 800,
          23: 800,
          24: 800,
          25: 800,
          26: 800}),
 27)

In [6]:
train_dataset.classes

['N/A',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z']

Как видно, в случае EMNIST Letters Dataset дисбаланса классов нет, всего в каждом из датасетов по 27 классов (26 букв и 'N/A').

# 2. ResNet 18

In [4]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# device = 'mps' if torch.mps.is_available() else 'cpu'
device

'cpu'

In [5]:
def model_training(model, train, test, trainer, device, num_epochs):
  loss = torch.nn.CrossEntropyLoss(reduction='sum')

  models_metrics = []
  models_metrics.append(['model', 'epoch', 'train_loss', 'train_acc', 'test_acc'])

  folder_name, model_name = 'metrics', model.__class__.__name__
  file_name = f'{folder_name}/{model_name}.csv'
  if not os.path.exists(folder_name): os.makedirs(folder_name)

  for epoch in range(num_epochs):
    train_loss, train_acc, train_n = 0.0, 0.0, 0
    test_acc, test_n = 0.0, 0
    start = time.time()

    model.train()
    for X, y in train:
      X, y = X.to(device), y.to(device)
      trainer.zero_grad()
      y_pred = model(X)
      l = loss(y_pred, y)
      l.backward()
      trainer.step()
      train_loss += l.item()
      train_acc += (y_pred.argmax(axis=1) == y).sum().item()
      train_n += y.shape[0]

    model.eval()
    for X, y in test:
      X, y = X.to(device), y.to(device)
      test_acc += (model(X).argmax(axis=1) == y).sum()
      test_n += y.shape[0]

    train_acc = train_acc / train_n
    train_loss = train_loss / train_n
    test_acc = test_acc.item() / test_n

    print(f'epoch: {epoch+1}, train_loss: {train_loss}, train_acc: {train_acc}, test_acc: {test_acc}, time: {time.time() - start:.1f}')

    models_metrics.append([model_name, epoch+1, train_loss, train_acc, test_acc])

  with open(file_name, mode='w', newline='') as f:
    writer = csv.writer(f)
    writer.writerows(models_metrics)

In [None]:
model_resnet = torchvision.models.resnet18(pretrained=False)
model_resnet



ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [None]:
model_resnet.fc = torch.nn.Linear(in_features=512, out_features=27)
model_resnet.to(device)

trainer = torch.optim.Adam(model_resnet.parameters(), lr=0.001)
model_training(model_resnet, train, test, trainer, device, 1)

epoch: 1, train_loss: 0.3070030310864632, train_acc: 0.9021554487179487, test_acc: 0.9066346153846154, time: 587.1


# 3. VGG 16

In [None]:
model_vgg = torchvision.models.vgg16(pretrained=False)
model_vgg



VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [None]:
model_vgg.classifier[6] = torch.nn.Linear(in_features=4096, out_features=27)
model_vgg.to(device)

trainer = torch.optim.Adam(model_vgg.parameters(), lr=0.001)
model_training(model_vgg, train, test, trainer, device, 1)

epoch: 1, train_loss: 3.307714084723057, train_acc: 0.03833333333333333, test_acc: 0.038461538461538464, time: 2052.7


# 4. Inception v3

In [8]:
transforms = torchvision.transforms.Compose([
    torchvision.transforms.Grayscale(3),
    torchvision.transforms.Resize((299, 299)),
    torchvision.transforms.ToTensor()
])

train_dataset = torchvision.datasets.EMNIST(root='data', split='letters', train=True,
                                            transform=transforms, download=True)
test_dataset = torchvision.datasets.EMNIST(root='data', split='letters', train=False,
                                           transform=transforms, download=True)

train = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE)
test = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE)

In [6]:
model_inception = torchvision.models.inception_v3(pretrained=False, aux_logits=False)
model_inception



Inception3(
  (Conv2d_1a_3x3): BasicConv2d(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2a_3x3): BasicConv2d(
    (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2b_3x3): BasicConv2d(
    (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (Conv2d_3b_1x1): BasicConv2d(
    (conv): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(80, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_4a_3x3): BasicConv2d(
    (conv): Conv2d(80, 192, kernel_size=(3, 3), stri

In [7]:
model_inception.fc = torch.nn.Linear(in_features=2048, out_features=27)
model_inception.to(device)

trainer = torch.optim.Adam(model_inception.parameters(), lr=0.001)
model_training(model_inception, train, test, trainer, device, 1)

epoch: 1, train_loss: 0.3334550128838955, train_acc: 0.8934615384615384, test_acc: 0.9067788461538462, time: 55266.6


# 5. DenseNet 161

In [9]:
model_densenet = torchvision.models.densenet161(pretrained=False)
model_densenet

DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 96, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(96, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(192, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (rel

In [10]:
model_densenet.classifier = torch.nn.Linear(in_features=2048, out_features=27)
model_densenet.to(device)

trainer = torch.optim.Adam(model_densenet.parameters(), lr=0.001)
model_training(model_densenet, train, test, trainer, device, 1)

: 

# Сводная таблица

In [None]:
df_result = pd.DataFrame(columns=['model', 'epoch', 'train_loss', 'train_acc', 'test_acc'])

for file_name in os.listdir('metrics/'):
    df_result = pd.concat([
        df_result,
        pd.read_csv(f'metrics/{file_name}', header=0, encoding='windows-1252')], ignore_index=True)

df_result.drop(columns='Unnamed: 0', inplace=True)
df_result.head()

  df_result = pd.concat([


Unnamed: 0,model,epoch,train_loss,train_acc,test_acc
0,ResNet,1,0.307003,0.902155,0.906635
1,Inception3,1,0.333455,0.893462,0.906779
2,VGG,1,3.307714,0.038333,0.038462


Все модели обучены только на 1 эпохе из-за ограниченного количество ресурсов. Размер батча 128. При этом, ResNet_18 и VGG_16 обучались на GPU, а Inception_v3 и DenseNet_161 - на CPU, из-за этого время обучения сильно отличается.

Как видно из таблицы, для VGG_16 одной эпохи оказалось мало, результат значительно хуже чем у остальных моделей.