In [1]:
%pylab
import pandas as pd
import numpy as np

import torch
from torch import nn
import torch.nn.functional as F
from tqdm import tqdm

import matplotlib.pyplot as plt
import os
import cv2

Using matplotlib backend: TkAgg
%pylab is deprecated, use %matplotlib inline and import the required libraries.
Populating the interactive namespace from numpy and matplotlib


In [3]:
# рабочее пространство нашего датасета
workspace = '/home/caruwoga/Documents/nn/letters_ds/'

# папки, с которыми будем вести работу
folders = ['test/', 'train/']

# алфавит, с которым мы работаем
alphabet = 'абвгдеёжзийклмнопрстуфхцчшщъыьэюя'

In [None]:
# создаем папки для тестовой и тренировочной выборок
os.makedirs(workspace + folders[0])
os.makedirs(workspace + folders[1])

In [None]:
# создадим подпапки для букв
for dataset in range(2):
    for letter in alphabet:
        os.makedirs(workspace + folders[dataset] + letter)

In [None]:
# сформируем выборки
import random
import shutil

pics_dir = workspace + 'all_letters_image/'

for label in range(1, 34):
    letter = alphabet[label-1]
    
    if label < 10:
        primary_sym = '0' + str(label)
    else:
        primary_sym = str(label)

    pics_list = []

    for file in sorted(os.listdir(pics_dir)):
        if file.startswith(primary_sym):
            pics_list.append(file)
        else:
            break

    test_data = random.sample(pics_list, k=int(0.2 * len(pics_list)))
    train_data = list(set(pics_list) - set(test_data))

    # 20% в тестовую выборку
    for file in test_data:
        shutil.move(pics_dir + file, workspace + folders[0] + letter)

    # остальное в тренировочную выборку
    for file in train_data:
        shutil.move(pics_dir + file, workspace + folders[1] + letter)

In [28]:
# напишем класс для работы с выборками
class LettersDataset(torch.utils.data.Dataset):
    def __init__(self, workspace_path:str):
        super().__init__()

        self.workspace_path = workspace_path
        self.pics_list = [] # список фотографий

        for letter in alphabet:
            self.pics_list.append(sorted(os.listdir(workspace_path + letter)))
        
    def __len__(self):
        length = 0
        for label in range(33):
            length += len(self.pics_list[label])
        return length

    def __getitem__(self, idx):
        board = 0
        for label in range(33):
            board += len(self.pics_list[label])
            if idx < board:
                class_id = label
                break

        # смещение
        new_idx = idx - (board - len(self.pics_list[class_id]))

        img_path = self.workspace_path + alphabet[class_id] + '/' + self.pics_list[class_id][new_idx]
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)

        # BGR -> RGB
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (32, 32))

        img = img.astype(np.float32)
        img = img / 255.0

        img = img.transpose((2, 0, 1))

        t_image = torch.from_numpy(img)
        t_class_id = torch.tensor(class_id)
        
        return {'img': t_image, 'label': t_class_id}

In [29]:
# создадим выборки
train_path = workspace + folders[1]
test_path = workspace + folders[0]

train = LettersDataset(train_path) # тренировочная
test = LettersDataset(test_path) # тестовая

In [30]:
# количество элементов в тренировочной выборке
print(len(train))

# количество элементов в тестовой выборке
print(len(test))

11352
2838


In [31]:
# есть ли в наличии cuda
torch.cuda.is_available()

False

In [32]:
# ?
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
device

'cpu'

In [33]:
# batch_size - количество одновременно обрабатываемых процессором примеров
batch_size = 11

train_loader = torch.utils.data.DataLoader(
    train, batch_size = batch_size,
    shuffle = True, num_workers = 2)

test_loader = torch.utils.data.DataLoader(
    test, batch_size = batch_size,
    shuffle = True, num_workers = 2)

In [34]:
print(f'Для преодоления одной эпохи тренировочного набора требуется: {len(train) / batch_size}')
print(f'Для преодоления одной эпохи тестового набора требуется: {len(test) / batch_size}')

Для преодоления одной эпохи тренировочного набора требуется: 1032.0
Для преодоления одной эпохи тестового набора требуется: 258.0


In [35]:
# написанные модели

# первая модель
class firstNet(nn.Module):
    def __init__(self):
        super().__init__() 
        
        # функция активации
        self.act = nn.LeakyReLU(0.25) #0.2
        
        # выбираем наибольшее из двух
        self.maxpool = nn.MaxPool2d(2, 2)

        # двумерная свертка
        self.conv0 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3)
        self.conv1 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size = 3)
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size = 2)

        # сглаживание
        self.flatten = nn.Flatten()

        # полносвязные слои
        self.linear0 = nn.Linear(in_features=512, out_features=128)
        self.linear1 = nn.Linear(in_features=128, out_features=128)

        # борьба с переобучением
        self.drop0 = nn.Dropout(p=0.35)
        self.drop1 = nn.Dropout(p=0.25)
        self.drop2 = nn.Dropout(p=0.05)

        # выходной слой
        self.out = nn.Linear(in_features=128, out_features=33)

    def forward(self, x):
        # [3, 32, 32] 
        out = self.conv0(x)
        out = self.act(out)
        #print(out.shape) # [32, 30, 30]        
        out = self.maxpool(out)
        #print(out.shape) # [32, 15, 15]
        out = self.conv1(out)
        out = self.act(out)
        out = self.drop2(out)
        out = self.maxpool(out)
        #print(out.shape) # [64, 6, 6]
        out = self.conv2(out)
        out = self.act(out)
        out = self.maxpool(out)
        #print(out.shape) # [128, 2, 2]
        out = self.flatten(out)
        out = self.linear0(out)
        out = self.act(out)
        out = self.drop0(out)
        out = self.linear1(out)
        out = self.drop1(out)
        out = self.out(out)
        return out

# вторая модель
class paddingNet(nn.Module):
    def __init__(self):
        super().__init__()

        self.act = nn.LeakyReLU(0.35)

        self.maxpool = nn.MaxPool2d(2, 2)

        self.conv0 = nn.Conv2d(in_channels=3, out_channels=512, kernel_size=3, padding=2)
        self.conv1 = nn.Conv2d(in_channels=512, out_channels=256, kernel_size=2, padding=1)
        self.conv2 = nn.Conv2d(in_channels=256, out_channels=128, kernel_size=2)
        
        self.flatten = nn.Flatten()

        self.linear0 = nn.Linear(in_features=2048, out_features=1024)
        self.linear1 = nn.Linear(in_features=1024, out_features=512)
        self.linear2 = nn.Linear(in_features=512, out_features=256)

        self.drop0 = nn.Dropout(p=0.3)
        self.drop1 = nn.Dropout(p=0.2)

        self.out = nn.Linear(in_features=256, out_features=33)

    def forward(self, x):
        out = self.conv0(x)
        out = self.act(out)
        out = self.maxpool(out)
        out = self.drop0(out)
        out = self.conv1(out)
        out = self.act(out)
        out = self.maxpool(out)
        out = self.drop1(out)
        out = self.conv2(out)
        out = self.act(out)
        out = self.maxpool(out)
        out = self.flatten(out)
        out = self.linear0(out)
        out = self.act(out)
        out = self.linear1(out)
        out = self.act(out)
        out = self.drop1(out)
        out = self.linear2(out)
        out = self.out(out)
        return out

# третья модель
class TinyNet(nn.Module):
    def __init__(self):
        super().__init__()

        self.act = nn.LeakyReLU(0.15)

        self.maxpool = nn.MaxPool2d(2, 2)
        
        self.conv0 = nn.Conv2d(in_channels=3, out_channels=512, kernel_size=4)
        self.conv1 = nn.Conv2d(in_channels=512, out_channels=64, kernel_size=3)

        self.flatten = nn.Flatten()

        self.linear0 = nn.Linear(in_features=9216, out_features=800)

        self.drop0 = nn.Dropout(p=0.2)

        self.out = nn.Linear(in_features=800, out_features=33)

    def forward(self, x):
        out = self.conv0(x)
        out = self.act(out)
        out = self.maxpool(out)
        out = self.conv1(out)
        out = self.act(out)
        out = self.flatten(out)
        out = self.linear0(out)
        out = self.act(out)
        out = self.drop0(out)
        out = self.out(out)
        return out
        

In [36]:
model = firstNet()

#model = TinyNet()
model.to(device)

firstNet(
  (act): LeakyReLU(negative_slope=0.25)
  (maxpool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv1): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(64, 128, kernel_size=(2, 2), stride=(1, 1))
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear0): Linear(in_features=512, out_features=128, bias=True)
  (linear1): Linear(in_features=128, out_features=128, bias=True)
  (drop0): Dropout(p=0.35, inplace=False)
  (drop1): Dropout(p=0.25, inplace=False)
  (drop2): Dropout(p=0.05, inplace=False)
  (out): Linear(in_features=128, out_features=33, bias=True)
)

In [37]:
# подсчитаем число параметров
params_num = 0
for x in model.parameters():
    params_num += len(torch.flatten(x))

print(f'Число параметров в модели: {params_num:,}')

Число параметров в модели: 138,721


In [38]:
for sample in train_loader:
    img = sample['img'].to(device)
    label = sample['label']
    print(f'input shape: {img.shape}')
    print(f'after network shape: {model(img).shape}')
    model(img)
    break

input shape: torch.Size([11, 3, 32, 32])
after network shape: torch.Size([11, 33])


In [39]:
# функция потерь
loss_fn = nn.CrossEntropyLoss()

# оптимизатор
# lr - скорость обучения
# lr = 0.00003

lr = 0.001
#lr = 0.0005 # first model 

optimizer = torch.optim.Adam(model.parameters(), lr)

In [40]:
def train_one_epoch(model):
    model.train(True) # модель находится в режиме обучения
    
    current_loss = 0.0
    current_accuracy = 0.0

    sum_loss = 0.0

    # номер пакета и данные
    for batch_index, sample in enumerate(train_loader):
        img = sample['img'].to(device)
        label = sample['label'].to(device)

        # если есть ненулевые значения для градиентов, зануляем их ?
        optimizer.zero_grad()

        # выходные данные модели для этого пакета
        # форма: [batch_size, 33]
        out = model(img) 
        
        # сумма значений меток = предсказание для этого изображения
        correct = torch.sum(label == torch.argmax(out, dim=1)).item() 

        # точность
        current_accuracy += correct / batch_size
        
        # сравниваем значения выходных данных с истинными метками
        loss = loss_fn(out, label)
        current_loss += loss.item()
        
        # обратно идем - обратное распространение
        loss.backward()
        optimizer.step()
        
        # проверяем в каждых 500 пакетах средние потери
        if batch_index % 500 == 499: 
            avg_loss = current_loss / 500
            sum_loss += current_loss
            avg_accuracy = (current_accuracy / 500) * 100

            print('Batch {0}, loss: {1:.3f}, Accuracy: {2:.1f}%'.format(batch_index+1,
                                                                       avg_loss, avg_accuracy))
            
            current_loss = 0.0
            current_accuracy = 0.0
    sum_loss = sum_loss / len(train_loader)
    
    return sum_loss

In [41]:
def test_one_epoch(model):
    model.train(False) # модель находится в режиме оценки

    current_loss = 0.0
    current_accuracy = 0.0

    for index, sample in enumerate(test_loader):
        img = sample['img'].to(device)
        label = sample['label'].to(device)

        # об этом не беспокоимся, т.к. ничего не обучаем
        with torch.no_grad():
            out = model(img)
            correct = torch.sum(label == torch.argmax(out, dim=1)).item() 
            current_accuracy += correct / batch_size
            loss = loss_fn(out, label)
            current_loss += loss.item()

    avg_loss = current_loss / len(test_loader)
    avg_accuracy = (current_accuracy / len(test_loader)) * 100

    print('Test loss: {0:.3f}, Test Accuracy: {1:.1f}%'.format(avg_loss, avg_accuracy))
    print('**********************************************')
    print()

    return avg_loss, avg_accuracy

In [42]:
num_epochs = 10

tr_los, ts_los, ts_acc = [], [], []

for epoch_index in range(num_epochs):
    print(f'Epoch: {epoch_index + 1}\n')

    train_losses = train_one_epoch(model)
    test_losses, test_accuracies = test_one_epoch(model)
    
    tr_los.append(train_losses)
    ts_los.append(test_losses)
    ts_acc.append(test_accuracies)

print('Finished Training')

Epoch: 1

Batch 500, loss: 3.459, Accuracy: 4.6%
Batch 1000, loss: 3.279, Accuracy: 8.3%
Test loss: 2.847, Test Accuracy: 18.0%
**********************************************

Epoch: 2

Batch 500, loss: 2.624, Accuracy: 22.7%
Batch 1000, loss: 2.044, Accuracy: 39.6%
Test loss: 1.621, Test Accuracy: 52.7%
**********************************************

Epoch: 3

Batch 500, loss: 1.644, Accuracy: 51.5%
Batch 1000, loss: 1.425, Accuracy: 57.8%
Test loss: 1.198, Test Accuracy: 64.6%
**********************************************

Epoch: 4

Batch 500, loss: 1.256, Accuracy: 62.5%
Batch 1000, loss: 1.136, Accuracy: 66.2%
Test loss: 0.982, Test Accuracy: 71.9%
**********************************************

Epoch: 5

Batch 500, loss: 0.995, Accuracy: 70.3%
Batch 1000, loss: 0.996, Accuracy: 70.1%
Test loss: 0.900, Test Accuracy: 73.0%
**********************************************

Epoch: 6

Batch 500, loss: 0.869, Accuracy: 73.1%
Batch 1000, loss: 0.894, Accuracy: 73.1%
Test loss: 0.824, Tes

In [43]:
def plot_results(test_accuracy, test_loss, train_loss):
    epochs = range(1, num_epochs + 1)
    plt.figure(figsize(12, 4))
    plt.subplot(1, 2, 1)
    
    plt.plot(epochs, train_loss, 'bo-', label='Training loss')
    plt.plot(epochs, test_loss, 'ro-', label='Test loss')
    plt.title('Training and validation loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(epochs, test_accuracy, 'go-', label='Test accuracy')
    plt.title('Validation accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.tight_layout()
    plt.show()

In [44]:
plot_results(ts_acc, ts_los, tr_los)

In [107]:
def train_model(model, num_epochs, train_loader, valid_loader, optimizer, criterion):
    train_loss_avg = []
    valid_loss_avg = []
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0

        
        for index, sample in enumerate(train_loader):
            img = sample['img'].to(device)
            label = sample['label'].to(device)

            # об этом не беспокоимся, т.к. ничего не обучаем
            with torch.no_grad():
                out = model(img)
                correct = torch.sum(label == torch.argmax(out, dim=1)).item() 
                current_accuracy += correct / batch_size
                loss = loss_fn(out, label)
                current_loss += loss.item()

        avg_loss = current_loss / len(test_loader)
        avg_accuracy = (current_accuracy / len(test_loader)) * 100
    
        return model, avg_loss, avg_accuracy

In [108]:
import torchvision as tv

model_prepaded = tv.models.resnet34(weights=tv.models.ResNet34_Weights.IMAGENET1K_V1).to(device)

num_ftrs = model_prepaded.fc.in_features

model_prepaded.fc = nn.Linear(num_ftrs, 33).to(device)
lossFunc = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model_prepaded.parameters(), lr=0.0004, weight_decay = 0.005, momentum = 0.9)

transfer_model, transfer_train_loss_avg, transfer_validate_loss_avg = train_model(model_prepaded, num_epochs, train_loader, test_loader, optimizer, lossFunc)

UnboundLocalError: local variable 'current_accuracy' referenced before assignment