Подключим библиотеки

In [1]:
import os
import torch
import random
import numpy as np
import pandas as pd
import torchvision
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
import shutil
import time
import xml.etree.ElementTree as et

from tqdm import tqdm
from PIL import Image
from torchvision import models
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
# Number of samples per mini-batch; passed as batch_size to both DataLoaders below.
BATCH_SIZE = 32

Выбираем, где будут выполняться вычисления.<br>
Если доступна CUDA, выполняем все вычисления на видеокарте, иначе — на процессоре.

In [2]:
# Pick the compute device once: the GPU when CUDA is available, otherwise the CPU.
use_gpu = torch.cuda.is_available()
if use_gpu:
    device = 'cuda'
else:
    device = 'cpu'

Подготовим Датасеты

In [3]:
# Image datasets read from disk with ImageFolder; no transform is set here,
# it is assigned to each dataset afterwards.
# Note: the validation split is loaded from the 'data/test' directory.
train_dataset = ImageFolder(root='data/train')
valid_dataset = ImageFolder(root='data/test')

Добавим аугментации — случайное горизонтальное отражение, автоконтраст и эквализацию гистограммы (чтобы улучшить обобщающую способность моделей и снизить риск переобучения).

In [4]:
# Per-channel normalisation applied after conversion to a tensor
# (same mean/std for the training and validation pipelines).
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Training pipeline: resize, random augmentations, tensor conversion, normalisation.
train_steps = [
    transforms.Resize([70, 70]),
    transforms.RandomHorizontalFlip(),
    transforms.RandomAutocontrast(),
    transforms.RandomEqualize(),
    transforms.ToTensor(),
    normalize,
]
train_dataset.transform = transforms.Compose(train_steps)

# Validation pipeline: the same preprocessing without the random augmentations.
valid_steps = [
    transforms.Resize([70, 70]),
    transforms.ToTensor(),
    normalize,
]
valid_dataset.transform = transforms.Compose(valid_steps)

In [5]:
# Mini-batch iterators over the two datasets. Only the training data is
# reshuffled; validation batches keep the dataset order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)

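Возьмём предобученную нейросеть VGG19.<br>
Замораживаем свёрточные слои модели (`features`), кроме последних (начиная с индекса 30), и заменяем выходной слой классификатора под наши классы.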

In [6]:
def vgg_19():
    """Build an ImageNet-pretrained VGG19 sized for the training dataset's classes.

    The last classifier layer is replaced by a new linear layer with
    ``len(train_dataset.classes)`` outputs. Inside ``features`` only the
    blocks at index 30 and above remain trainable; all earlier blocks are
    frozen. The classifier parameters are left as constructed.

    Returns:
        The configured model; it is not moved to any device here.
    """
    first_trainable_block = 30
    net = models.vgg19(weights=models.VGG19_Weights.IMAGENET1K_V1)
    net.classifier[6] = torch.nn.Linear(4096, len(train_dataset.classes))

    # Single pass over the feature extractor: freeze the early blocks and
    # keep the tail (index >= first_trainable_block) trainable.
    for position, layer in enumerate(net.features):
        trainable = position >= first_trainable_block
        for weight in layer.parameters():
            weight.requires_grad = trainable
    return net

Для сравнения также воспользуемся моделью GoogLeNet.

In [7]:
def google():
    """Build an ImageNet-pretrained GoogLeNet sized for the training dataset's classes.

    The final ``fc`` layer is replaced by a new linear layer with
    ``len(train_dataset.classes)`` outputs. The inception blocks 3a-4e are
    frozen; every other parameter stays trainable.

    Returns:
        The configured model; it is not moved to any device here.
    """
    model = models.googlenet(weights=models.GoogLeNet_Weights.IMAGENET1K_V1)
    model.fc = torch.nn.Linear(1024, len(train_dataset.classes))
    for param in model.parameters():
        param.requires_grad = True

    # Assigning `module.requires_grad = False` merely creates a plain attribute
    # on the module and freezes nothing. `requires_grad_` is the Module method
    # that actually switches the flag on every parameter of the block.
    frozen_blocks = (
        model.inception3a,
        model.inception3b,
        model.inception4a,
        model.inception4b,
        model.inception4c,
        model.inception4d,
        model.inception4e,
    )
    for block in frozen_blocks:
        block.requires_grad_(False)
    return model

Определяем функцию обучения модели 

In [8]:
def train(model, optimizer, train_loader, val_loader, epoch=10):
    """Train ``model`` and evaluate it on ``val_loader`` after every epoch.

    Relies on the notebook-level ``criterion`` (loss function) and ``device``
    being defined before the call. Only the batches are moved to ``device``
    here, so the model is assumed to be on that device already.

    Args:
        model: network to optimise.
        optimizer: optimizer stepping the model's parameters.
        train_loader: iterable of ``(image, target)`` batches for training.
        val_loader: iterable of ``(image, target)`` batches for validation.
        epoch: number of epochs to run.

    Returns:
        pandas.DataFrame with one row per epoch and the columns ``epochs``,
        ``loss_train``, ``loss_valid``, ``acc_train``, ``acc_valid``
        (metrics rounded to 3 decimals).
    """
    lern_result = pd.DataFrame(columns = ('epochs','loss_train','loss_valid','acc_train','acc_valid'))

    # A dedicated loop variable keeps the `epoch` argument (the epoch count) intact.
    for epoch_idx in tqdm(range(epoch)):
        # Train.
        losses, equals = [], []
        model.train()
        # Scoped grad-mode context managers restore the caller's autograd state
        # on exit, so gradients are not left globally disabled after training.
        with torch.enable_grad():
            for image, target in train_loader:
                image = image.to(device)
                target = target.to(device)
                output = model(image)
                loss = criterion(output, target)

                losses.append(loss.item())
                # One bulk transfer instead of a per-element .item() call.
                equals.extend((torch.argmax(output, 1) == target).tolist())

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        loss_train = round(np.mean(losses), 3)
        acc_train = round(np.mean(equals), 3)

        # Validate on the loader passed by the caller (not a notebook global).
        losses, equals = [], []
        model.eval()
        with torch.no_grad():
            for image, target in val_loader:
                image = image.to(device)
                target = target.to(device)

                output = model(image)
                loss = criterion(output, target)

                losses.append(loss.item())
                equals.extend((torch.argmax(output, 1) == target).tolist())

        loss_valid = round(np.mean(losses), 3)
        acc_valid = round(np.mean(equals), 3)

        # Record this epoch's metrics (epochs are numbered from 1).
        lern_result.loc[len(lern_result.index)] = [
            epoch_idx + 1,
            loss_train,
            loss_valid,
            acc_train,
            acc_valid
        ]

    return lern_result

Обучение модели VGG19

In [10]:
# Loss function, placed on the compute device.
criterion = torch.nn.CrossEntropyLoss().to(device)
epochs_count = 50

model = vgg_19()
print('Model: vgg_19\n')

optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
model = model.to(device)

lern_result = train(model, optimizer, train_loader, valid_loader, epoch=epochs_count)

# Save the trained model weights.
torch.save(model.state_dict(), 'vgg_ready_model.pth')
# Save the per-epoch training metrics.
lern_result.to_csv('vgg_lern_result.csv')

torch.cuda.empty_cache()
print('Обучение окончено')

Model: vgg_19



  0%|                                                    | 0/50 [00:00<?, ?it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 392.00 MiB. GPU 0 has a total capacity of 1.95 GiB of which 92.19 MiB is free. Process 3181 has 488.12 MiB memory in use. Including non-PyTorch memory, this process has 1.21 GiB memory in use. Of the allocated memory 1.06 GiB is allocated by PyTorch, and 96.15 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

Обучение модели GoogLeNet

In [9]:
# Loss function, placed on the compute device.
criterion = torch.nn.CrossEntropyLoss().to(device)
epochs_count = 50

model = google()
print('Model: google\n')

optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
model = model.to(device)

lern_result = train(model, optimizer, train_loader, valid_loader, epoch=epochs_count)

# Save the trained model weights.
torch.save(model.state_dict(), 'google_ready_model.pth')
# Save the per-epoch training metrics.
lern_result.to_csv('google_lern_result.csv')

torch.cuda.empty_cache()
print('Обучение окончено')

Model: google



100%|███████████████████████████████████████████| 50/50 [13:24<00:00, 16.10s/it]

Обучение окончено



