# Практическое занятие №4

## Необходимые сегодня библиотеки

* torch
* torchvision
* timm
* torchinfo
* scikit-learn
* pandas
* numpy
* Pillow
* matplotlib

## Обучение нейронных сетей

### Подготовка датасета

Датасет собран из двух наборов:
* [Cats, dogs and birds](https://universe.roboflow.com/image-classification-y0lsy/cats--dogs-and-birds)
* [Classifier_Animals](https://universe.roboflow.com/rna-class/classifier_animals)

Сам датасет можно скачать по [ссылке](https://mega.nz/file/zBBhmSCR#i6GNK9IP2BZhPS_5ayBUw1cdz-ozXXNVTvva7Vc3cPQ)

In [None]:
from torch.utils.data import Dataset
from PIL import Image
import os
from sklearn.model_selection import train_test_split
import pandas as pd
import random
from torchinfo import summary
import matplotlib.pyplot as plt

Класс для датасета

In [None]:
class ClsDataset(Dataset):
    """Image-classification dataset backed by a table of file names and labels.

    Args:
        data_path: Directory that the file names in ``labels`` are relative to.
        labels: Table indexed through ``.iloc``; column 0 holds the image file
            name and column 1 holds the target label.
        input_channels: ``3`` loads images as RGB; any other value loads them
            as single-channel grayscale (mode ``'L'``).
        transform: Optional callable applied to the PIL image before it is
            returned.
    """

    def __init__(self, data_path, labels, input_channels=3, transform=None):
        self.data_path = data_path
        self.labels = labels
        self.input_channels = input_channels
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the label table."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Load the image of row ``idx`` and return ``(image, target)``."""
        file_name = self.labels.iloc[idx, 0]
        target = self.labels.iloc[idx, 1]

        mode = 'RGB' if self.input_channels == 3 else 'L'
        # Image.open is lazy and may keep the file handle open; the context
        # manager releases it deterministically, while convert() hands back
        # an independent, fully loaded image.
        with Image.open(os.path.join(self.data_path, file_name)) as raw:
            img = raw.convert(mode)

        if self.transform:
            img = self.transform(img)

        return img, target

Загрузка данных

In [None]:
def load_data(data_path, train_transform, val_transform, input_channels=3,
              test_size=0.2, random_state=None):
    """Read the label CSV and build the training and validation datasets.

    Image file names inside the CSV are resolved relative to the directory
    that contains the CSV file.

    Args:
        data_path: Path to the ``;``-separated CSV file with the labels.
        train_transform: Transform passed to the training dataset.
        val_transform: Transform passed to the validation dataset.
        input_channels: Forwarded to ``ClsDataset``.
        test_size: Share of the rows that goes to the validation split.
        random_state: Seed of the split. ``None`` (the default) draws a seed
            from the ``random`` module, so the split follows ``random.seed``;
            pass an int to get the same split on every call.

    Returns:
        Tuple ``(train_dataset, val_dataset)``.
    """
    root_dir = os.path.dirname(os.path.abspath(data_path))
    all_labels = pd.read_csv(data_path, sep=';')

    if random_state is None:
        random_state = random.randint(1, 10000)

    train_labels, valid_labels = train_test_split(all_labels,
                                                  test_size=test_size,
                                                  random_state=random_state)

    # Positional .iloc access in the dataset needs a clean 0..N-1 index.
    train_labels = train_labels.reset_index(drop=True)
    valid_labels = valid_labels.reset_index(drop=True)

    train_dataset = ClsDataset(root_dir, train_labels, input_channels, train_transform)
    val_dataset = ClsDataset(root_dir, valid_labels, input_channels, val_transform)

    return train_dataset, val_dataset

### Подготовка к обучению

In [None]:
import torch

Оценка точности

In [None]:
def accuracy(output, target, topk=(1,)):
    """Compute top-k accuracy in percent for every ``k`` in ``topk``.

    Args:
        output: Score tensor; the k best entries are taken along dimension 1.
        target: Tensor of expected class indices, one per row of ``output``.
        topk: Iterable of ``k`` values to evaluate.

    Returns:
        List of scalar tensors, one per requested ``k``, in the same order.
    """
    largest_k = max(topk)
    n_samples = target.size(0)

    # Indices of the best `largest_k` scores per sample, transposed so that
    # row i holds every sample's (i+1)-th best guess.
    _, top_idx = output.topk(largest_k, dim=1, largest=True, sorted=True)
    top_idx = top_idx.t()
    hits = top_idx.eq(target.view(1, -1).expand_as(top_idx))

    return [
        hits[:k].contiguous().view(-1).float().sum(0).mul_(100.0 / n_samples)
        for k in topk
    ]

In [None]:
class AverageMeter:
    """Keep the latest value and a running weighted average of a metric.

    Attributes are ``val`` (last value), ``sum`` (weighted total),
    ``cnt`` (total weight) and ``avg`` (``sum / cnt``).
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Set every statistic back to zero."""
        self.avg = self.sum = self.cnt = self.val = 0

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` and refresh the average."""
        self.val = val
        self.sum += val * n
        self.cnt += n
        self.avg = self.sum / self.cnt

Цикл обучения

In [None]:
def train(backbone, device, train_loader, criterion, optimizer):
    """Run one training pass over ``train_loader``.

    Args:
        backbone: Model to optimise; switched to train mode here.
        device: Device the batches are moved to.
        train_loader: Iterable yielding ``(data, target)`` batches.
        criterion: Loss callable invoked as ``criterion(output, target)``.
        optimizer: Optimizer stepped once per batch.

    Returns:
        Tuple ``(average loss, average accuracy)`` weighted by batch size;
        the accuracy is the first entry returned by ``accuracy``.
    """
    loss_meter = AverageMeter()
    acc_meter = AverageMeter()

    backbone.train()

    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.type(torch.LongTensor).to(device)

        # Forward pass.
        logits = backbone(images)
        batch_loss = criterion(logits, labels)

        # Book-keeping is done before the weights change.
        top1 = accuracy(logits, labels)[0]
        n_samples = images.size(0)
        loss_meter.update(batch_loss.item(), n_samples)
        acc_meter.update(top1, n_samples)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

    return loss_meter.avg, acc_meter.avg

Цикл валидации

In [None]:
def val(backbone, device, val_loader, criterion):
    """Evaluate ``backbone`` on ``val_loader`` without tracking gradients.

    Args:
        backbone: Model to evaluate; switched to eval mode here.
        device: Device the batches are moved to.
        val_loader: Iterable yielding ``(data, target)`` batches.
        criterion: Loss callable invoked as ``criterion(output, target)``.

    Returns:
        Tuple ``(average loss, average accuracy)`` weighted by batch size;
        the accuracy is the first entry returned by ``accuracy``.
    """
    loss_meter = AverageMeter()
    acc_meter = AverageMeter()

    backbone.eval()

    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.type(torch.LongTensor).to(device)

            logits = backbone(images)
            batch_loss = criterion(logits, labels)

            top1 = accuracy(logits, labels)[0]
            n_samples = images.size(0)
            loss_meter.update(batch_loss.item(), n_samples)
            acc_meter.update(top1, n_samples)

    return loss_meter.avg, acc_meter.avg

### Обучение

Инициализация конфига для обучения

In [None]:
# Training configuration shared by the cells below.
config = {
    'batch_size'    : 16,
    'total_epochs'  : 101,
    'save_epoch'    : 10,  # a checkpoint is written every `save_epoch` epochs
    'learning_rate' : 0.001,
    'data_path'     : './train_data/train/list.csv', # path to the CSV file with labels
    'save_path'     : './weights', # directory where model checkpoints are saved
    'num_classes'   : 4,
    'input_size'    : 224,  # images are resized to input_size x input_size
    'input_channels': 3  # 3 -> RGB, otherwise images are loaded as grayscale
}

In [None]:
import os

In [None]:
# Create the checkpoint directory together with any missing parents.
# exist_ok=True removes the check-then-create race and keeps the cell
# safe to re-run.
os.makedirs(config['save_path'], exist_ok=True)

Выбор девайса для обучения: cpu/gpu

In [None]:
# Use the first CUDA device when one is available, otherwise fall back to CPU.
use_gpu = torch.cuda.is_available()
device = torch.device('cuda:0' if use_gpu else 'cpu')

print('Device is', device)

Инициализация начальных состояний

In [None]:
import time

# Seed the torch and Python RNGs from the wall clock (in milliseconds) so that
# every run differs, and print the seed so a particular run can be reproduced.
random_seed = round(time.time() * 1000)
torch.manual_seed(random_seed)
random.seed(random_seed)
print('Random seed is', random_seed)

Искажения для обучения

In [None]:
import torchvision.transforms as transforms

[Типы](https://pytorch.org/vision/main/auto_examples/plot_transforms.html#sphx-glr-auto-examples-plot-transforms-py) искажений

In [None]:
# Input pipelines: normalisation statistics depend on the channel count.
if config['input_channels'] == 1:
    normalize = transforms.Normalize(mean=[0.498], std=[0.502])
else:
    normalize = transforms.Normalize(
        mean=[0.498, 0.498, 0.498],
        std=[0.502, 0.502, 0.502],
    )

# Training: random augmentations, then resize / tensor conversion / normalise.
train_transform = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomAutocontrast(),
        transforms.ColorJitter(brightness=.1, hue=.1),
        transforms.Resize((config['input_size'], config['input_size'])),
        transforms.ToTensor(),
        normalize,
    ]
)

# Validation: deterministic preprocessing only.
val_transform = transforms.Compose(
    [
        transforms.Resize((config['input_size'], config['input_size'])),
        transforms.ToTensor(),
        normalize,
    ]
)

Загрузка данных

In [None]:
# Build the datasets and wrap them in batch loaders.
train_dataset, val_dataset = load_data(
    config['data_path'],
    train_transform,
    val_transform,
    config['input_channels'],
)

train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=config['batch_size'],
    shuffle=True,
    num_workers=0,
    pin_memory=use_gpu,
    drop_last=True
)

# Validation must cover every sample in a fixed order: no shuffling and no
# dropped tail batch, otherwise the metrics are computed on a subset that
# changes from epoch to epoch.
val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=config['batch_size'],
    shuffle=False,
    num_workers=0,
    pin_memory=use_gpu,
    drop_last=False
)

print('Train num = {}'.format(len(train_dataset)))
print('Val num = {}'.format(len(val_dataset)))

print('Train batch = {}'.format(len(train_loader)))
print('Val batch = {}'.format(len(val_loader)))

Подготовка модели

In [None]:
import timm

In [None]:
# Show the timm model names available with the `pretrained=True` filter.
timm.list_models(pretrained=True)

In [None]:
# Pretrained MobileNetV3 with the head and input stem sized from the config.
backbone = timm.create_model(
    'mobilenetv3_large_100',
    pretrained=True,
    num_classes=config['num_classes'],
    in_chans=config['input_channels'],
)
backbone = backbone.to(device)

In [None]:
# Display the model's classification head.
backbone.get_classifier()

In [None]:
# Layer-by-layer summary for one batch-shaped input (batch, channels, H, W).
summary(
    backbone,
    input_size=(
        config['batch_size'],
        config['input_channels'],
        config['input_size'],
        config['input_size'],
    ),
)

Замораживаем веса всех слоёв, кроме финального классификатора: отключаем для них вычисление градиентов

In [None]:
def freeze_pretrained_layers(model):
    """Freeze every parameter of ``model`` except those of ``model.classifier``.

    Args:
        model: Module exposing its classification head as ``classifier``.
            The head may have any set of parameters (for example no bias).
    """
    for param in model.parameters():
        param.requires_grad = False
    # Iterating over the head's own parameters avoids touching a missing bias.
    for param in model.classifier.parameters():
        param.requires_grad = True

In [None]:
# Leave only the classifier head of the backbone trainable.
freeze_pretrained_layers(backbone)

In [None]:
# Sanity check of the freeze: a non-classifier weight (bn1) should report
# False and the classifier weight should report True.
print(backbone.bn1.weight.requires_grad)
print(backbone.classifier.weight.requires_grad)

Инициализация остальных параметров

In [None]:
# Loss, optimizer and a step-wise learning-rate schedule
# (the rate is multiplied by 0.1 every 10 scheduler steps).
criterion = torch.nn.CrossEntropyLoss()
criterion = criterion.to(device)
optimizer = torch.optim.AdamW(
    backbone.parameters(),
    lr=config['learning_rate'],
)
scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=10,
    gamma=0.1,
)

In [None]:
def get_lr(optimizer):
    """Return the learning rate of the optimizer's first parameter group.

    Returns ``None`` when the optimizer has no parameter groups.
    """
    first_group = next(iter(optimizer.param_groups), None)
    if first_group is None:
        return None
    return first_group['lr']

Сохранение весов модели

In [None]:
def save_checkpoint(save_path, state, epoch, tag=''):
    """Serialize ``state`` to ``<save_path>/<tag>checkpoint-<epoch>.pth.tar``.

    Args:
        save_path: Target directory; created with its parents when missing.
        state: Object handed to ``torch.save``.
        epoch: Epoch number, zero-padded to six digits in the file name.
        tag: Optional prefix of the file name.
    """
    # exist_ok=True avoids the race between an existence check and creation.
    os.makedirs(save_path, exist_ok=True)
    filename = os.path.join(save_path, "{}checkpoint-{:06}.pth.tar".format(tag, epoch))
    torch.save(state, filename)

In [None]:
from IPython.display import clear_output
%matplotlib inline

In [None]:
print('Start train')
# Per-epoch history; every list gets exactly one entry per epoch.
log = {"epoch": [], "train_loss": [],  "val_loss": [], "train_acc": [], "val_acc": []}

for epoch in range(config['total_epochs']):
    train_loss, train_acc = train(backbone, device, train_loader, criterion, optimizer)
    val_loss, val_acc = val(backbone, device, val_loader, criterion)

    # Periodic checkpoint with everything needed to restore the model.
    if epoch % config['save_epoch'] == 0:
        state = {
            'epoch': epoch,
            'state_dict': backbone.state_dict(),
            'loss': val_loss,
            'acc': val_acc,
            'optimizer': optimizer.state_dict(),
            'criterion': criterion.state_dict(),
            'input_shape': (config['input_size'], config['input_size'], config['input_channels']),
            'num_classes': config['num_classes']
        }
        save_checkpoint(config['save_path'], state, epoch, '')

    log['epoch'].append(epoch)
    log['train_loss'].append(train_loss)
    log['val_loss'].append(val_loss)
    # The accuracy lists were declared but never filled; store plain floats
    # so the history does not keep tensors alive.
    log['train_acc'].append(float(train_acc))
    log['val_acc'].append(float(val_acc))

    # Redraw the loss curves in place.
    clear_output(wait=True)
    plt.plot(log['epoch'], log['train_loss'], label='train')
    plt.plot(log['epoch'], log['val_loss'], label='val')
    plt.legend()
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.title('Loss')
    plt.show()

    line = '[{}/{}]\t\tLR: {:.2}\t\tTrain loss: {:.3}\t\tTrain acc: {:.3}\t\tVal loss: {:.3}\t\tVal acc: {:.3}'.format(
        epoch,
        config['total_epochs']-1,
        get_lr(optimizer),
        train_loss,
        train_acc,
        val_loss,
        val_acc
    )
    print(line)

    # Advance the learning-rate schedule once per epoch.
    scheduler.step()

print('Stop train')

Проверка полученной модели

In [None]:
import numpy as np

In [None]:
# Human-readable class names, indexed by the predicted class id.
# NOTE(review): presumably this order matches the label ids in the CSV — confirm.
classes = ['Bird', 'Cat', 'Dog', 'Lion']

In [None]:
# Checkpoint of the last saved epoch, inside the configured checkpoint directory.
model_path = os.path.join(config['save_path'], 'checkpoint-000100.pth.tar')

checkpoint = torch.load(model_path, map_location=device)
# Build the network exactly like the training backbone (same head size and
# input channels), otherwise the state dict does not fit.
inf_backbone = timm.create_model(
    'mobilenetv3_large_100',
    pretrained=False,
    num_classes=config['num_classes'],
    in_chans=config['input_channels'],
).to(device)
inf_backbone.load_state_dict(checkpoint['state_dict'])
inf_backbone.eval()

# os.path.join keeps the path valid on every OS (the backslash literal only
# worked on Windows).
test_data_path = os.path.join(
    '.', 'train_data', 'test', 'dog',
    'Cachorro72_jpg.rf.1f37f27288e95c32be7f382fc24388cd.jpg',
)
# Same colour mode rule as ClsDataset: 3 channels -> RGB, otherwise grayscale.
with Image.open(test_data_path) as raw_img:
    pil_img = raw_img.convert('RGB' if config['input_channels'] == 3 else 'L')
img = val_transform(pil_img)

# Add the batch dimension and run the forward pass without autograd.
data = img.unsqueeze(0).to(device)
with torch.no_grad():
    output = inf_backbone(data)
soft_output = torch.softmax(output, dim=-1)
confidence, predicted = torch.max(soft_output, dim=1)
confidence = confidence[0].item()
predicted = predicted[0].item()

print('Class: {}\t Confidence: {}'.format(
    classes[predicted],
    confidence
))