## Урок 4. Домашнее задание

1. Обучение классификатора картинок на примере CIFAR-100 (датасет можно изменить) сверточной сетью (самописной)

In [17]:
import numpy as np
import torch

from torch import nn
from torch.nn import functional as F
from PIL import Image
from torchvision import transforms, datasets
from tqdm import tqdm

from sklearn.model_selection import train_test_split

In [18]:
# NOTE(review): RandomGrayscale, ColorJitter and RandomRotation are imported by
# name here, but the visible code only reaches them through `transforms.<Name>`.
from torchvision.transforms.transforms import RandomGrayscale, ColorJitter, RandomRotation
# Training split of CIFAR-100 kept under data/ (download requested if needed).
# No transform is attached at this point; MyOwnCifar applies transforms later.
dataset = datasets.CIFAR100(root='data/', train=True, download=True)

def train_valid_split(Xt, test_size=0.2, random_state=42):
    """Split ``Xt`` into a training part and a validation part.

    Thin wrapper around ``sklearn.model_selection.train_test_split``.

    Args:
        Xt: Collection of samples to split.
        test_size: Size of the held-out part, forwarded to
            ``train_test_split``. The default keeps the original 80/20 split.
        random_state: Seed forwarded to ``train_test_split`` so that repeated
            calls produce the same partition. The default keeps the original
            seed of 42.

    Returns:
        A ``(train, valid)`` tuple as produced by ``train_test_split``.
    """
    train_part, valid_part = train_test_split(
        Xt, test_size=test_size, random_state=random_state
    )
    return train_part, valid_part

class MyOwnCifar(torch.utils.data.Dataset):
    """Dataset wrapper that applies an optional transform to each sample's image.

    Every item of the wrapped collection is read as ``item[0]`` (image) and
    ``item[1]`` (label); only the image goes through ``transform``.
    """

    def __init__(self, init_dataset, transform=None):
        """Remember the wrapped collection and the optional image transform.

        Args:
            init_dataset: Indexable, sized collection of ``(image, label)``
                samples.
            transform: Callable applied to the image on access, or ``None``
                to return the image unchanged.
        """
        self._base_dataset = init_dataset
        self.transform = transform

    def __len__(self):
        """Return the number of samples in the wrapped collection."""
        return len(self._base_dataset)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for ``idx``, transforming the image if configured."""
        # Fetch the sample once. Indexing the base collection separately for
        # the image and for the label doubles the work and, on a base dataset
        # that computes items lazily, may pair values from two different
        # fetches.
        sample = self._base_dataset[idx]
        img, label = sample[0], sample[1]
        if self.transform is not None:
            img = self.transform(img)
        return img, label
    
# Train-time augmentation pipeline. The Resize(44) and RandomCrop(32) steps
# are left disabled.
trans_actions = transforms.Compose([
    transforms.RandomGrayscale(0.2),
    transforms.ColorJitter(brightness=.3, hue=.4),
    transforms.RandomRotation(degrees=(0, 5)),
    transforms.ToTensor(),
])

# Split first, then wrap each part: augmentation for the training samples,
# plain tensor conversion for the validation samples.
train_dataset, valid_dataset = train_valid_split(dataset)
train_dataset, valid_dataset = (
    MyOwnCifar(train_dataset, trans_actions),
    MyOwnCifar(valid_dataset, transforms.ToTensor()),
)

# Both loaders use batches of 128 and two workers; only training shuffles.
train_loader, valid_loader = (
    torch.utils.data.DataLoader(
        train_dataset, batch_size=128, shuffle=True, num_workers=2
    ),
    torch.utils.data.DataLoader(
        valid_dataset, batch_size=128, shuffle=False, num_workers=2
    ),
)

Files already downloaded and verified


In [19]:
class Net(nn.Module):
    """Small convolutional classifier with 100 output logits.

    Three stages of BatchNorm -> Conv -> activation -> 2x2 max-pool are
    followed by a three-layer fully connected head. The first linear layer
    takes 512 features, i.e. 32 channels x 4 x 4 after three halvings, which
    corresponds to 32x32 inputs.
    """

    def __init__(self):
        super().__init__()
        # Registration order is kept as-is: it fixes both the printed layout
        # and the order in which layers draw their initial weights.
        self.dp_three = nn.Dropout(p=0.2)
        self.dp_four = nn.Dropout(p=0.2)

        self.bn_one = nn.BatchNorm2d(3)
        self.conv_one = nn.Conv2d(3, 8, kernel_size=5, padding=2)
        self.bn_two = nn.BatchNorm2d(8)
        self.conv_two = nn.Conv2d(8, 16, kernel_size=3, padding=1)
        self.bn_three = nn.BatchNorm2d(16)
        self.conv_three = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.bn_four = nn.BatchNorm2d(32)
        self.fc1 = nn.Linear(512, 256)
        self.fc2 = nn.Linear(256, 128)
        self.out = nn.Linear(128, 100)

    def forward(self, x):
        """Map a batch of images to a batch of 100 class scores."""
        # Stage 1: normalise the raw input, 5x5 conv, ReLU, halve resolution.
        x = F.max_pool2d(F.relu(self.conv_one(self.bn_one(x))), 2)
        # Stage 2: 3x3 conv, ReLU, halve resolution.
        x = F.max_pool2d(F.relu(self.conv_two(self.bn_two(x))), 2)
        # Stage 3: 3x3 conv with a leaky ReLU (slope 0.1), halve resolution.
        x = F.max_pool2d(F.leaky_relu(self.conv_three(self.bn_three(x)), 0.1), 2)

        # Normalise once more, then flatten everything except the batch axis.
        features = self.bn_four(x)
        features = features.view(features.size(0), -1)

        # Fully connected head with dropout in front of the first two layers.
        hidden = F.relu(self.fc1(self.dp_three(features)))
        hidden = F.relu(self.fc2(self.dp_four(hidden)))
        return self.out(hidden)


net = Net()
print(net)

Net(
  (dp_three): Dropout(p=0.2, inplace=False)
  (dp_four): Dropout(p=0.2, inplace=False)
  (bn_one): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv_one): Conv2d(3, 8, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (bn_two): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv_two): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn_three): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv_three): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn_four): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (fc1): Linear(in_features=512, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=128, bias=True)
  (out): Linear(in_features=128, out_features=100, bias=True)
)


In [20]:
# Adam over every parameter of `net`, with a learning rate of 0.005.
optimizer = torch.optim.Adam(net.parameters(), lr=0.005)
# Cross-entropy loss applied to the network outputs during training and validation.
criterion = nn.CrossEntropyLoss()

In [21]:
# Train `net` for 10 epochs; after each epoch print the validation loss summed
# over all validation batches (a sum of per-batch values, not a mean).
for epoch in tqdm(range(10)):
    net.train()
    for inputs, labels in train_loader:
        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    net.eval()
    loss_accumed = 0.0
    # Validation needs no gradients: disabling autograd avoids building a
    # graph per batch, and .item() keeps the running total a plain float
    # instead of a tensor that holds on to those graphs.
    with torch.no_grad():
        for X, y in valid_loader:
            output = net(X)
            loss = criterion(output, y)
            loss_accumed += loss.item()
    print("Epoch {} valid_loss {}".format(epoch, loss_accumed))

print('Training is finished!')

 10%|█         | 1/10 [00:52<07:51, 52.44s/it]

Epoch 0 valid_loss 277.44403076171875


 20%|██        | 2/10 [01:37<06:25, 48.15s/it]

Epoch 1 valid_loss 252.47779846191406


 30%|███       | 3/10 [02:22<05:27, 46.75s/it]

Epoch 2 valid_loss 240.55209350585938


 40%|████      | 4/10 [03:06<04:33, 45.65s/it]

Epoch 3 valid_loss 232.0427703857422


 50%|█████     | 5/10 [03:58<03:58, 47.74s/it]

Epoch 4 valid_loss 228.36277770996094


 60%|██████    | 6/10 [04:42<03:06, 46.64s/it]

Epoch 5 valid_loss 223.90518188476562


 70%|███████   | 7/10 [05:33<02:24, 48.01s/it]

Epoch 6 valid_loss 223.08172607421875


 80%|████████  | 8/10 [06:24<01:37, 48.86s/it]

Epoch 7 valid_loss 220.5876922607422


 90%|█████████ | 9/10 [07:09<00:47, 47.71s/it]

Epoch 8 valid_loss 218.438232421875


100%|██████████| 10/10 [07:57<00:00, 47.74s/it]

Epoch 9 valid_loss 215.55419921875
Training is finished!





2. Обучение классификатора картинок на примере CIFAR-100 (датасет можно изменить) через дообучение ImageNet Resnet-50

In [30]:
from torchvision import models

# Build torchvision's ResNet-50 with pretrained weights and print its layers.
# NOTE(review): newer torchvision releases replace `pretrained=` with a
# `weights=` argument — confirm against the installed version.
resnet50 = models.resnet50(pretrained=True)
print(resnet50)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [33]:
# Required transformations: tensor conversion followed by per-channel
# normalisation with the mean/std values below.
# NOTE(review): there is no resize step, so the images reach ResNet-50 at
# their stored resolution (presumably 32x32 for CIFAR); pretrained ImageNet
# models are normally fed ~224x224 inputs — confirm whether a
# transforms.Resize is wanted here.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
transform = transforms.Compose([
                                transforms.ToTensor(),
                                normalize,
                                ])

In [34]:
def set_parameter_requires_grad(model, feature_extracting):
    """Freeze all parameters of ``model`` when ``feature_extracting`` is truthy.

    Args:
        model: Object exposing ``parameters()``.
        feature_extracting: When truthy, ``requires_grad`` is switched off on
            every parameter; otherwise the model is left untouched.
    """
    if not feature_extracting:
        return
    for weight in model.parameters():
        weight.requires_grad = False

# Freeze every parameter currently in the model ...
set_parameter_requires_grad(resnet50, True)
# ... then swap in a new 100-output linear layer. It is created after the
# freeze, so its parameters are not affected by the call above.
resnet50.fc = nn.Linear(2048, 100)

In [35]:
# Split the data and wrap both parts with the same tensor + normalisation
# transform (no augmentation in this experiment).
train_dataset, valid_dataset = (
    MyOwnCifar(part, transform) for part in train_valid_split(dataset)
)

# Both loaders use batches of 128 and two workers; only training shuffles.
train_loader, valid_loader = (
    torch.utils.data.DataLoader(
        train_dataset, batch_size=128, shuffle=True, num_workers=2
    ),
    torch.utils.data.DataLoader(
        valid_dataset, batch_size=128, shuffle=False, num_workers=2
    ),
)

In [36]:
# Hand the optimizer only the parameters that still require gradients. With
# the backbone frozen and `fc` replaced earlier in the notebook, that is the
# new head's weight and bias.
params_to_update = [param for param in resnet50.parameters() if param.requires_grad]

# Adam with learning rate 0.005 over the trainable parameters only.
optimizer = torch.optim.Adam(params_to_update, lr=0.005)
criterion = nn.CrossEntropyLoss()

In [37]:
# Train the ResNet-50 head for 10 epochs; after each epoch print the
# validation loss summed over all validation batches (a sum, not a mean).
for epoch in tqdm(range(10)):
    # NOTE(review): train() also switches the frozen backbone's BatchNorm
    # layers to training mode, so their running statistics presumably keep
    # changing even though their weights do not — confirm this is intended.
    resnet50.train()
    for inputs, labels in train_loader:
        optimizer.zero_grad()

        outputs = resnet50(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    resnet50.eval()
    loss_accumed = 0.0
    # Validation needs no gradients: disabling autograd avoids building a
    # graph per batch, and .item() keeps the running total a plain float
    # instead of a tensor that holds on to those graphs.
    with torch.no_grad():
        for X, y in valid_loader:
            output = resnet50(X)
            loss = criterion(output, y)
            loss_accumed += loss.item()
    print("Epoch {} valid_loss {}".format(epoch, loss_accumed))

print('Training is finished!')

 10%|█         | 1/10 [06:55<1:02:23, 415.92s/it]

Epoch 0 valid_loss 340.979248046875


 20%|██        | 2/10 [14:11<56:58, 427.31s/it]  

Epoch 1 valid_loss 350.9792785644531


 30%|███       | 3/10 [21:16<49:45, 426.54s/it]

Epoch 2 valid_loss 366.44195556640625


 40%|████      | 4/10 [28:14<42:18, 423.16s/it]

Epoch 3 valid_loss 358.8343505859375


 50%|█████     | 5/10 [35:13<35:08, 421.64s/it]

Epoch 4 valid_loss 379.38336181640625


 60%|██████    | 6/10 [42:15<28:07, 421.80s/it]

Epoch 5 valid_loss 378.8239440917969


 70%|███████   | 7/10 [49:12<21:00, 420.17s/it]

Epoch 6 valid_loss 365.0445861816406


 80%|████████  | 8/10 [56:07<13:56, 418.41s/it]

Epoch 7 valid_loss 380.95892333984375


 90%|█████████ | 9/10 [1:03:03<06:57, 417.70s/it]

Epoch 8 valid_loss 397.9240417480469


100%|██████████| 10/10 [1:10:04<00:00, 420.41s/it]

Epoch 9 valid_loss 385.9942321777344
Training is finished!





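В таком формате не удалось добиться каких-то значимых улучшений: суммарный loss на валидации не снижался (≈341 после первой эпохи, ≈386 после десятой) и остался выше, чем у самописной сети (≈216).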

3. Обучение классификатора картинок на примере CIFAR-100 (датасет можно изменить) через дообучение ImageNet Resnet-50 с аугментацией (самописной, с использованием Pytorch встроенных методов)

In [38]:
# Train-time pipeline: random grayscale, colour jitter and a small rotation,
# followed by tensor conversion and normalisation.
trans_actions = transforms.Compose([
    transforms.RandomGrayscale(0.2),
    transforms.ColorJitter(brightness=.3, hue=.4),
    transforms.RandomRotation(degrees=(0, 5)),
    transforms.ToTensor(),
    normalize,
])

# Validation images are only converted and normalised.
valid_transforms = transforms.Compose([transforms.ToTensor(), normalize])

# Split first, then wrap each part with its own pipeline.
train_dataset, valid_dataset = train_valid_split(dataset)
train_dataset, valid_dataset = (
    MyOwnCifar(train_dataset, trans_actions),
    MyOwnCifar(valid_dataset, valid_transforms),
)

# Both loaders use batches of 128 and two workers; only training shuffles.
train_loader, valid_loader = (
    torch.utils.data.DataLoader(
        train_dataset, batch_size=128, shuffle=True, num_workers=2
    ),
    torch.utils.data.DataLoader(
        valid_dataset, batch_size=128, shuffle=False, num_workers=2
    ),
)

In [39]:
# Fresh Adam optimizer with a lower learning rate (0.001) over the same list
# of trainable parameters. The `fc` head is not re-created here, so this run
# continues from whatever weights the previous training left in it.
optimizer = torch.optim.Adam(params_to_update, lr=0.001)
criterion = nn.CrossEntropyLoss()

# Train for 10 epochs; after each epoch print the validation loss summed over
# all validation batches (a sum, not a mean).
for epoch in tqdm(range(10)):
    resnet50.train()
    for inputs, labels in train_loader:
        optimizer.zero_grad()

        outputs = resnet50(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    resnet50.eval()
    loss_accumed = 0.0
    # Validation needs no gradients: disabling autograd avoids building a
    # graph per batch, and .item() keeps the running total a plain float
    # instead of a tensor that holds on to those graphs.
    with torch.no_grad():
        for X, y in valid_loader:
            output = resnet50(X)
            loss = criterion(output, y)
            loss_accumed += loss.item()
    print("Epoch {} valid_loss {}".format(epoch, loss_accumed))

print('Training is finished!')

 10%|█         | 1/10 [07:04<1:03:36, 424.03s/it]

Epoch 0 valid_loss 377.9984436035156


 20%|██        | 2/10 [14:18<57:20, 430.07s/it]  

Epoch 1 valid_loss 354.27520751953125


 30%|███       | 3/10 [21:27<50:08, 429.84s/it]

Epoch 2 valid_loss 349.52825927734375


 40%|████      | 4/10 [28:41<43:08, 431.36s/it]

Epoch 3 valid_loss 372.5736389160156


 50%|█████     | 5/10 [35:54<36:00, 432.08s/it]

Epoch 4 valid_loss 341.8209228515625


 60%|██████    | 6/10 [43:09<28:51, 432.99s/it]

Epoch 5 valid_loss 342.4614562988281


 70%|███████   | 7/10 [50:21<21:37, 432.61s/it]

Epoch 6 valid_loss 318.5144348144531


 80%|████████  | 8/10 [57:29<14:22, 431.20s/it]

Epoch 7 valid_loss 331.2615966796875


 90%|█████████ | 9/10 [1:04:40<07:10, 430.94s/it]

Epoch 8 valid_loss 314.64593505859375


100%|██████████| 10/10 [1:11:44<00:00, 430.44s/it]

Epoch 9 valid_loss 309.9242858886719
Training is finished!





Уменьшив скорость обучения (с 0.005 до 0.001) и добавив аугментацию изображений, удалось улучшить качество модели: суммарный loss на валидации снизился с ≈378 после первой эпохи до ≈310 после десятой.