<a href="https://colab.research.google.com/github/lisosoma/BI_ML/blob/main/practice_dl_images.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import numpy as np
from sklearn.metrics import roc_auc_score

# Практикум 3, DL for images



## 1. Пример работы со свертками 


In [None]:
import torch
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision import transforms

transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))])

training_data = datasets.CIFAR10(
    root='data',
    train=True, 
    download=True,
    transform=transform
)

test_data = datasets.CIFAR10(
    root='data',
    train=False, 
    download=True,
    transform=transform
)

trainloader = torch.utils.data.DataLoader(training_data, batch_size=128, shuffle=True)
testloader = torch.utils.data.DataLoader(test_data, batch_size=128, shuffle=False)


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting data/cifar-10-python.tar.gz to data
Files already downloaded and verified


In [None]:
from torch import nn
import torch.nn.functional as F

class CNN(nn.Module):
  def __init__(self):
    super().__init__()
    self.conv1 = nn.Conv2d(3, 6, 5)
    self.pool = nn.MaxPool2d(2, 2)
    self.conv2 = nn.Conv2d(6, 16, 5)
    self.fc1 = nn.Linear(16*5*5, 120)
    self.fc2 = nn.Linear(120, 84)
    self.fc3 = nn.Linear(84, 10)

  def forward(self, x):
    x = self.pool(F.relu(self.conv1(x)))
    x = self.pool(F.relu(self.conv2(x)))
    x = torch.flatten(x, 1)
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    x = self.fc3(x)
    return x


In [None]:
import torch.optim as optim

model = CNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr = 0.001, momentum = 0.9)

In [None]:
for epoch in range(10):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 100 == 0:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

[1,     1] loss: 0.001
[1,   101] loss: 0.115
[1,   201] loss: 0.115
[1,   301] loss: 0.115
[2,     1] loss: 0.001
[2,   101] loss: 0.115
[2,   201] loss: 0.115
[2,   301] loss: 0.115
[3,     1] loss: 0.001
[3,   101] loss: 0.114
[3,   201] loss: 0.114
[3,   301] loss: 0.112
[4,     1] loss: 0.001
[4,   101] loss: 0.107
[4,   201] loss: 0.105
[4,   301] loss: 0.103
[5,     1] loss: 0.001
[5,   101] loss: 0.099
[5,   201] loss: 0.098
[5,   301] loss: 0.097
[6,     1] loss: 0.001
[6,   101] loss: 0.094
[6,   201] loss: 0.093
[6,   301] loss: 0.091
[7,     1] loss: 0.001
[7,   101] loss: 0.089
[7,   201] loss: 0.088
[7,   301] loss: 0.086
[8,     1] loss: 0.001
[8,   101] loss: 0.084
[8,   201] loss: 0.084
[8,   301] loss: 0.082
[9,     1] loss: 0.001
[9,   101] loss: 0.082
[9,   201] loss: 0.080
[9,   301] loss: 0.079
[10,     1] loss: 0.001
[10,   101] loss: 0.078
[10,   201] loss: 0.078
[10,   301] loss: 0.077
Finished Training


In [None]:
correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
model.eval()
with torch.no_grad():
    for data in testloader:
        images, labels = data
        # calculate outputs by running images through the network
        outputs = model(images)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

Accuracy of the network on the 10000 test images: 44 %


In [None]:
# prepare to count predictions for each class
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}

# again no gradients needed
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predictions = torch.max(outputs, 1)
        # collect the correct predictions for each class
        for label, prediction in zip(labels, predictions):
            if label == prediction:
                correct_pred[classes[label]] += 1
            total_pred[classes[label]] += 1


# print accuracy for each class
for classname, correct_count in correct_pred.items():
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print("Accuracy for class {:5s} is: {:.1f} %".format(classname,
                                                   accuracy))

Accuracy for class plane is: 41.5 %
Accuracy for class car   is: 61.9 %
Accuracy for class bird  is: 22.0 %
Accuracy for class cat   is: 23.6 %
Accuracy for class deer  is: 31.5 %
Accuracy for class dog   is: 43.3 %
Accuracy for class frog  is: 60.6 %
Accuracy for class horse is: 56.3 %
Accuracy for class ship  is: 53.0 %
Accuracy for class truck is: 50.7 %


# Задание 1

Написать и обучить нейронную сеть на датасете CIFAR (5 баллов)
1. Замените сверточные слои размера 5х5 на два идущих подряд слоя размером 3х3
2. Обучите модель на GPU, 20 эпох


Дополнительно:
1. (5 баллов) Переписать код с использованием pytorch lightning (см. документацию фреймворка https://pytorch-lightning.readthedocs.io/en/latest/starter/introduction.html)

### Написать и обучить нейронную сеть на датасете CIFAR (5 баллов)

1. Замените сверточные слои размера 5х5 на два идущих подряд слоя размером 3х3

Заменим слои 5x5, используя то же самое количество фильтров. Pool будем применять после двух сверток 3х3.

In [None]:
from torch import nn
import torch.nn.functional as F

class CNN(nn.Module):
  def __init__(self):
    super().__init__()
    self.conv1 = nn.Conv2d(3, 6, 3)
    self.conv2 = nn.Conv2d(6, 6, 3)
    self.pool = nn.MaxPool2d(2, 2)
    self.conv3 = nn.Conv2d(6, 16, 3)
    self.conv4 = nn.Conv2d(16, 16, 3)
    self.fc1 = nn.Linear(16*5*5, 120)
    self.fc2 = nn.Linear(120, 84)
    self.fc3 = nn.Linear(84, 10)

  def forward(self, x):
    x = self.conv1(x)
    x = self.pool(F.relu(self.conv2(x)))
    x = self.conv3(x)
    x = self.pool(F.relu(self.conv4(x)))
    x = torch.flatten(x, 1)
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    x = self.fc3(x)
    return x


In [None]:
model = CNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr = 0.001, momentum = 0.9)

2. Обучите модель на GPU, 20 эпох

In [None]:
for epoch in range(20):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 100 == 0:    # print every 100 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

[1,     1] loss: 0.001
[1,   101] loss: 0.115
[1,   201] loss: 0.115
[1,   301] loss: 0.115
[2,     1] loss: 0.001
[2,   101] loss: 0.115
[2,   201] loss: 0.115
[2,   301] loss: 0.115
[3,     1] loss: 0.001
[3,   101] loss: 0.115
[3,   201] loss: 0.115
[3,   301] loss: 0.115
[4,     1] loss: 0.001
[4,   101] loss: 0.115
[4,   201] loss: 0.115
[4,   301] loss: 0.115
[5,     1] loss: 0.001
[5,   101] loss: 0.115
[5,   201] loss: 0.115
[5,   301] loss: 0.115
[6,     1] loss: 0.001
[6,   101] loss: 0.115
[6,   201] loss: 0.115
[6,   301] loss: 0.114
[7,     1] loss: 0.001
[7,   101] loss: 0.113
[7,   201] loss: 0.111
[7,   301] loss: 0.108
[8,     1] loss: 0.001
[8,   101] loss: 0.104
[8,   201] loss: 0.103
[8,   301] loss: 0.102
[9,     1] loss: 0.001
[9,   101] loss: 0.100
[9,   201] loss: 0.098
[9,   301] loss: 0.098
[10,     1] loss: 0.001
[10,   101] loss: 0.096
[10,   201] loss: 0.094
[10,   301] loss: 0.093
[11,     1] loss: 0.001
[11,   101] loss: 0.091
[11,   201] loss: 0.090
[11,

In [None]:
correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
model.eval()
with torch.no_grad():
    for data in testloader:
        images, labels = data
        # calculate outputs by running images through the network
        outputs = model(images)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

Accuracy of the network on the 10000 test images: 48 %


Как видно, точность увеличилась на 4%.

In [None]:
# prepare to count predictions for each class
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}

# again no gradients needed
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predictions = torch.max(outputs, 1)
        # collect the correct predictions for each class
        for label, prediction in zip(labels, predictions):
            if label == prediction:
                correct_pred[classes[label]] += 1
            total_pred[classes[label]] += 1


# print accuracy for each class
for classname, correct_count in correct_pred.items():
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print("Accuracy for class {:5s} is: {:.1f} %".format(classname,
                                                   accuracy))

Accuracy for class plane is: 45.4 %
Accuracy for class car   is: 58.6 %
Accuracy for class bird  is: 29.2 %
Accuracy for class cat   is: 24.0 %
Accuracy for class deer  is: 23.6 %
Accuracy for class dog   is: 46.4 %
Accuracy for class frog  is: 71.2 %
Accuracy for class horse is: 62.4 %
Accuracy for class ship  is: 66.8 %
Accuracy for class truck is: 52.5 %


### Дополнительно:
1. (5 баллов) Переписать код с использованием pytorch lightning (см. документацию фреймворка https://pytorch-lightning.readthedocs.io/en/latest/starter/introduction.html)

In [None]:
!pip install pytorch-lightning

In [None]:
import os
import torch
from torch import nn
import torch.nn.functional as F
from torchvision import transforms
from torch.utils.data import DataLoader, random_split
import pytorch_lightning as pl
import torch.optim as optim

Создадим класс CNN, используя указанный фреймворк.

In [None]:
CLASSES = 10
IMG_SIZE = 32

class CNN(pl.LightningModule):
    def __init__(self):
      super().__init__()
      self.conv1 = nn.Conv2d(3, 6, 3)
      self.conv2 = nn.Conv2d(6, 6, 3)
      self.pool = nn.MaxPool2d(2, 2)
      self.conv3 = nn.Conv2d(6, 16, 3)
      self.conv4 = nn.Conv2d(16, 16, 3)
      self.fc1 = nn.Linear(16*5*5, 120)
      self.fc2 = nn.Linear(120, 84)
      self.fc3 = nn.Linear(84, 10)

    def forward(self,x):
      x = self.conv1(x)
      x = self.pool(F.relu(self.conv2(x)))
      x = self.conv3(x)
      x = self.pool(F.relu(self.conv4(x)))
      x = torch.flatten(x, 1)
      x = F.relu(self.fc1(x))
      x = F.relu(self.fc2(x))
      x = self.fc3(x)
      return x

    def loss_fn(self,out,target):
      return nn.CrossEntropyLoss()(out.view(-1,CLASSES),target)
    
    def configure_optimizers(self):
      optimizer = optim.SGD(model.parameters(), lr = 0.001, momentum = 0.9)
      return optimizer

    def training_step(self,batch,batch_idx):
      x,y = batch
      img = x.view(-1,3,IMG_SIZE,IMG_SIZE)
      label = y.view(-1)
      out = self(img)
      loss = self.loss_fn(out,label)
      self.log('train_loss', loss)
      return loss       


model = CNN()
trainer = pl.Trainer(gpus=-1,max_epochs=20)
trainer.fit(model, trainloader, testloader) 

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn("You passed in a `val_dataloader` but have no `validation_step`. Skipping val loop.")
Missing logger folder: /content/lightning_logs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type      | Params
------------------------------------
0 | conv1 | Conv2d    | 168   
1 | conv2 | Conv2d    | 330   
2 | pool  | MaxPool2d | 0     
3 | conv3 | Conv2d    | 880   
4 | conv4 | Conv2d    | 2.3 K 
5 | fc1   | Linear    | 48.1 K
6 | fc2   | Linear    | 10.2 K
7 | fc3   | Linear    | 850   
------------------------------------
62.8 K    Trainable params
0         Non-trainable params
62.8 K    Total params
0.251     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

In [None]:
correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
model.eval()
with torch.no_grad():
    for data in testloader:
        images, labels = data
        # calculate outputs by running images through the network
        outputs = model(images)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

Accuracy of the network on the 10000 test images: 49 %


In [None]:
# prepare to count predictions for each class
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}

# again no gradients needed
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predictions = torch.max(outputs, 1)
        # collect the correct predictions for each class
        for label, prediction in zip(labels, predictions):
            if label == prediction:
                correct_pred[classes[label]] += 1
            total_pred[classes[label]] += 1


# print accuracy for each class
for classname, correct_count in correct_pred.items():
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print("Accuracy for class {:5s} is: {:.1f} %".format(classname,
                                                   accuracy))

Accuracy for class plane is: 50.3 %
Accuracy for class car   is: 73.4 %
Accuracy for class bird  is: 25.7 %
Accuracy for class cat   is: 23.6 %
Accuracy for class deer  is: 44.0 %
Accuracy for class dog   is: 46.4 %
Accuracy for class frog  is: 65.1 %
Accuracy for class horse is: 57.5 %
Accuracy for class ship  is: 61.7 %
Accuracy for class truck is: 50.3 %


## 2. Используем VGG

In [None]:
import torchvision.models as models

vgg11 = models.vgg11(pretrained=True)

Downloading: "https://download.pytorch.org/models/vgg11-8a719046.pth" to /root/.cache/torch/hub/checkpoints/vgg11-8a719046.pth


  0%|          | 0.00/507M [00:00<?, ?B/s]

In [None]:
from torchvision import transforms

transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))])

training_data = datasets.CIFAR10(
    root="data", # root is the path where the train/test data is stored
    train=True, # train specifies training or test dataset
    download=True, # download=True downloads the data from the internet if it’s not available at root
    transform=transform # transform and target_transform specify the feature and label transformations
)

test_data = datasets.CIFAR10(
    root="data",
    train=False,
    download=True,
    transform=transform
)

trainloader = torch.utils.data.DataLoader(training_data, batch_size=128,
                                          shuffle=True, num_workers=2)

testloader = torch.utils.data.DataLoader(test_data, batch_size=128,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


# Задание 2 (5 баллов)
1. Обучитите на GPU претренированную модель VGG (минимум 5 эпох)
2. Протестируйте ее на всех данных и на каждом классе отдельно

Дополнительное задание (5 баллов):     

* Взять какой-нибудь специфичный датасет с картинками
* Взять претренированную VGG (или другу модель) и сделать transfer learning на выбранный датасет

In [None]:
model = vgg11
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr = 0.001, momentum = 0.9)

1. Обучитите на GPU претренированную модель VGG (минимум 5 эпох)

In [None]:
for epoch in range(5):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 100 == 0:    # print every 100 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

[1,     1] loss: 0.014
[1,   101] loss: 0.138
[1,   201] loss: 0.073
[1,   301] loss: 0.058
[2,     1] loss: 0.000
[2,   101] loss: 0.045
[2,   201] loss: 0.041
[2,   301] loss: 0.038
[3,     1] loss: 0.000
[3,   101] loss: 0.034
[3,   201] loss: 0.031
[3,   301] loss: 0.031
[4,     1] loss: 0.000
[4,   101] loss: 0.027
[4,   201] loss: 0.026
[4,   301] loss: 0.027
[5,     1] loss: 0.000
[5,   101] loss: 0.023
[5,   201] loss: 0.022
[5,   301] loss: 0.022
Finished Training


2. Протестируйте ее на всех данных и на каждом классе отдельно

In [None]:
correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
model.eval()
with torch.no_grad():
    for data in testloader:
        images, labels = data
        # calculate outputs by running images through the network
        outputs = model(images)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

Accuracy of the network on the 10000 test images: 80 %


In [None]:
# prepare to count predictions for each class
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}

# again no gradients needed
with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs = model(images)
        _, predictions = torch.max(outputs, 1)
        # collect the correct predictions for each class
        for label, prediction in zip(labels, predictions):
            if label == prediction:
                correct_pred[classes[label]] += 1
            total_pred[classes[label]] += 1


# print accuracy for each class
for classname, correct_count in correct_pred.items():
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print("Accuracy for class {:5s} is: {:.1f} %".format(classname,
                                                   accuracy))

Accuracy for class plane is: 80.9 %
Accuracy for class car   is: 93.2 %
Accuracy for class bird  is: 60.8 %
Accuracy for class cat   is: 67.4 %
Accuracy for class deer  is: 81.2 %
Accuracy for class dog   is: 76.4 %
Accuracy for class frog  is: 85.6 %
Accuracy for class horse is: 83.9 %
Accuracy for class ship  is: 94.2 %
Accuracy for class truck is: 83.4 %


Как видно, точность увеличилась на 32%.
