## ECL 연구프로젝트 스터디 2차
### PyTorch Tutorial
Kaggle Playground Competition - [Aerial Cactus Identification](https://www.kaggle.com/c/aerial-cactus-identification)<br>
PyTorch를 이용해 간단한 Image Classification 문제를 해결해봅시다~!

In [None]:
import numpy as np
import pandas as pd
import os
import random

import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision
from torch.utils.data import DataLoader, Dataset
import torch.utils.data as utils
from torchvision import transforms

from sklearn.model_selection import train_test_split
from typing import Type, Any, Callable, Union, List, Optional
from tqdm.auto import tqdm
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
%matplotlib inline


import warnings
warnings.filterwarnings("ignore")

# 데이터 Setup

In [None]:
# Extract the competition archive located under /content (IPython shell escape).
!unzip /content/aerial-cactus-identification.zip

In [None]:
# Extract the train and test archives under /content
# (presumably produced by the previous extraction step -- confirm).
!unzip /content/train.zip
!unzip /content/test.zip

In [None]:
# train.csv 열기


## Sample Image 출력

In [None]:
# image 디렉토리 경로 지정


In [None]:
# Show five randomly chosen images from train_dir in a single row
sample_path = random.sample(os.listdir(train_dir), 5)

fig, axs = plt.subplots(1, 5)
fig.set_size_inches(7, 15)
# Pair each subplot axis with one of the sampled file names
for ax, path in zip(axs, sample_path):
    sample = mpimg.imread(os.path.join(train_dir, path))
    ax.imshow(sample)
fig.tight_layout()
plt.show()

## Custom Dataset를 생성하고, DataLoader로 데이터 불러오기
![](https://blog.kakaocdn.net/dn/b0SVvH/btqLq2FmuEs/1hnCz8VL9wvXPKTOTXzvOk/img.jpg)</br></br>
PyTorch로 데이터를 불러오기 위해서는 custom Dataset class를 생성하고, DataLoader로 dataset class를 불러와야한다.

<dl class="class">
<dt id="torch.utils.data.Dataset">
<em class="property">class </em><code class="sig-prename descclassname">torch.utils.data.</code><code class="sig-name descname">Dataset</code></dt>
<dd><p>An abstract class representing a <a class="reference internal" href="#torch.utils.data.Dataset" title="torch.utils.data.Dataset"><code class="xref py py-class docutils literal notranslate"><span class="pre">Dataset</span></code></a>.</p>
<p>All datasets that represent a map from keys to data samples should subclass
it. All subclasses should overwrite <code class="xref py py-meth docutils literal notranslate"><span class="pre">__getitem__()</span></code>, supporting fetching a
data sample for a given key. Subclasses could also optionally overwrite
<code class="xref py py-meth docutils literal notranslate"><span class="pre">__len__()</span></code>, which is expected to return the size of the dataset by many
<a class="reference internal" href="#torch.utils.data.Sampler" title="torch.utils.data.Sampler"><code class="xref py py-class docutils literal notranslate"><span class="pre">Sampler</span></code></a> implementations and the default options
of <a class="reference internal" href="#torch.utils.data.DataLoader" title="torch.utils.data.DataLoader"><code class="xref py py-class docutils literal notranslate"><span class="pre"><span class="highlighted">DataLoader</span></span></code></a>.</p>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p><a class="reference internal" href="#torch.utils.data.DataLoader" title="torch.utils.data.DataLoader"><code class="xref py py-class docutils literal notranslate"><span class="pre"><span class="highlighted">DataLoader</span></span></code></a> by default constructs an index
sampler that yields integral indices.  To make it work with a map-style
dataset with non-integral indices/keys, a custom sampler must be provided.</p>
</div>
</dd></dl>

<pre><span></span><span class="n"><span class="highlighted">DataLoader</span></span><span class="p">(</span><span class="n">dataset</span><span class="p">,</span> <span class="n">batch_size</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">shuffle</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">sampler</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
           <span class="n">batch_sampler</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">num_workers</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span> <span class="n">collate_fn</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
           <span class="n">pin_memory</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">drop_last</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">timeout</span><span class="o">=</span><span class="mi">0</span><span class="p">,</span>
           <span class="n">worker_init_fn</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span> <span class="n">prefetch_factor</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span>
           <span class="n">persistent_workers</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span>
</pre>

In [None]:
# custom Dataset 만들기


    # 데이터셋 크기 return


    # 주어진 index에 해당하는 데이터 return


In [None]:
# train, validation 데이터 split


In [None]:
# Define the per-sample transform pipeline:
# convert the input to a PIL image, then to a tensor.
data_transform = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.ToTensor(),
    ]
)


# Dataset 객체 생성 후, DataLoader로 tensor형식 데이터 iterator 불러오기


# Model 만들기

<dt id="torch.nn.Conv2d">
<em class="property">class </em><code class="sig-prename descclassname">torch.nn.</code><code class="sig-name descname">Conv2d</code><span class="sig-paren">(</span><em class="sig-param">in_channels</em>, <em class="sig-param">out_channels</em>, <em class="sig-param">kernel_size</em>, <em class="sig-param">stride=1</em>, <em class="sig-param">padding=0</em>, <em class="sig-param">dilation=1</em>, <em class="sig-param">groups=1</em>, <em class="sig-param">bias=True</em>, <em class="sig-param">padding_mode='zeros'</em><span class="sig-paren">)</span></dt>

<ul>
<li><p><code class="xref py py-attr docutils literal notranslate"><span class="pre">stride</span></code> controls the stride for the cross-correlation, a single
number or a tuple.</p></li>
<li><p><code class="xref py py-attr docutils literal notranslate"><span class="pre">padding</span></code> controls the amount of implicit padding on both
sides for <code class="xref py py-attr docutils literal notranslate"><span class="pre">padding</span></code> number of points for each dimension.</p></li>
<li><p><code class="xref py py-attr docutils literal notranslate"><span class="pre">dilation</span></code> controls the spacing between the kernel points; also
known as the à trous algorithm. It is harder to describe, but this <a class="reference external" href="https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md">link</a>
has a nice visualization of what <code class="xref py py-attr docutils literal notranslate"><span class="pre">dilation</span></code> does.</p></li>
<li><p><code class="xref py py-attr docutils literal notranslate"><span class="pre">groups</span></code> controls the connections between inputs and outputs.
<code class="xref py py-attr docutils literal notranslate"><span class="pre">in_channels</span></code> and <code class="xref py py-attr docutils literal notranslate"><span class="pre">out_channels</span></code> must both be divisible by
<code class="xref py py-attr docutils literal notranslate"><span class="pre">groups</span></code>. For example,</p>
<blockquote>
<div><ul class="simple">
<li><p>At groups=1, all inputs are convolved to all outputs.</p></li>
<li><p>At groups=2, the operation becomes equivalent to having two conv
layers side by side, each seeing half the input channels
and producing half the output channels, and both subsequently
concatenated.</p></li>
<li><p>At groups= <code class="xref py py-attr docutils literal notranslate"><span class="pre">in_channels</span></code>, each input channel is convolved with
its own set of filters (of size
$\frac{\text{out\_channels}}{\text{in\_channels}}$).</p></li>
</ul>
</div></blockquote>
</li>
</ul>

## 간단한 모델구조 생성

In [None]:
# nn.Module 클래스를 상속받아 나만의 모델 정의


In [None]:
# 모델 생성 후 모델정보 출력


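## Conv2d Shape
$$ \text{Input: } (N, C_{in}, H_{in}, W_{in}) $$
$$ \text{Output: } (N, C_{out}, H_{out}, W_{out}) $$

$$H_{out} = \left\lfloor \frac{H_{in} + 2 \times \text{padding}[0] - \text{dilation}[0] \times (\text{kernel\_size}[0] - 1) - 1}{\text{stride}[0]} + 1 \right\rfloor$$

$$W_{out} = \left\lfloor \frac{W_{in} + 2 \times \text{padding}[1] - \text{dilation}[1] \times (\text{kernel\_size}[1] - 1) - 1}{\text{stride}[1]} + 1 \right\rfloor$$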

In [None]:
# 데이터를 모델에 넣으면 결과가 어떻게 나오는지 한번 봅시다!


## 복잡한 모델 만들기
![](https://krshrimali.github.io/assets/ResNet18-Architecture.png)

## BasicBlock

![](https://i.imgur.com/Aj8dDLj.png)

In [None]:
def conv3x3(in_channels, out_channels, stride=1, dilation=1) -> nn.Conv2d:
    """Build a bias-free 3x3 convolution.

    The padding is set equal to the dilation, which keeps the spatial size
    unchanged when ``stride`` is 1.
    """
    layer = nn.Conv2d(
        in_channels,
        out_channels,
        kernel_size=3,
        stride=stride,
        padding=dilation,
        dilation=dilation,
        bias=False,
    )
    return layer
  
def conv1x1(in_channels, out_channels, stride=1) -> nn.Conv2d:
    """Build a bias-free 1x1 (pointwise) convolution with the given stride."""
    layer = nn.Conv2d(
        in_channels,
        out_channels,
        kernel_size=1,
        stride=stride,
        bias=False,
    )
    return layer
  

class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions.

    Main path: conv3x3 -> BN -> ReLU -> conv3x3 -> BN. The shortcut path
    always passes the input through a 1x1 convolution followed by batch
    norm, using the same stride as the first 3x3 convolution, before it is
    added to the main path.
    """

    # Ratio between the block's output channels and ``channels``.
    expansion: int = 1

    def __init__(self, in_channels: int, channels: int, stride: int = 1):
        super().__init__()
        self.stride = stride

        # main path
        self.conv1 = conv3x3(in_channels, channels, stride)
        self.bn1 = nn.BatchNorm2d(channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(channels, channels)
        self.bn2 = nn.BatchNorm2d(channels)

        # shortcut path (projection of the block input)
        shortcut_layers = [
            conv1x1(in_channels, channels, stride),
            nn.BatchNorm2d(channels),
        ]
        self.downsample = nn.Sequential(*shortcut_layers)

    def forward(self, x):
        """Apply the two-conv main path, add the projected input, then ReLU."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # residual connection: add the projected input in place
        out += self.downsample(x)
        return self.relu(out)

## Bottleneck

In [None]:
class Bottleneck(nn.Module):
    """Bottleneck residual block: 1x1 reduce -> 3x3 -> 1x1 expand.

    The block outputs ``channels * expansion`` channels. The 3x3 convolution
    carries the stride, the groups and the dilation.

    Args:
        in_channels: Number of channels of the block input.
        channels: Base width of the block; the output has
            ``channels * expansion`` channels.
        stride: Stride of the 3x3 convolution (and of the default shortcut).
        downsample: Optional module applied to the input before the residual
            addition. When omitted and the input shape would not match the
            output (``stride != 1`` or ``in_channels != channels * expansion``),
            a 1x1 convolution + norm projection is created so the addition
            is well defined.
        groups: Number of groups of the 3x3 convolution.
        base_width: Width per group, relative to 64.
        dilation: Dilation (and padding) of the 3x3 convolution.
        norm_layer: Callable building a normalization layer from a channel
            count; defaults to ``nn.BatchNorm2d``.
    """

    # Ratio between the block's output channels and ``channels``.
    expansion: int = 4

    def __init__(
        self,
        in_channels,
        channels,
        stride: int = 1,
        downsample: Optional[nn.Module] = None,
        groups: int = 1,
        base_width: int = 64,
        dilation: int = 1,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
    ) -> None:
        super().__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        width = int(channels * (base_width / 64.)) * groups
        out_channels = channels * self.expansion

        # actual layers
        self.conv1 = conv1x1(in_channels, width)
        self.bn1 = norm_layer(width)
        self.relu = nn.ReLU(inplace=True)
        # The file's conv3x3 helper has no ``groups`` parameter (its fourth
        # positional argument is the dilation), so the grouped 3x3
        # convolution is built directly here.
        self.conv2 = nn.Conv2d(
            width,
            width,
            kernel_size=3,
            stride=stride,
            padding=dilation,
            groups=groups,
            dilation=dilation,
            bias=False,
        )
        self.bn2 = norm_layer(width)
        self.conv3 = conv1x1(width, out_channels)
        self.bn3 = norm_layer(out_channels)

        # Without a projection the residual addition fails whenever the
        # input and output shapes differ, so provide one by default.
        if downsample is None and (stride != 1 or in_channels != out_channels):
            downsample = nn.Sequential(
                conv1x1(in_channels, out_channels, stride),
                norm_layer(out_channels),
            )
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Run the three-conv main path, add the shortcut, then apply ReLU."""
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out

## Resnet

In [None]:
class ResNet(nn.Module):

    def __init__(self, block, layers: List[int],):
        super(ResNet, self).__init__()
        self.norm_layer = nn.BatchNorm2d

        self.in_channels = 64
        self.dilation = 1

        self.conv1 = 
        self.bn1 = 
        self.relu = 
        self.maxpool = 
        self.layer1 = 
        self.layer2 = 
        self.layer3 = 
        
        self.avgpool = 
        self.sigmoid = 
        self.fc = 

        # weight/bias initialize
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
    
    # 블록을 쌓아주는 함수
    def _make_layer(self, block, channels, block_num, stride: int=1) -> nn.Sequential:
        
        layers = []
        layers.append(block(self.in_channels, channels, stride))
        self.in_channels = channels * block.expansion
        for _ in range(1, block_num):
            layers.append(block(self.in_channels, channels, stride))
            
        return nn.Sequential(*layers)

    def forward(self, x):



        return x

In [None]:
# Instantiate the network with BasicBlock and print its module tree
resnet = ResNet(block=BasicBlock, layers=[2, 2, 2, 2])
print(resnet)

# Training

In [None]:
# optimizer, loss function 정의


In [None]:
def accuracy(pred, target):
    """Return the fraction of rounded predictions that equal the target.

    ``pred`` is rounded with ``torch.round`` and compared element-wise to
    ``target``; the number of matches is divided by ``target.shape[0]``.
    Both tensors should have the same shape, otherwise the comparison
    broadcasts.
    """
    matches = torch.eq(torch.round(pred), target)
    n_correct = matches.sum().item()
    return n_correct / target.shape[0]

![](https://pytorch.org/tutorials/_images/comp-graph.png)

파이토치는 x가 network를 통과하는 동안 연산 과정을 자동으로 기록하고, 이를 바탕으로 gradient를 계산한다(autograd).<br>
gradient는 <b>loss.backward()</b>를 호출해 계산하고,<br>
<b>optimizer.step()</b>을 호출해 parameter(weight, bias)들을 update한다.<br>
따라서 training 동안은 위 함수들을 호출하지만, evaluation 동안은 gradient를 계산할 필요가 없으므로,<br> <b>torch.no_grad()</b>로 network 진행 동안 gradient가 기록되지 않도록 한다.



In [None]:
# Training DataLoader: batches of 64 samples with shuffling enabled.
train_loader = DataLoader(dataset=train_data, batch_size=64, shuffle=True)

# Training loop (exercise skeleton)
def train(model, epoch, dataloader, optimizer, loss_function):
    """Run one pass over ``dataloader`` with the model in training mode.

    The running mean loss and accuracy are shown in the tqdm progress bar.
    Nothing is returned.

    NOTE(review): the forward/backward steps are left as placeholders in
    this notebook. As written, ``output`` and ``loss`` are never assigned
    inside this function, so the logging lines below depend on the
    placeholders being filled in first.
    """
    model.train()
    loss_log = []
    acc_log = []
    with tqdm(dataloader, unit="batch") as tepoch:
        for data, target in tepoch:


            # TODO: reset the optimizer's gradients


            # TODO: feed the data to the model to produce `output`
            # (and compute `loss` with loss_function)


            # TODO: back-propagate the loss and update the model

            
            # store accuracy & loss, then display the running means
            # target gets a trailing dimension before comparison
            # (presumably to match output's shape -- confirm)
            acc = accuracy(output, target.unsqueeze(1))
            loss_log.append(loss.item())
            acc_log.append(acc)
            tepoch.set_postfix(epoch=epoch, step="train", loss=np.mean(loss_log), accuracy=np.mean(acc_log))

# Evaluation loop (exercise skeleton)
# NOTE(review): this function's name shadows the builtin `eval`.
def eval(model, epoch, dataloader, loss_function):
    """Run one pass over ``dataloader`` with the model in evaluation mode.

    The running mean loss and accuracy are shown in the tqdm progress bar.
    Nothing is returned.

    NOTE(review): the forward step is left as a placeholder in this
    notebook. As written, ``output`` and ``loss`` are never assigned inside
    this function.
    """
    model.eval()
    loss_log = []
    acc_log = []
    with tqdm(dataloader, unit="batch") as tepoch:
        # TODO: wrap the loop in torch.no_grad() so no gradients are tracked
        # during evaluation. NOTE(review): no such context manager is present
        # yet; the loop's extra indentation leaves room for it.
        
            for data, target in tepoch:
                
                
                # store accuracy & loss, then display the running means
                loss_log.append(loss.item())
                acc = accuracy(output, target.unsqueeze(1))
                acc_log.append(acc)
                tepoch.set_postfix(epoch=epoch, step="eval", loss=np.mean(loss_log), accuracy=np.mean(acc_log))


In [None]:
# Train Model



# Evaluation

In [None]:
# Load the sample submission file and build the test dataset from it
submit = pd.read_csv('/content/sample_submission.csv')
test_data = CactusData(df=submit, data_dir=test_dir, transform=data_transform)
# No batch_size is given, so the DataLoader default applies; order is kept
test_loader = DataLoader(dataset=test_data, shuffle=False)

In [None]:
%%time
# Predict every test sample and write the submission file
resnet.eval()
with torch.no_grad():
    # .item() needs a single-element output, i.e. one sample per batch;
    # the output is thresholded at 0.5 to a 0/1 label
    predict = [
        int(resnet(data.cuda()).item() > 0.5)
        for data, _ in test_loader
    ]

submit['has_cactus'] = predict
submit.to_csv('/content/submission.csv', index=False)

In [None]:
# Preview the first rows of the submission DataFrame
submit.head()