CNN : 컨볼루션을 활용해 이미지의 특징을 계층적으로 추출해서 이미지 인식
- 구성 : 컨볼루션, 풀링, 일반 인공신경망
- 1) 컨볼루션 : 필터(커널) -> 특징 맵(feature map)
- 2) 풀링 : 특징 맵의 크기를 줄이는 것 (연산량 감소 및 과적합 방지)
- 3) 일반 인공신경망 : 1차원으로 변환한 특징 맵을 입력으로 받아 최종 분류를 수행

# 5.2 CNN 모델 구현

In [None]:
# 1. 라이브러리
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms, datasets # torchvision : 데이터셋, 모델, 전처리 도구들을 포함하고 있는 패키지

In [None]:
# 2. Select the compute device: the GPU when CUDA is available, otherwise the CPU.
USE_CUDA = torch.cuda.is_available()  # True when CUDA can be used
DEVICE = torch.device('cuda') if USE_CUDA else torch.device('cpu')

In [None]:
# 3. Hyperparameters for the CNN experiment.
EPOCHS = 40  # upper bound of the epoch loop that drives training
BATCH_SIZE = 64  # batch size handed to the DataLoaders

In [None]:
# 4. Load the dataset.
# Preprocessing: ToTensor() converts each image to a tensor, then Normalize()
# applies (pixel - mean) / std per channel.
# FashionMNIST has a single channel, so mean and std must be one-element
# tuples. The previous (0,1307,) / (0,3081,) used commas instead of decimal
# points, producing two-element tuples that contain a zero std.
# NOTE(review): 0.1307 / 0.3081 look like the commonly quoted MNIST statistics;
# confirm whether FashionMNIST-specific values are wanted here.
# datasets.FashionMNIST downloads the data into './.data' (train split only
# requests the download; the test split reuses the same directory).
train_loader = torch.utils.data.DataLoader(
    datasets.FashionMNIST(
        './.data',
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,)),
        ]),
    ),
    batch_size=BATCH_SIZE,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    datasets.FashionMNIST(
        './.data',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,)),
        ]),
    ),
    batch_size=BATCH_SIZE,
    shuffle=True,
)

In [None]:
# CNN class definition.
# Two convolution layers (the second one emits 20 feature maps) -> dropout
# -> fully connected layers.
class Net(nn.Module):
    """Small CNN: two convolution stages followed by two linear layers.

    Data flow in ``forward``:
    conv1 -> max-pool(2) -> ReLU -> conv2 -> 2D dropout -> max-pool(2) -> ReLU
    -> reshape to 320 features -> fc1 -> ReLU -> dropout -> fc2.

    The reshape to 320 features assumes 1x28x28 inputs (20 channels x 4 x 4
    after the second pooling) -- TODO confirm against the dataset in use.
    """

    def __init__(self):
        super().__init__()
        # nn.Conv2d(in_channels, out_channels, kernel_size)
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        # Dropout applied to the output of the second convolution.
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        # Ten outputs: one score per class.
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        """Return the raw fc2 scores (10 per sample) for the batch ``x``."""
        # Stage 1: convolution -> pooling -> activation.
        x = self.conv1(x)
        x = F.max_pool2d(x, 2)
        x = F.relu(x)

        # Stage 2: convolution -> dropout -> pooling -> activation.
        x = self.conv2(x)
        x = self.conv2_drop(x)
        x = F.max_pool2d(x, 2)
        x = F.relu(x)

        # Flatten the feature maps into one 320-element vector per sample.
        x = x.view(-1, 320)

        x = self.fc1(x)
        x = F.relu(x)
        # Functional dropout must be told explicitly whether we are training.
        x = F.dropout(x, training=self.training)
        return self.fc2(x)



In [None]:
# Create the CNN instance on the selected device, then its optimizer.
model = Net()
model = model.to(DEVICE)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

In [None]:
# Training step for one epoch.
def train(model, train_loader, optimizer, epoch):
    """Train ``model`` for one pass over ``train_loader``.

    Each batch is moved to ``DEVICE``, run through the model, scored with
    cross-entropy against its targets and used for one optimizer update.
    Progress is printed for every 200th batch (including the first).

    Args:
        model: network to train; switched to training mode.
        train_loader: iterable of ``(data, target)`` batches; its ``dataset``
            attribute is read for the progress line.
        optimizer: optimizer holding the model's parameters.
        epoch: epoch number, used only in the progress line.
    """
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.to(DEVICE)
        target = target.to(DEVICE)

        optimizer.zero_grad()  # clear gradients left from the previous batch
        output = model(data)  # forward pass
        loss = F.cross_entropy(output, target)  # error against the targets
        loss.backward()  # backward pass
        optimizer.step()  # update the learnable parameters

        if batch_idx % 200 != 0:
            continue

        samples_seen = batch_idx * len(data)
        samples_total = len(train_loader.dataset)
        percent_done = 100. * batch_idx / len(train_loader)
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, samples_seen, samples_total, percent_done, loss.item()))

In [None]:
def evaluate(model, test_loader):
    """Compute the mean loss and accuracy of ``model`` over ``test_loader``.

    Args:
        model: network to evaluate; switched to evaluation mode.
        test_loader: iterable of ``(data, target)`` batches; the length of its
            ``dataset`` attribute is the denominator for both metrics.

    Returns:
        Tuple ``(test_loss, test_accuracy)``: the summed cross-entropy divided
        by the dataset size, and the percentage of correct predictions.
    """
    model.eval()
    loss_sum = 0
    num_correct = 0
    with torch.no_grad():  # no gradients are needed for evaluation
        for data, target in test_loader:
            data = data.to(DEVICE)
            target = target.to(DEVICE)
            scores = model(data)

            # Accumulate the summed (not averaged) loss of this batch.
            batch_loss = F.cross_entropy(scores, target, reduction='sum')
            loss_sum += batch_loss.item()

            # The index of the highest score is the predicted class.
            _, pred = scores.max(1, keepdim=True)
            hits = pred.eq(target.view_as(pred))
            num_correct += hits.sum().item()

    test_loss = loss_sum / len(test_loader.dataset)
    test_accuracy = 100. * num_correct / len(test_loader.dataset)
    return test_loss, test_accuracy

In [None]:
# Run: train for EPOCHS epochs and report test metrics after each one.
for epoch in range(1, EPOCHS + 1):
    train(model, train_loader, optimizer, epoch)
    test_loss, test_accuracy = evaluate(model, test_loader)

    report = '[{}] Test Loss: {:.4f},Accuracy : {:.2f}%'.format(
        epoch, test_loss, test_accuracy)
    print(report)

[1] Test Loss: 0.1111,Accuracy : 96.50%
[2] Test Loss: 0.0915,Accuracy : 96.97%
[3] Test Loss: 0.0767,Accuracy : 97.56%
[4] Test Loss: 0.0719,Accuracy : 97.75%
[5] Test Loss: 0.0665,Accuracy : 97.90%
[6] Test Loss: 0.0588,Accuracy : 98.21%
[7] Test Loss: 0.0560,Accuracy : 98.27%
[8] Test Loss: 0.0500,Accuracy : 98.51%
[9] Test Loss: 0.0497,Accuracy : 98.51%
[10] Test Loss: 0.0492,Accuracy : 98.50%
[11] Test Loss: 0.0466,Accuracy : 98.51%
[12] Test Loss: 0.0450,Accuracy : 98.56%
[13] Test Loss: 0.0419,Accuracy : 98.65%
[14] Test Loss: 0.0421,Accuracy : 98.76%
[15] Test Loss: 0.0400,Accuracy : 98.72%
[16] Test Loss: 0.0413,Accuracy : 98.71%
[17] Test Loss: 0.0423,Accuracy : 98.64%
[18] Test Loss: 0.0384,Accuracy : 98.79%
[19] Test Loss: 0.0380,Accuracy : 98.87%
[20] Test Loss: 0.0372,Accuracy : 98.87%
[21] Test Loss: 0.0382,Accuracy : 98.85%
[22] Test Loss: 0.0374,Accuracy : 98.91%
[23] Test Loss: 0.0367,Accuracy : 98.86%
[24] Test Loss: 0.0375,Accuracy : 98.86%
[25] Test Loss: 0.0359,Ac

#5.3 ResNet

In [None]:
#기본 구조 : 컨볼루션층의 출력에 전의 전 계층에 쓰였던 입력을 더함으로써 특징이 유실되지 않도록 함 
# 컬러 데이터 셋 학습 

In [None]:
# resnet은 residual 블록을 반복적으로 쌓아서 신경망을 깊게한 모델
# residual 블록 : 출력 = x + F(x), 여기서 F(x) = 컨볼루션 -> ReLU -> 컨볼루션

In [None]:
# Hyperparameters for the ResNet experiment.
EPOCHS = 300  # upper bound of the epoch loop that drives training
BATCH_SIZE = 128  # batch size handed to the DataLoaders

In [None]:
# Dataset: CIFAR-10, stored under './.data'.
# Training images get a random 32x32 crop (after 4-pixel padding) and a random
# horizontal flip before tensor conversion; both splits are normalized with
# mean 0.5 and std 0.5 on each of the three channels.
train_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10(
        './.data',
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ]),
    ),
    batch_size=BATCH_SIZE,
    shuffle=True,
)

test_loader = torch.utils.data.DataLoader(
    datasets.CIFAR10(
        './.data',
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ]),
    ),
    batch_size=BATCH_SIZE,
    shuffle=True,
)


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./.data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./.data/cifar-10-python.tar.gz to ./.data


In [None]:
class BasicBlock(nn.Module):
    """Residual block: two 3x3 conv + batch-norm stages plus a shortcut branch.

    The main branch is conv -> BN -> ReLU -> conv -> BN. Its result is added
    to the shortcut branch and passed through a final ReLU. The shortcut is an
    empty ``nn.Sequential`` (identity) unless the stride or the channel count
    changes, in which case it is a 1x1 convolution followed by batch norm so
    that both branches have matching shapes.

    Args:
        in_planes: number of input channels.
        planes: number of output channels.
        stride: stride of the first convolution and of the projection shortcut.
    """

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        self.conv1 = nn.Conv2d(
            in_planes, planes,
            kernel_size=3, stride=stride, padding=1, bias=False,
        )
        # Batch normalization: normalizes the activations of each batch.
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(
            planes, planes,
            kernel_size=3, stride=1, padding=1, bias=False,
        )
        self.bn2 = nn.BatchNorm2d(planes)

        needs_projection = stride != 1 or in_planes != planes
        if needs_projection:
            # Shapes differ between input and main branch: project the input.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return the output of one residual block applied to ``x``."""
        # convolution -> batch norm -> activation
        hidden = F.relu(self.bn1(self.conv1(x)))
        # convolution -> batch norm
        residual = self.bn2(self.conv2(hidden))
        # Add the (possibly projected) input back, then activate.
        return F.relu(residual + self.shortcut(x))

In [None]:
class ResNet(nn.Module):
    """ResNet built from three groups of two ``BasicBlock``s each.

    Structure: 3x3 conv (3 -> 16 channels) + batch norm + ReLU, then layer1
    (16 channels, stride 1), layer2 (32 channels, stride 2), layer3
    (64 channels, stride 2), an 8x8 average pooling and a linear classifier
    over 64 features.

    NOTE(review): the fixed 8x8 pooling followed by ``Linear(64, ...)``
    presumably targets 32x32 inputs -- confirm before using other sizes.

    Args:
        num_classes: number of outputs of the final linear layer.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Channel count entering the next block; advanced by _make_layer.
        self.in_planes = 16

        self.conv1 = nn.Conv2d(
            3, 16, kernel_size=3, stride=1, padding=1, bias=False,
        )
        self.bn1 = nn.BatchNorm2d(16)
        self.layer1 = self._make_layer(16, 2, stride=1)
        self.layer2 = self._make_layer(32, 2, stride=2)
        self.layer3 = self._make_layer(64, 2, stride=2)
        self.linear = nn.Linear(64, num_classes)

    def _make_layer(self, planes, num_blocks, stride):
        """Chain ``BasicBlock``s producing ``planes`` output channels.

        Only the first block uses ``stride``; the remaining
        ``num_blocks - 1`` blocks use stride 1. ``self.in_planes`` is set to
        ``planes`` so that later blocks receive the right input channel count.
        """
        blocks = []
        for block_stride in [stride] + [1] * (num_blocks - 1):
            # in_planes: input channels, planes: output channels
            blocks.append(BasicBlock(self.in_planes, planes, block_stride))
            self.in_planes = planes
        return nn.Sequential(*blocks)

    def forward(self, x):
        """Return the classifier outputs for the image batch ``x``."""
        out = F.relu(self.bn1(self.conv1(x)))
        # Pass through the three predefined groups of residual blocks.
        for stage in (self.layer1, self.layer2, self.layer3):
            out = stage(out)
        out = F.avg_pool2d(out, 8)  # average pooling
        # Flatten per sample so the result can feed the linear layer.
        out = out.view(out.size(0), -1)
        return self.linear(out)

In [None]:
# Build the ResNet on the selected device together with its optimizer.
model = ResNet()
model = model.to(DEVICE)
optimizer = optim.SGD(
    model.parameters(),
    lr=0.1,
    momentum=0.9,
    weight_decay=0.0005,
)
# Learning-rate decay: multiply the learning rate by 0.1 every 50 scheduler
# steps so that later updates become finer.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.1)

print(model)

ResNet(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential()
    )
    (1): BasicBlock(
      (conv1): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=

In [None]:
# Train the ResNet for EPOCHS epochs and report test metrics after each one.
for epoch in range(1, EPOCHS + 1):
    train(model, train_loader, optimizer, epoch)
    test_loss, test_accuracy = evaluate(model, test_loader)

    # Advance the learning-rate schedule only after this epoch's optimizer
    # updates. Stepping the scheduler first (as before) shifts the whole
    # schedule one epoch early and triggers PyTorch's ordering warning.
    scheduler.step()

    print('[{}] Test Loss: {:.4f}, Accuracy: {:.2f}%'.format(
          epoch, test_loss, test_accuracy))



[1] Test Loss: 1.7377, Accuracy: 41.16%
[2] Test Loss: 1.2208, Accuracy: 56.76%
[3] Test Loss: 1.2478, Accuracy: 60.76%
[4] Test Loss: 1.2223, Accuracy: 62.50%
[5] Test Loss: 0.8978, Accuracy: 70.23%
[6] Test Loss: 0.8506, Accuracy: 71.13%
[7] Test Loss: 0.9609, Accuracy: 68.96%
[8] Test Loss: 0.8471, Accuracy: 69.91%
[9] Test Loss: 0.8378, Accuracy: 72.19%
[10] Test Loss: 1.2642, Accuracy: 62.88%
[11] Test Loss: 0.9547, Accuracy: 66.82%
[12] Test Loss: 1.0452, Accuracy: 66.08%
