In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils import data
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
import numpy as np
import matplotlib.pyplot as plt
import torch.nn.init
from PIL import Image
from torch.utils.data import Dataset, DataLoader
import torchvision
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
# NOTE(review): this repeats the identical assignment made among the imports above.
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Mini-batch size passed to both the training and the test DataLoader.
BATCH_SIZE = 8

# NOTE(review): not referenced by the visible cells; InceptionV3 relies on its own n_classes=10 default.
NUM_CLASSES = 10
# TensorBoard writer used by train() and test() to record loss values.
writer = SummaryWriter()

In [None]:
class ConvBlock(nn.Module):
  """Convolution followed by batch normalisation and an in-place ReLU.

  Args:
    in_channels: number of channels of the incoming feature map.
    out_channels: number of channels produced by the convolution.
    **kwargs: forwarded unchanged to ``nn.Conv2d`` (for example
      ``kernel_size``, ``stride`` or ``padding``).
  """

  def __init__(self,in_channels,out_channels,**kwargs):
    super().__init__()
    # The convolution is created without a bias term; BatchNorm2d follows it directly.
    stages=[
        nn.Conv2d(in_channels,out_channels,bias=False,**kwargs),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True),
    ]
    self.conv=nn.Sequential(*stages)

  def forward(self,x):
    """Run ``x`` through conv -> batch norm -> ReLU and return the result."""
    activated=self.conv(x)
    return activated




In [None]:
 #Inception Block from googlenet(inception v1)
class InceptionA(nn.Module):
    """Inception block with four parallel branches that preserve spatial size.

    Every branch uses stride 1 with size-preserving padding, so the outputs can
    be concatenated along the channel axis. The result has
    ``64 + 64 + 96 + pool_features`` channels.

    Args:
        input_channel: number of channels of the incoming feature map.
        pool_features: number of channels produced by the pooling branch.
    """

    def __init__(self,input_channel,pool_features):
        super().__init__()
        # Branch 1: a single 1x1 convolution.
        self.branch1x1=ConvBlock(input_channel,64,kernel_size=1)

        # Branch 2: 1x1 reduction followed by a padded 5x5 convolution.
        reduce_5x5=ConvBlock(input_channel,48,kernel_size=1)
        conv_5x5=ConvBlock(48,64,kernel_size=5,padding=2)
        self.branch5x5=nn.Sequential(reduce_5x5,conv_5x5)

        # Branch 3: 1x1 reduction followed by two padded 3x3 convolutions.
        reduce_3x3=ConvBlock(input_channel,64,kernel_size=1)
        conv_3x3_first=ConvBlock(64,96,kernel_size=3,padding=1)
        conv_3x3_second=ConvBlock(96,96,kernel_size=3,padding=1)
        self.branch3x3=nn.Sequential(reduce_3x3,conv_3x3_first,conv_3x3_second)

        # Branch 4: 3x3 average pooling followed by a padded 3x3 convolution.
        # NOTE(review): the original comment described this projection as 1x1,
        # but the code uses kernel_size=3 with padding=1 -- confirm which is intended.
        pool=nn.AvgPool2d(kernel_size=3,stride=1,padding=1)
        pool_proj=ConvBlock(input_channel,pool_features,kernel_size=3,padding=1)
        self.branchpool=nn.Sequential(pool,pool_proj)

    def forward(self,x):
        """Apply the four branches to ``x`` and concatenate them on dim 1."""
        branch_modules=(self.branch1x1,self.branch5x5,self.branch3x3,self.branchpool)
        outputs=[branch(x) for branch in branch_modules]
        return torch.cat(outputs,1)


In [None]:
class InceptionB(nn.Module):
  """Grid-reduction block: three stride-2 branches concatenated on channels.

  The output has ``384 + 96 + input_channels`` channels.

  Args:
    input_channels: number of channels of the incoming feature map.
  """

  def __init__(self,input_channels):
    super().__init__()

    # Branch 1: one unpadded 3x3 convolution with stride 2.
    self.branch3x3=ConvBlock(input_channels,384,kernel_size=3,stride=2)
    # Branch 2: 1x1 -> 3x3 (stride 1) -> 3x3 (stride 2).
    # The middle 3x3 needs padding=1 so it keeps the spatial size; without it
    # this branch ends up one pixel smaller than the other two and the
    # torch.cat in forward() fails with a size mismatch.
    self.branch3x3stack=nn.Sequential(
        ConvBlock(input_channels,64,kernel_size=1),
        ConvBlock(64,96,kernel_size=3,stride=1,padding=1),
        ConvBlock(96,96,kernel_size=3,stride=2)
    )
    # Branch 3: 3x3 max pooling with stride 2 (keeps input_channels channels).
    self.branchpool=nn.MaxPool2d(kernel_size=3,stride=2)

  def forward(self,x):
    """Apply the three branches to ``x`` and concatenate them on dim 1."""
    #x->3x3 (stride 2)
    branch3x3=self.branch3x3(x)
    #x->1x1->3x3->3x3 (stride 2)
    branch3x3stack=self.branch3x3stack(x)
    #x->maxpool (stride 2)
    branchpool=self.branchpool(x)
    outputs=[branch3x3,branch3x3stack,branchpool]

    return torch.cat(outputs,dim=1)

In [None]:
class InceptionC(nn.Module):
    """Inception block built from factorised 7x7 convolutions (1x7 and 7x1).

    Four branches, each producing 192 channels at the input's spatial size,
    are concatenated on the channel axis (768 output channels in total).

    Args:
        in_channels: number of channels of the incoming feature map.
        channels_7x7: width of the intermediate layers in the two 7x7 branches.
        conv_block: callable ``(in_channels, out_channels, **kwargs)`` used to
            build every convolution; ``ConvBlock`` is used when it is ``None``.
    """

    def __init__(self, in_channels, channels_7x7, conv_block=None):
        super().__init__()
        make = ConvBlock if conv_block is None else conv_block
        width = channels_7x7

        # Keyword sets for the two orientations of the factorised 7x7 kernel.
        horizontal = dict(kernel_size=(1, 7), padding=(0, 3))
        vertical = dict(kernel_size=(7, 1), padding=(3, 0))

        # Branch 1: plain 1x1 convolution.
        self.branch1x1 = make(in_channels, 192, kernel_size=1)

        # Branch 2: 1x1 -> 1x7 -> 7x1.
        self.branch7x7_1 = make(in_channels, width, kernel_size=1)
        self.branch7x7_2 = make(width, width, **horizontal)
        self.branch7x7_3 = make(width, 192, **vertical)

        # Branch 3: 1x1 -> 7x1 -> 1x7 -> 7x1 -> 1x7.
        self.branch7x7dbl_1 = make(in_channels, width, kernel_size=1)
        self.branch7x7dbl_2 = make(width, width, **vertical)
        self.branch7x7dbl_3 = make(width, width, **horizontal)
        self.branch7x7dbl_4 = make(width, width, **vertical)
        self.branch7x7dbl_5 = make(width, 192, **horizontal)

        # Branch 4: 1x1 convolution applied after average pooling (see _forward).
        self.branch_pool = make(in_channels, 192, kernel_size=1)

    def _forward(self, x):
        """Return the four branch outputs as a list, without concatenating."""
        out_1x1 = self.branch1x1(x)

        out_7x7 = self.branch7x7_3(self.branch7x7_2(self.branch7x7_1(x)))

        out_7x7dbl = x
        for layer in (
            self.branch7x7dbl_1,
            self.branch7x7dbl_2,
            self.branch7x7dbl_3,
            self.branch7x7dbl_4,
            self.branch7x7dbl_5,
        ):
            out_7x7dbl = layer(out_7x7dbl)

        pooled = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1)
        out_pool = self.branch_pool(pooled)

        return [out_1x1, out_7x7, out_7x7dbl, out_pool]

    def forward(self, x):
        """Concatenate the four branch outputs along the channel dimension."""
        return torch.cat(self._forward(x), 1)


In [None]:
class InceptionD(nn.Module):
  """Grid-reduction block: three stride-2 branches concatenated on channels.

  The output has ``320 + 192 + input_channels`` channels.

  Args:
    input_channels: number of channels of the incoming feature map.
  """

  def __init__(self,input_channels):
    super().__init__()

    # Branch 1: 1x1 reduction, then an unpadded 3x3 convolution with stride 2.
    reduce_3x3=ConvBlock(input_channels,192,kernel_size=1)
    down_3x3=ConvBlock(192,320,kernel_size=3,stride=2)
    self.branch3x3=nn.Sequential(reduce_3x3,down_3x3)

    # Branch 2: 1x1 -> 1x7 -> 7x1 (both padded), then an unpadded 3x3 with stride 2.
    reduce_7x7=ConvBlock(input_channels,192,kernel_size=1)
    conv_1x7=ConvBlock(192,192,kernel_size=(1,7),padding=(0,3))
    conv_7x1=ConvBlock(192,192,kernel_size=(7,1),padding=(3,0))
    down_7x7=ConvBlock(192,192,kernel_size=3,stride=2)
    self.branch7x7=nn.Sequential(reduce_7x7,conv_1x7,conv_7x1,down_7x7)

    # Branch 3: 3x3 average pooling with stride 2 (keeps input_channels channels).
    self.branchpool=nn.AvgPool2d(kernel_size=3,stride=2)

  def forward(self,x):
    """Apply the three branches to ``x`` and concatenate them on dim 1."""
    reduced=[
        self.branch3x3(x),
        self.branch7x7(x),
        self.branchpool(x),
    ]
    return torch.cat(reduced,1)

In [None]:
class InceptionE(nn.Module):
  """Inception block whose 3x3 convolutions are split into parallel 1x3 / 3x1.

  Four size-preserving branches are concatenated on the channel axis:
  320 (1x1) + 768 (split 3x3) + 768 (3x3 then split 3x3) + 192 (pool), i.e.
  2048 output channels.

  Args:
    input_channels: number of channels of the incoming feature map.
  """

  def __init__(self,input_channels):
    super().__init__()
    # Branch 1: plain 1x1 convolution.
    self.branch1x1=ConvBlock(input_channels,320,kernel_size=1)

    # Branch 2: 1x1 reduction feeding a 1x3 and a 3x1 convolution in parallel.
    self.branch3x3_1=ConvBlock(input_channels,384,kernel_size=1)
    self.branch3x3_2a=ConvBlock(384,384,kernel_size=(1,3),padding=(0,1))
    self.branch3x3_2b=ConvBlock(384,384,kernel_size=(3,1),padding=(1,0))

    # Branch 3: 1x1 -> 3x3, then a 1x3 and a 3x1 convolution in parallel.
    self.branch3x3stack_1=ConvBlock(input_channels,448,kernel_size=1)
    self.branch3x3stack_2=ConvBlock(448,384,kernel_size=3,padding=1)
    self.branch3x3stack_3a=ConvBlock(384,384,kernel_size=(1,3),padding=(0,1))
    self.branch3x3stack_3b=ConvBlock(384,384,kernel_size=(3,1),padding=(1,0))

    # Branch 4: size-preserving 3x3 average pooling followed by a 1x1 convolution.
    self.branch_pool=nn.Sequential(
        nn.AvgPool2d(kernel_size=3,stride=1,padding=1),
        ConvBlock(input_channels,192,kernel_size=1)

    )

  def forward(self,x):
    """Apply the four branches to ``x`` and concatenate them on dim 1."""
    #x->1x1
    branch1x1=self.branch1x1(x)

    #x->1x1->1x3
    #x->1x1->3x1
    # The 1x1 reduction must be *called* on x; passing the module object itself
    # to the next layers raises a TypeError.
    branch3x3=self.branch3x3_1(x)
    branch3x3=torch.cat(
        [self.branch3x3_2a(branch3x3),self.branch3x3_2b(branch3x3)],
        1,
    )

    #x->1x1->3x3->1x3
    #x->1x1->3x3->3x1
    branch3x3stack=self.branch3x3stack_1(x)
    branch3x3stack=self.branch3x3stack_2(branch3x3stack)
    # Concatenate this branch's own two halves (not the tensor of the previous branch).
    branch3x3stack=torch.cat(
        [self.branch3x3stack_3a(branch3x3stack),self.branch3x3stack_3b(branch3x3stack)],
        1,
    )

    #x->avgpool->1x1
    branchpool=self.branch_pool(x)

    outputs=[branch1x1,branch3x3,branch3x3stack,branchpool]

    return torch.cat(outputs,1)



In [None]:
class InceptionV3(nn.Module):
  """Inception-v3 style classifier assembled from the blocks of this notebook.

  Layout: convolutional stem with two max-pools, three InceptionA blocks, an
  InceptionB reduction, four InceptionC blocks, an InceptionD reduction, two
  InceptionE blocks, then global average pooling, dropout and a linear layer.

  Args:
    input_channel: number of channels of the input images.
    n_classes: length of the score vector produced per sample (default 10).

  ``forward`` returns raw, un-normalised class scores of shape
  ``(batch, n_classes)``; no softmax / log-softmax is applied.
  """

  def __init__(self,input_channel,n_classes=10):
    super().__init__()
    # Stem: three padded 3x3 convolutions (stride 1, spatial size unchanged).
    self.Conv2d_1a_3x3=ConvBlock(input_channel,32,kernel_size=3,padding=1)
    self.Conv2d_2a_3x3=ConvBlock(32,32,kernel_size=3,padding=1)
    self.Conv2d_2b_3x3=ConvBlock(32,64,kernel_size=3,padding=1)
    self.maxpool_1=nn.MaxPool2d(kernel_size=3,stride=2,padding=1)
    self.Conv2d_3b_1x1=ConvBlock(64,80,kernel_size=1)
    self.Conv2d_4a_3x3=ConvBlock(80,192,kernel_size=3)
    # Second stem pool. It has to be registered as maxpool_2 because forward()
    # calls self.maxpool_2; assigning maxpool_1 twice left that attribute undefined.
    self.maxpool_2=nn.MaxPool2d(kernel_size=3,stride=2,padding=1)

    # Size-preserving Inception blocks: 192 -> 256 -> 288 -> 288 channels.
    self.Mixed_5b=InceptionA(192,pool_features=32)
    self.Mixed_5c=InceptionA(256,pool_features=64)
    self.Mixed_5d=InceptionA(288,pool_features=64)

    # Downsampling: 288 -> 768 channels.
    self.Mixed_6a=InceptionB(288)

    self.Mixed_6b=InceptionC(768,channels_7x7=128)
    self.Mixed_6c=InceptionC(768,channels_7x7=160)
    self.Mixed_6d=InceptionC(768,channels_7x7=160)
    # Attribute name must match the self.Mixed_6e lookup in forward().
    self.Mixed_6e=InceptionC(768,channels_7x7=192)

    # Downsampling: 768 -> 1280 channels.
    self.Mixed_7a=InceptionD(768)

    self.Mixed_7b=InceptionE(1280)
    self.Mixed_7c=InceptionE(2048)

    # Classifier head: global average pool to 1x1, dropout, linear layer.
    self.avgpool=nn.AdaptiveAvgPool2d((1,1))
    # Dropout2d zeroes whole channels; after the 1x1 pooling each channel holds one value.
    self.dropout=nn.Dropout2d()
    self.linear=nn.Linear(2048,n_classes)

  def forward(self,x):
    """Compute class scores for a batch of images ``x``."""
    # Stem convolutions (spatial size unchanged).
    x=self.Conv2d_1a_3x3(x)
    x=self.Conv2d_2a_3x3(x)
    x=self.Conv2d_2b_3x3(x)

    # Stride-2 pooling.
    x=self.maxpool_1(x)

    x=self.Conv2d_3b_1x1(x)
    # Unpadded 3x3 convolution: shrinks height and width by 2.
    x=self.Conv2d_4a_3x3(x)

    # Stride-2 pooling.
    x=self.maxpool_2(x)

    # InceptionA blocks keep the spatial size.
    x=self.Mixed_5b(x)
    x=self.Mixed_5c(x)
    x=self.Mixed_5d(x)

    # Stride-2 reduction block.
    x=self.Mixed_6a(x)

    # InceptionC blocks keep the spatial size.
    x=self.Mixed_6b(x)
    x=self.Mixed_6c(x)
    x=self.Mixed_6d(x)
    x=self.Mixed_6e(x)

    # Stride-2 reduction block.
    x=self.Mixed_7a(x)

    # InceptionE blocks keep the spatial size.
    x=self.Mixed_7b(x)
    x=self.Mixed_7c(x)

    # Global pooling to 1x1, dropout, flatten to (batch, 2048), classify.
    x=self.avgpool(x)
    x=self.dropout(x)
    x=x.view(x.size(0),-1)
    x=self.linear(x)
    return x

In [None]:
# Preprocessing applied to every MNIST image (Compose chains the listed transforms).
# NOTE(review): 299x299 inputs together with the stride-1 stem of InceptionV3 give
# large activation maps; the recorded training run ended in a CUDA out-of-memory
# error -- confirm this resolution is intended.
transform=transforms.Compose([
    transforms.Resize((299,299)),
    transforms.ToTensor()  # converts a PIL image or numpy.ndarray to a tensor
])
mnist_train=datasets.MNIST(root='MNIST_data/',  # download / cache directory
                        train=True,  # training split
                        download=True,
                        transform=transform)
mnist_test=datasets.MNIST(root='MNIST_data/',  # download / cache directory
                       train=False,  # test split
                       download=True,
                       transform=transform)

# Training batches are shuffled; the trailing incomplete batch is dropped.
train_loader=torch.utils.data.DataLoader(dataset=mnist_train,batch_size=BATCH_SIZE,shuffle=True,drop_last=True)
# Evaluation must see every sample exactly once: no shuffling and no dropped
# remainder, otherwise test metrics are computed on a subset whenever
# BATCH_SIZE does not divide the size of the test set.
test_loader=torch.utils.data.DataLoader(dataset=mnist_test,batch_size=BATCH_SIZE,shuffle=False,drop_last=False)

In [None]:
# Single-channel input; .to(device) moves the parameters to the selected device.
model =InceptionV3(1).to(device)
# InceptionV3.forward returns raw logits (no log_softmax), so the loss must be
# cross_entropy, which applies log_softmax internally. nll_loss expects
# log-probabilities and is not a valid objective on raw logits.
criterion = F.cross_entropy
# model.parameters() hands every trainable parameter of the network to the optimizer.
optimizer = torch.optim.RMSprop(model.parameters(), lr=0.045,alpha=0.9, eps=1.0)


In [None]:
# NOTE(review): `summary_` is imported here but never called in the visible cells.
from torchsummary import summary as summary_
# Print the module tree of the network.
print(model)

InceptionV3(
  (Conv2d_1a_3x3): ConvBlock(
    (conv): Sequential(
      (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
  )
  (Conv2d_2a_3x3): ConvBlock(
    (conv): Sequential(
      (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
  )
  (Conv2d_2b_3x3): ConvBlock(
    (conv): Sequential(
      (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
  )
  (Conv2d_3b_1x1): ConvBlock(
    (conv): Sequential(
      (0): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (1): BatchNorm2d(80, eps=1e-05, m

In [None]:
def train(model, device, train_loader, optimizer, epoch):
    """Train ``model`` for one pass over ``train_loader``.

    Reads the module-level ``criterion`` (loss function) and ``writer``
    (TensorBoard SummaryWriter).

    Args:
        model: network to optimise; switched to training mode.
        device: device the batches are moved to before the forward pass.
        train_loader: iterable yielding ``(data, target)`` batches.
        optimizer: optimizer holding the parameters of ``model``.
        epoch: epoch number, used for logging and progress output.
    """
    model.train()
    batches_per_epoch = len(train_loader)
    for batch_idx, (data, target) in enumerate(train_loader):
        # Class indices must be int64 for the classification loss.
        data = data.to(device)
        target = target.to(device=device, dtype=torch.long)
        optimizer.zero_grad()  # clear gradients left over from the previous step
        output = model(data)
        loss = criterion(output, target)
        # Log with a step that advances on every batch; using `epoch` as the step
        # put all batches of an epoch on the same x position.
        global_step = epoch * batches_per_epoch + batch_idx
        writer.add_scalar("Loss/Alex train", loss.item(), global_step)
        loss.backward()  # back-propagate
        optimizer.step()  # update the parameters
        if (batch_idx + 1) % 30 == 0:
            print("Train Epoch:{} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

In [None]:
import torch.nn.functional as F
def test(model, device, test_loader, epoch=None):
    """Evaluate ``model`` on ``test_loader`` and report loss and accuracy.

    Reads the module-level ``writer`` (TensorBoard SummaryWriter).

    Args:
        model: network to evaluate; switched to evaluation mode.
        device: device the batches are moved to before the forward pass.
        test_loader: iterable yielding ``(data, target)`` batches.
        epoch: optional TensorBoard step for the logged test loss. Previously
            the function read a global ``epoch`` and failed with NameError when
            called before the training loop had defined it.
    """
    model.eval()
    loss_sum = 0.0
    correct = 0
    seen = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # Sum per-sample losses so dividing by the sample count gives a true mean
            # (the old code summed batch means and divided by the dataset size).
            loss_sum += F.cross_entropy(output, target, reduction='sum').item()
            pred = output.argmax(dim=1)
            correct += pred.eq(target).sum().item()
            seen += target.size(0)

    # Normalise by the samples actually evaluated; guard against an empty loader.
    denom = max(seen, 1)
    test_loss = loss_sum / denom
    # One scalar per evaluation instead of a running partial value per batch.
    writer.add_scalar("Loss/Alex test", test_loss, epoch)
    print("\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n".format(
        test_loss, correct, seen, 100. * correct / denom))
    print('='*50)


In [None]:
# Number of full passes over the training set.
epochs=5
# Epochs are numbered from 1. The loop variable `epoch` stays defined at module
# level after the loop finishes.
# NOTE(review): test() is defined above but is not called in this cell.
# NOTE(review): the recorded run of this cell ended in a CUDA out-of-memory
# error; a smaller BATCH_SIZE or input resolution may be needed -- confirm.
for epoch in range(1,epochs+1):
  train(model,device,train_loader,optimizer,epoch)



# Write pending TensorBoard events to disk, then close the writer.
writer.flush()
writer.close()

OutOfMemoryError: CUDA out of memory. Tried to allocate 88.00 MiB. GPU 0 has a total capacity of 14.75 GiB of which 23.06 MiB is free. Process 8600 has 14.72 GiB memory in use. Of the allocated memory 14.55 GiB is allocated by PyTorch, and 48.48 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)