In [1]:
# 파이토치 호출
import torch
import torch.nn as nn

# 토치비전 transform 및 데이터셋 가져오기
from torchvision.transforms import transforms
from torchvision.datasets import CIFAR10

# 데이터 로더 가져오기
from torch.utils.data.dataloader import DataLoader

# 최적화 알고리즘 가져오기
from torch.optim import optimizer

# numpy
import numpy as np

# matplotlib
import matplotlib.pyplot as plt

# from PIL import image
# import cv2

In [2]:
# 시드 고정
torch.manual_seed(1) #CPU
torch.cuda.manual_seed(1) #GPU
torch.cuda.manual_seed_all(1)
np.random.seed(1)


In [3]:
# 데이터 transform 적용
# 해상도는 논문에서 사용한 224*224 해상도 적용
# transforms.Compose를 사용하여 여러 transform을 적용할 수 있음
data_transforms = transforms.Compose([
    transforms.Resize((224,224)), # 224*224로 크기 조정
    transforms.RandomHorizontalFlip(0.5), # 50% 확률로 좌우 반전
    transforms.ToTensor(), # 텐서로 변환
    transforms.Normalize((0.44671392, 0.43981278, 0.40664902), (0.061943434, 0.061971385, 0.06911716)) # 표준화를 원할 경우,
])

In [4]:
# 데이터 불러오기(fashion MNIST 사용)
path = '/home/xogns5037/딥러닝코딩스터디/data/CIFAR10'

train_data = CIFAR10(
    root = path,
    train=True,
    download=True,
    transform=data_transforms
)

test_data = CIFAR10(
    root = path,
    train=False,
    download=True,
    transform=data_transforms
)

Files already downloaded and verified
Files already downloaded and verified


In [5]:
train_data.data[0].shape # 224 224로 resize가 필요함을 확인

(32, 32, 3)

In [6]:
# 이미지 정규화
imgs = np.array([img.numpy() for img, _ in train_data])
print(f'img shape : {imgs.shape}')

img shape : (50000, 3, 224, 224)


In [7]:
min_r = np.min(imgs, axis=(2,3))[:, 0].min()
min_g = np.min(imgs, axis=(2,3))[:, 1].min()
min_b = np.min(imgs, axis=(2,3))[:, 2].min()

max_r = np.max(imgs, axis=(2,3))[:, 0].max()
max_g = np.max(imgs, axis=(2,3))[:, 1].max()
max_b = np.max(imgs, axis=(2,3))[:, 2].max()

mean_r = np.mean(imgs, axis=(2,3))[:, 0].mean()
mean_g = np.mean(imgs, axis=(2,3))[:, 1].mean()
mean_b = np.mean(imgs, axis=(2,3))[:, 2].mean()

std_r = np.std(imgs, axis=(2,3))[:, 0].std()
std_g = np.std(imgs, axis=(2,3))[:, 1].std()
std_b = np.std(imgs, axis=(2,3))[:, 2].std()

In [8]:
# 표준화 이전
# STL10 데이터의 경우 정규화가 이미 잘 되어있는 데이터
# 표준화가 나을지 정규화가 나을지는 직접 확인하며 작업해야함
print(f'min : {min_r, min_g, min_b}')
print(f'max : {max_r, max_g, max_b}')
print(f'mean : {mean_r, mean_g, mean_b}')
print(f'std : {std_r, std_g, std_b}')

min : (-7.2116427, -7.09703, -5.8834743)
max : (8.932118, 9.03945, 8.584714)
mean : (0.7213963, 0.68330956, 0.57701886)
std : (0.9696073, 0.979834, 0.9745563)


In [9]:
# 표준화 이후
# STL10 데이터의 경우 정규화가 이미 잘 되어있는 데이터
# 표준화가 나을지 정규화가 나을지는 직접 확인하며 작업해야함
print(f'min : {min_r, min_g, min_b}')
print(f'max : {max_r, max_g, max_b}')
print(f'mean : {mean_r, mean_g, mean_b}')
print(f'std : {std_r, std_g, std_b}')

min : (-7.2116427, -7.09703, -5.8834743)
max : (8.932118, 9.03945, 8.584714)
mean : (0.7213963, 0.68330956, 0.57701886)
std : (0.9696073, 0.979834, 0.9745563)


In [6]:
batch_size = 32

train_dataloader = DataLoader(
    train_data,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4  # 4개의 프로세스를 사용하여 데이터를 불러옴
)

test_dataloader = DataLoader(
    test_data,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4
)

In [7]:
(train_dataloader.dataset)

Dataset CIFAR10
    Number of datapoints: 50000
    Root location: /home/xogns5037/딥러닝코딩스터디/data/CIFAR10
    Split: Train
    StandardTransform
Transform: Compose(
               Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=warn)
               RandomHorizontalFlip(p=0.5)
               ToTensor()
               Normalize(mean=(0.44671392, 0.43981278, 0.40664902), std=(0.061943434, 0.061971385, 0.06911716))
           )

In [8]:
# 데이터 확인
print(f'train data 개수 : {len(train_dataloader.dataset)}')
print(f'test data 개수 : {len(test_dataloader.dataset)}')

for x, y in test_dataloader:
    print(f'x shape (N, C, H, W): {x.shape}')
    print(f'y shape : {y.shape, y.dtype}')
    break

train data 개수 : 50000
test data 개수 : 10000
x shape (N, C, H, W): torch.Size([32, 3, 224, 224])
y shape : (torch.Size([32]), torch.int64)


# 모델

In [9]:
class vgg16(nn.Module):
    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)

        # layer 1
        # 224 * 224 * 3
        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True), # inplace=True : 메모리 효율성을 늘리지 않기 위해 사용
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2) #  112 * 112 * 64
        )

        # 112 * 112 * 64
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True), # 112 * 112 * 128
            nn.MaxPool2d(kernel_size=2, stride=2) # 56 * 56 * 128
        )

        # 56 * 56 * 128
        self.layer3 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True), # 56 * 56 * 256
            nn.MaxPool2d(kernel_size=2, stride=2) # 28 * 28 * 256
        )

        # 28 * 28 * 256
        self.layer4 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True), # 28 * 28 * 512
            nn.MaxPool2d(kernel_size=2, stride=2) # 14 * 14 * 512
        )

        # 14 * 14 * 512
        self.layer5 = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True), # 14 * 14 * 512
            nn.MaxPool2d(kernel_size=2, stride=2) # 7 * 7 * 512
        )

        self.fc1 = nn.Sequential(
            nn.Linear(in_features=7 * 7 * 512, out_features=4096),
            nn.ReLU(inplace=True),
        )

        self.fc2 = nn.Sequential(
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(inplace=True),
        )

        self.fc3 = nn.Sequential(
            nn.Linear(in_features=4096, out_features=10)
        )
    
    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)

        # flatten
        x = torch.flatten(x, 1)

        # fc
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)

        return x


In [10]:
model = vgg16()

In [11]:
model

vgg16(
  (layer1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer3): Sequential(
    (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU(inplace=True)
    (6): MaxPool

In [12]:
torch.cuda.is_available()

True

In [13]:
device = 'cuda'

In [14]:
model.to(device)

vgg16(
  (layer1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer3): Sequential(
    (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU(inplace=True)
    (6): MaxPool

In [15]:
lr= 0.001
optimizer  = torch.optim.Adam(model.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss() # softmax + cross entropy

In [16]:
criterion.to(device)

CrossEntropyLoss()

In [17]:
# 학습
num_epochs = 1 # 에폭 설정

for epoch in range(num_epochs):
    running_loss = 0.0

    for i, data in enumerate(train_dataloader):
        images, labels = data
        images, labels = images.to(device), labels.to(device)

        #Forward pass
        outputs = model(images) # 모델 예측 시행
        loss = criterion(outputs, labels) # 예측에 따른 손실 계산 및 출력

        # backward pass
        optimizer.zero_grad() # 가중치 변화를 0으로 만들고
        loss.backward() # 역전파를 수행하고
        optimizer.step() # 가중치를 갱신

    print(loss.item())

print('학습종료!')

2.304377794265747
학습종료!
