In [309]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision
from torchvision import datasets, transforms
from torch._C import _ImperativeEngine as ImperativeEngine
from torch.autograd import Variable
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"GPU/CPU: {device}")

GPU/CPU: cpu


In [310]:
# Hyperparameters shared by the cells below.
batch_size = 64    # samples per mini-batch handed to every DataLoader
lr = 0.001         # learning rate passed to the optimizer
num_epochs = 20    # number of passes over the training loader
num_classes = 102  # width of each model's final (output) layer

# Seed the RNGs so weight initialisation, loader shuffling and random flips
# repeat across "Restart Kernel -> Run All". (GPU kernels may still introduce
# small non-deterministic differences.)
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

## 1. 전처리(이미지)

### 중요

In [311]:
# Training pipeline: resize every image to 32x32 (the input size the networks
# below are built for), flip it horizontally half of the time as augmentation,
# then convert it to a tensor.
train_transforms = transforms.Compose(
    [
        transforms.Resize(size=(32, 32)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.ToTensor(),
    ]
)

# Validation / test pipeline: same resize and tensor conversion, no augmentation.
val_transforms = transforms.Compose(
    [
        transforms.Resize(size=(32, 32)),
        transforms.ToTensor(),
    ]
)

## 2. 데이터 불러오기

In [312]:
# Build the three Flowers102 splits under ./data (download=True is passed for each).
# Only the training split uses the augmenting transform; "val" and "test" share the plain one.
train_dataset, val_dataset, test_dataset = (
    datasets.Flowers102(root="data", split=split_name, transform=split_tf, download=True)
    for split_name, split_tf in (
        ("train", train_transforms),
        ("val", val_transforms),
        ("test", val_transforms),
    )
)

In [313]:
# Only the training loader is shuffled. Validation and test batches keep the
# dataset order: shuffling cannot change accuracy, and a fixed order makes
# evaluation runs repeatable and individual batches inspectable.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [314]:
# len(loader) is the number of mini-batches in the loader, not the number of images.
# Each line is labelled with the split it actually reports (train / validation / test).
print(f"훈련 데이터: {len(train_loader)}")
print(f"검증 데이터: {len(val_loader)}")
print(f"테스트 데이터: {len(test_loader)}")

훈련 데이터: 16
훈련 데이터: 16
훈련 데이터: 97


In [315]:
# Pull a single mini-batch from the training loader to sanity-check tensor shapes.
data_iter = iter(train_loader)
sample_batch = next(data_iter)
sample_image = sample_batch[0]  # batch of image tensors
sample_label = sample_batch[1]  # matching batch of labels
sample_image.shape  # last expression of the cell -> shown as its output

torch.Size([64, 3, 32, 32])

## 모델 설계 (수업에서 사용한 모델)

In [316]:
class LeNet5Classic(nn.Module):
    """LeNet-5 style CNN: conv1 -> conv2 -> conv3 -> fc1 -> fc2.

    Built for 3-channel 32x32 inputs: with 5x5 unpadded convolutions and two
    2x2 average pools, the third convolution produces a 1x1 map, which is what
    ``fc1`` (120 inputs) requires. ReLU follows every layer except the last.
    The output width is read from the module-level ``num_classes``.
    Constructor arguments are accepted but ignored.
    """

    def __init__(self, *args, **kwargs):
        super().__init__()
        # Feature extractor: three 5x5 convolutions without padding.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        self.conv3 = nn.Conv2d(in_channels=16, out_channels=120, kernel_size=5)
        # Classifier head.
        self.fc1 = nn.Linear(in_features=120, out_features=84)
        self.fc2 = nn.Linear(in_features=84, out_features=num_classes)
        # Parameter-free layers shared by several stages of forward().
        self.relu = nn.ReLU()
        self.pool = nn.AvgPool2d(kernel_size=2, stride=2)

    def forward(self, x):
        """Map a (batch, 3, 32, 32) tensor to raw class scores of shape (batch, num_classes)."""
        # 3x32x32 -> 6x28x28 -> (pool) 6x14x14
        stage1 = self.pool(self.relu(self.conv1(x)))
        # 6x14x14 -> 16x10x10 -> (pool) 16x5x5
        stage2 = self.pool(self.relu(self.conv2(stage1)))
        # 16x5x5 -> 120x1x1
        stage3 = self.relu(self.conv3(stage2))
        # Collapse everything but the batch axis so the linear layers can consume it.
        flat = torch.flatten(stage3, 1)
        hidden = self.relu(self.fc1(flat))  # 120 -> 84
        return self.fc2(hidden)  # 84 -> num_classes

In [317]:
class LeNet5Modern(nn.Module):
    """LeNet-5 variant with batch normalisation, max pooling and dropout.

    ``features`` is three unpadded 5x5 convolutions (3 -> 6 -> 16 -> 120
    channels), each followed by BatchNorm and ReLU, with a 2x2 max pool after
    the first two. ``classifier`` is Dropout -> Linear(120, 84) -> ReLU ->
    Dropout -> Linear(84, num_classes), where ``num_classes`` is the
    module-level constant.

    The first linear layer takes 120 inputs, so the network expects 32x32
    images (the last convolution then yields a 1x1 map). Constructor arguments
    are accepted but ignored.
    """

    def __init__(self, *args, **kwargs):
        super().__init__()
        # Feature extraction.
        self.features = nn.Sequential(
            # First convolution: 3 -> 6 channels.
            nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5, stride=1, padding=0),
            nn.BatchNorm2d(6),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Second convolution: 6 -> 16 channels.
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1, padding=0),
            nn.BatchNorm2d(16),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Third convolution: 16 -> 120 channels (think of it as 120 features).
            nn.Conv2d(in_channels=16, out_channels=120, kernel_size=5, stride=1, padding=0),
            nn.BatchNorm2d(120),
            nn.ReLU(inplace=True),
        )
        # Classification head.
        self.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(120, 84),
            nn.ReLU(inplace=True),
            nn.Dropout(0.3),
            nn.Linear(84, num_classes),
        )
        # Apply the custom initialisation; previously it was defined but never invoked.
        self._init_weights()

    def _init_weights(self):
        """Re-initialise every Conv2d, BatchNorm2d and Linear submodule in place.

        Conv2d weights use Kaiming-normal (fan_out, ReLU gain); BatchNorm2d
        starts as the identity (weight 1, bias 0); Linear weights are drawn
        from N(0, 0.01); all biases are zeroed.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # kaiming_normal_ only accepts "fan_in" / "fan_out" (underscore);
                # the former "fan-out" spelling raised ValueError.
                nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Map a (batch, 3, 32, 32) tensor to raw class scores of shape (batch, num_classes)."""
        x = self.features(x)     # -> (batch, 120, 1, 1) for 32x32 inputs
        x = torch.flatten(x, 1)  # -> (batch, 120), ready for the linear layers
        return self.classifier(x)

In [318]:
class Flowers102CNN(nn.Module):
    """Two-block CNN followed by a three-layer classifier, for 3x32x32 images.

    Each convolutional block is Conv -> BatchNorm -> ReLU -> 2x2 MaxPool. The
    classifier is fc1 -> ReLU -> Dropout(0.5) -> fc2 -> ReLU -> Dropout(0.25)
    -> fc3. The output width is read from the module-level ``num_classes``.

    ``fc1`` expects 64 * 7 * 7 inputs, which fixes the input resolution at
    32x32 (see the shape comments below).
    """

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Sequential(
            # out = (in + 2 * padding - kernel) / stride + 1 = (32 + 2 - 3) / 1 + 1 = 32,
            # so 3x32x32 -> 32x32x32.
            nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),  # per-channel batch normalisation
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # halves the spatial size -> 32x16x16
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3),  # no padding: 16x16 -> 14x14
            nn.BatchNorm2d(64),
            nn.ReLU(),  # 64x14x14
            nn.MaxPool2d(kernel_size=2, stride=2),  # -> 64x7x7
        )
        self.fc1 = nn.Linear(64 * 7 * 7, 512)
        # Two separate dropout layers: assigning both to one attribute name
        # silently discarded the first (p=0.5) one.
        self.drop1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(in_features=512, out_features=256)
        self.drop2 = nn.Dropout(0.25)
        self.fc3 = nn.Linear(in_features=256, out_features=num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Map a (batch, 3, 32, 32) tensor to raw class scores of shape (batch, num_classes)."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = torch.flatten(x, 1)  # (batch, 64 * 7 * 7)
        # A non-linearity between the linear layers is required; without it
        # fc1 -> fc2 -> fc3 is equivalent to a single linear layer.
        x = self.drop1(self.relu(self.fc1(x)))
        x = self.drop2(self.relu(self.fc2(x)))
        return self.fc3(x)

## 학습, 평가

In [319]:
# Fresh network on the selected device, plus the loss and optimizer used to train it.
model = Flowers102CNN()
model = model.to(device)  # move parameters before the optimizer captures them
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=lr)

In [320]:
def train_model(model, train_loader, criterion, optimizer, num_epochs=num_epochs):
    """Train ``model`` and print the mean batch loss after every epoch.

    Args:
        model: network to optimise; it is switched to training mode.
        train_loader: iterable yielding ``(images, labels)`` batches.
        criterion: callable computing the loss from ``(outputs, labels)``.
        optimizer: optimizer holding ``model``'s parameters.
        num_epochs: number of passes over ``train_loader``.

    Returns:
        list[float]: the mean batch loss of each epoch, in order. An epoch in
        which the loader yields no batch is recorded as ``nan`` instead of
        raising ``ZeroDivisionError``.

    Batches are moved to the module-level ``device`` before the forward pass.
    """
    model.train()
    epoch_losses = []
    for epoch in range(num_epochs):
        running_loss = 0.0
        # Count batches as they arrive rather than calling len(train_loader),
        # so loaders without a length and empty loaders are both handled.
        num_batches = 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            # Forward pass.
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass and parameter update.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num_batches += 1

        epoch_loss = running_loss / num_batches if num_batches else float("nan")
        epoch_losses.append(epoch_loss)
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss:.4f}")

    return epoch_losses

def evaluate_model(model, test_loader):
    """Compute, print and return the top-1 accuracy of ``model`` on ``test_loader``.

    Args:
        model: network to evaluate; it is switched to evaluation mode and left there.
        test_loader: iterable yielding ``(images, labels)`` batches.

    Returns:
        float: accuracy in percent (0-100). If the loader yields no samples,
        0.0 is returned instead of raising ``ZeroDivisionError``.

    Batches are moved to the module-level ``device``; gradients are disabled
    for the whole pass.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            predicted = outputs.argmax(dim=1)  # index of the highest score per sample
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    # Guard the empty-loader case so the division cannot fail.
    accuracy = 100 * correct / total if total else 0.0
    print(f"Test Accuracy: {accuracy:.2f}%")
    return accuracy


In [None]:
# Fit the network on the training loader, then report accuracy on the test loader.
train_model(
    model=model,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=num_epochs,
)
evaluate_model(model=model, test_loader=test_loader)