In [2]:
# from google.colab import drive
# drive.mount('/content/drive')

In [3]:
import torch.nn as nn
import torch

from torch import Tensor
from typing import Type

In [4]:
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions, each followed by batch norm.

    The block computes ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut)``,
    where ``shortcut`` is ``downsample(x)`` when a ``downsample`` module is
    given and ``x`` itself otherwise.

    Args:
        in_channels: Channels of the incoming tensor.
        out_channels: Channels produced by the first convolution.
        stride: Stride of the first convolution (the second always uses 1).
        expansion: Multiplier applied to ``out_channels`` for the second
            convolution's output width.
        downsample: Optional module applied to the input to build the
            shortcut branch.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        stride: int = 1,
        expansion: int = 1,
        downsample: nn.Module = None
    ) -> None:
        super().__init__()
        self.expansion = expansion
        self.downsample = downsample
        widened = out_channels * self.expansion

        # First stage: strided 3x3 convolution.
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        # One in-place ReLU instance is shared by both activation points.
        self.relu = nn.ReLU(inplace=True)
        # Second stage: stride-1 3x3 convolution to the (possibly widened) output.
        self.conv2 = nn.Conv2d(out_channels, widened, kernel_size=3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(widened)

    def forward(self, x: Tensor) -> Tensor:
        """Run the block on ``x`` and return the activated residual sum."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # Shortcut branch: projected input when a downsample module exists.
        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)


In [5]:
class ResNet(nn.Module):
    """ResNet classifier assembled from a configurable residual block class.

    Only the 18-layer configuration (``[2, 2, 2, 2]`` blocks per stage with
    expansion 1) is implemented.

    Args:
        img_channels: Number of channels in the input images.
        num_layers: Depth selector; must be 18.
        block: Residual block class. It is called as
            ``block(in_channels, out_channels, stride, expansion, downsample)``.
        num_classes: Number of outputs of the final linear layer.

    Raises:
        ValueError: If ``num_layers`` is not a supported depth.
    """

    def __init__(
        self,
        img_channels: int,
        num_layers: int,
        block: Type[nn.Module],
        num_classes: int  = 1000
    ) -> None:
        super(ResNet, self).__init__()
        if num_layers == 18:
            layers = [2, 2, 2, 2]
            self.expansion = 1
        else:
            # Fail with a clear message instead of an unbound-variable error below.
            raise ValueError(
                f"Unsupported num_layers={num_layers}; only 18 is implemented"
            )

        # Stem: 7x7 stride-2 convolution followed by a 3x3 stride-2 max pool.
        self.in_channels = 64
        self.conv1 = nn.Conv2d(
            in_channels=img_channels,
            out_channels=self.in_channels,
            kernel_size=7,
            stride=2,
            padding=3,
            bias=False
        )
        self.bn1 = nn.BatchNorm2d(self.in_channels)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Four residual stages; every stage after the first halves the resolution.
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512*self.expansion, num_classes)

    def _make_layer(
        self,
        block: Type[nn.Module],
        out_channels: int,
        blocks: int,
        stride: int = 1
    ) -> nn.Sequential:
        """Build one stage of ``blocks`` residual blocks.

        The first block receives ``stride`` and, when the shortcut cannot be
        the identity, a 1x1 projection; the remaining blocks use stride 1.
        ``self.in_channels`` is updated to the stage's output width.
        """
        downsample = None
        # A projection is needed whenever the shortcut's shape would differ
        # from the main branch: a spatial stride OR a channel-count change.
        if stride != 1 or self.in_channels != out_channels * self.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(
                    self.in_channels,
                    out_channels*self.expansion,
                    kernel_size=1,
                    stride=stride,
                    bias=False
                ),
                nn.BatchNorm2d(out_channels * self.expansion),
            )
        layers = [
            block(
                self.in_channels, out_channels, stride, self.expansion, downsample
            )
        ]
        self.in_channels = out_channels * self.expansion

        for _ in range(1, blocks):
            layers.append(block(
                self.in_channels,
                out_channels,
                expansion=self.expansion
            ))
        return nn.Sequential(*layers)

    def forward(self, x: Tensor) -> Tensor:
        """Return class scores of shape ``(batch, num_classes)`` for ``x``."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        # Debug output: printed on every forward pass.
        print('Dimensions of the last convolutional feature map: ', x.shape)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

임시 코드 
- 학습: 에폭 3번, 에폭마다 배치 3개만 진행
- 검증: 검증 루프에서 배치 3개만 진행

Dimensions of the last convolutional feature map  
-> ResNet 모델의 forward 메서드에서 마지막 합성곱 층의 출력 텐서의 크기를 출력하는 디버그 메시지

In [10]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from PIL import Image
from sklearn.metrics import accuracy_score
from typing import Type

# Directories that contain the training and validation image files
train_image_directory = '../dataset/training_image'
valid_image_directory = '../dataset/validation_image'

# CustomDataset class definition
class CustomDataset(Dataset):
    """Dataset of ``.jpg`` images whose class label is parsed from the file name.

    File names are split on ``_``; the first field (gender, per the original
    notes) and the third field (style) are joined as ``"{gender}_{style}"``
    and looked up in the module-level ``label_to_index`` mapping.

    Args:
        image_directory: Directory that is listed for ``.jpg`` files.
        transform: Optional callable applied to the loaded RGB image.
    """

    def __init__(self, image_directory, transform=None):
        self.image_directory = image_directory
        # Keep only entries with a (case-sensitive) '.jpg' suffix.
        self.image_files = list(
            filter(lambda name: name.endswith('.jpg'), os.listdir(image_directory))
        )
        self.transform = transform

    def __len__(self):
        """Number of ``.jpg`` files found in the directory."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Return ``(image, label_idx)`` for the file at position ``idx``.

        Raises:
            ValueError: If the file name has fewer than four ``_``-separated fields.
        """
        file_name = self.image_files[idx]
        image = Image.open(os.path.join(self.image_directory, file_name)).convert('RGB')

        # Label pieces come from the file name: field 0 and field 2.
        fields = file_name.split('_')
        if len(fields) < 4:
            raise ValueError(f"Invalid file name format: {file_name}")

        label_idx = label_to_index[f"{fields[0]}_{fields[2]}"]

        if self.transform:
            image = self.transform(image)
        return image, label_idx

# Build the label <-> integer index mappings from the file names of both splits.
# Only '.jpg' entries are considered, matching the filter CustomDataset applies,
# so stray non-image files cannot add classes that never occur in the data.
label_set = set()
for file_name in os.listdir(train_image_directory) + os.listdir(valid_image_directory):
    if not file_name.endswith('.jpg'):
        continue
    parts = file_name.split('_')
    # Names with fewer than four fields carry no usable label and are skipped here.
    if len(parts) >= 4:
        gender = parts[0]
        style = parts[2]
        label_set.add(f"{gender}_{style}")

# Sorting makes the label -> index assignment deterministic across runs.
label_list = sorted(label_set)
label_to_index = {label: idx for idx, label in enumerate(label_list)}
index_to_label = {idx: label for label, idx in label_to_index.items()}

# Preprocessing and data loading: resize to 224x224, convert to a tensor,
# then normalize each channel with the mean/std values below.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_dataset = CustomDataset(train_image_directory, transform=transform)
val_dataset = CustomDataset(valid_image_directory, transform=transform)

# Training batches are shuffled; validation batches keep the dataset order.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Factory function for the ResNet-18 model
def resnet18(img_channels: int, num_classes: int) -> ResNet:
    """Build a ``ResNet`` with ``num_layers=18`` using ``BasicBlock`` blocks."""
    return ResNet(
        img_channels=img_channels,
        num_layers=18,
        block=BasicBlock,
        num_classes=num_classes,
    )

# ResNet building block and model definitions
class BasicBlock(nn.Module):
    """Residual block: two 3x3 conv + batch-norm stages plus a shortcut.

    The shortcut is ``downsample(x)`` when a ``downsample`` module is supplied,
    otherwise the unmodified input. The sum passes through a final ReLU.

    Args:
        in_channels: Channels of the incoming tensor.
        out_channels: Channels produced by the first convolution.
        stride: Stride of the first convolution.
        expansion: Multiplier on ``out_channels`` for the block's output width.
        downsample: Optional module that produces the shortcut from the input.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        stride: int = 1,
        expansion: int = 1,
        downsample: nn.Module = None,
    ) -> None:
        super().__init__()
        self.expansion = expansion
        self.downsample = downsample

        block_out = out_channels * self.expansion
        # Settings shared by both 3x3 convolutions.
        conv_kwargs = dict(kernel_size=3, padding=1, bias=False)

        self.conv1 = nn.Conv2d(in_channels, out_channels, stride=stride, **conv_kwargs)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, block_out, **conv_kwargs)
        self.bn2 = nn.BatchNorm2d(block_out)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the block to ``x`` and return the activated residual sum."""
        out = self.conv1(x)
        out = self.relu(self.bn1(out))
        out = self.bn2(self.conv2(out))

        if self.downsample is None:
            identity = x
        else:
            identity = self.downsample(x)

        out += identity
        return self.relu(out)

class ResNet(nn.Module):
    """ResNet classifier assembled from a configurable residual block class.

    Only the 18-layer configuration (``[2, 2, 2, 2]`` blocks per stage with
    expansion 1) is implemented.

    Args:
        img_channels: Number of channels in the input images.
        num_layers: Depth selector; must be 18.
        block: Residual block class. It is called as
            ``block(in_channels, out_channels, stride, expansion, downsample)``.
        num_classes: Number of outputs of the final linear layer.

    Raises:
        ValueError: If ``num_layers`` is not a supported depth.
    """

    def __init__(self, img_channels: int, num_layers: int, block: Type[nn.Module], num_classes: int = 1000) -> None:
        super(ResNet, self).__init__()
        if num_layers != 18:
            # Fail with a clear message instead of an unbound-variable error below.
            raise ValueError(f"Unsupported num_layers={num_layers}; only 18 is implemented")
        layers = [2, 2, 2, 2]
        self.expansion = 1

        # Stem: 7x7 stride-2 convolution followed by a 3x3 stride-2 max pool.
        self.in_channels = 64
        self.conv1 = nn.Conv2d(in_channels=img_channels, out_channels=self.in_channels, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.in_channels)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Four residual stages; every stage after the first halves the resolution.
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * self.expansion, num_classes)

    def _make_layer(self, block: Type[nn.Module], out_channels: int, blocks: int, stride: int = 1) -> nn.Sequential:
        """Build one stage of ``blocks`` residual blocks.

        The first block receives ``stride`` and, when the shortcut cannot be
        the identity, a 1x1 projection; the remaining blocks use stride 1.
        ``self.in_channels`` is updated to the stage's output width.
        """
        stage_width = out_channels * self.expansion
        downsample = None
        # A projection is needed whenever the shortcut's shape would differ
        # from the main branch: a spatial stride OR a channel-count change.
        if stride != 1 or self.in_channels != stage_width:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channels, stage_width, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(stage_width),
            )
        layers = [block(self.in_channels, out_channels, stride, self.expansion, downsample)]
        self.in_channels = stage_width

        for _ in range(1, blocks):
            layers.append(block(self.in_channels, out_channels, expansion=self.expansion))
        return nn.Sequential(*layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return class scores of shape ``(batch, num_classes)`` for ``x``."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        # Debug message: printed on every forward pass.
        print('Dimensions of the last convolutional feature map: ', x.shape)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x

# Instantiate the model with one output per distinct label
num_classes = len(label_list)  # total number of labels
model = resnet18(img_channels=3, num_classes=num_classes)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Smoke-test training: at most 3 batches per epoch
num_epochs = 3
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    batches_seen = 0
    # zip() against range(3) stops after three batches without fetching
    # (and decoding) a fourth batch only to discard it.
    for i, (images, labels) in zip(range(3), train_loader):
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        batches_seen += 1
    # Average over the batches actually processed; the loader may yield fewer than 3.
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/max(batches_seen, 1)}')

# Model validation (first 3 batches only)
model.eval()
all_preds = []
all_labels = []
with torch.no_grad():
    for i, (images, labels) in enumerate(val_loader):
        if i >= 3:  # use only 3 batches
            break
        outputs = model(images)
        # Predicted class = index of the largest score along dim 1
        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Accuracy over the predictions collected above
accuracy = accuracy_score(all_labels, all_preds)
print(f'Validation Accuracy: {accuracy * 100:.2f}%')

# Convert a prediction back to the {W/T}_{style} label string
def predict_image(image_path, model, transform):
    """Classify one image file and return its label string.

    The image is opened as RGB, passed through ``transform``, given a leading
    batch dimension, and fed to ``model`` in eval mode without gradients. The
    arg-max class index is mapped through the module-level ``index_to_label``.
    """
    batch = transform(Image.open(image_path).convert('RGB')).unsqueeze(0)  # add batch dim
    model.eval()
    with torch.no_grad():
        scores = model(batch)
        pred = torch.max(scores, 1).indices
    return index_to_label[pred.item()]

# Predict the label of an example image
example_image_path = '../dataset/example.jpg'
predicted_label = predict_image(example_image_path, model, transform)
print(f'Predicted: {predicted_label}')

Dimensions of the last convolutional feature map:  torch.Size([32, 512, 7, 7])
Dimensions of the last convolutional feature map:  torch.Size([32, 512, 7, 7])
Dimensions of the last convolutional feature map:  torch.Size([32, 512, 7, 7])
Epoch [1/3], Loss: 3.365297317504883
Dimensions of the last convolutional feature map:  torch.Size([32, 512, 7, 7])
Dimensions of the last convolutional feature map:  torch.Size([32, 512, 7, 7])
Dimensions of the last convolutional feature map:  torch.Size([32, 512, 7, 7])
Epoch [2/3], Loss: 2.705004851023356
Dimensions of the last convolutional feature map:  torch.Size([32, 512, 7, 7])
Dimensions of the last convolutional feature map:  torch.Size([32, 512, 7, 7])
Dimensions of the last convolutional feature map:  torch.Size([32, 512, 7, 7])
Epoch [3/3], Loss: 2.51450244585673
Dimensions of the last convolutional feature map:  torch.Size([32, 512, 7, 7])
Dimensions of the last convolutional feature map:  torch.Size([32, 512, 7, 7])
Dimensions of the las

실제 실행할 코드

In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from PIL import Image
from sklearn.metrics import accuracy_score
from typing import Type

# Directories that contain the training and validation image files
train_image_directory = '../dataset/training_image'
valid_image_directory = '../dataset/validation_image'

# CustomDataset class definition (string labels; redefined later in this cell)
class CustomDataset(Dataset):
    """Dataset of ``.jpg`` images that yields the label as a string.

    The label is ``"{gender}_{style}"``, built from the first and third
    ``_``-separated fields of the file name. A later definition in this cell
    rebinds ``CustomDataset`` to a version that returns an integer index.

    Args:
        image_directory: Directory that is listed for ``.jpg`` files.
        transform: Optional callable applied to the loaded RGB image.
    """

    def __init__(self, image_directory, transform=None):
        self.image_directory = image_directory
        jpg_names = []
        for entry in os.listdir(image_directory):
            if entry.endswith('.jpg'):
                jpg_names.append(entry)
        self.image_files = jpg_names
        self.transform = transform

    def __len__(self):
        """Number of ``.jpg`` files found in the directory."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the file at position ``idx``.

        Raises:
            ValueError: If the file name has fewer than four ``_``-separated fields.
        """
        file_name = self.image_files[idx]
        full_path = os.path.join(self.image_directory, file_name)
        image = Image.open(full_path).convert('RGB')

        # Label pieces come from the file name: field 0 and field 2.
        fields = file_name.split('_')
        if len(fields) < 4:
            raise ValueError(f"Invalid file name format: {file_name}")

        label = f"{fields[0]}_{fields[2]}"

        if self.transform:
            image = self.transform(image)
        return image, label

# Preprocessing and data loading: resize to 224x224, convert to a tensor,
# then normalize each channel with the mean/std values below.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# NOTE: these datasets/loaders use the CustomDataset defined above (string
# labels). They are rebuilt further down in this cell, after the class is
# redefined to return integer label indices.
train_dataset = CustomDataset(train_image_directory, transform=transform)
val_dataset = CustomDataset(valid_image_directory, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Build the label <-> integer index mappings from the file names of both splits.
# Only '.jpg' entries are considered, matching the filter CustomDataset applies,
# so stray non-image files cannot add classes that never occur in the data.
label_set = set()
for file_name in os.listdir(train_image_directory) + os.listdir(valid_image_directory):
    if not file_name.endswith('.jpg'):
        continue
    parts = file_name.split('_')
    # Names with fewer than four fields carry no usable label and are skipped here.
    if len(parts) >= 4:
        gender = parts[0]
        style = parts[2]
        label_set.add(f"{gender}_{style}")

# Sorting makes the label -> index assignment deterministic across runs.
label_list = sorted(label_set)
label_to_index = {label: idx for idx, label in enumerate(label_list)}
index_to_label = {idx: label for label, idx in label_to_index.items()}

# Revised CustomDataset: returns integer label indices
class CustomDataset(Dataset):
    """Dataset of ``.jpg`` images that yields ``(image, label_idx)`` pairs.

    The label string ``"{gender}_{style}"`` is built from the first and third
    ``_``-separated fields of the file name and converted to an integer with
    the module-level ``label_to_index`` mapping.

    Args:
        image_directory: Directory that is listed for ``.jpg`` files.
        transform: Optional callable applied to the loaded RGB image.
    """

    def __init__(self, image_directory, transform=None):
        self.image_directory = image_directory
        # Keep only entries with a (case-sensitive) '.jpg' suffix.
        self.image_files = list(
            filter(lambda name: name.endswith('.jpg'), os.listdir(image_directory))
        )
        self.transform = transform

    def __len__(self):
        """Number of ``.jpg`` files found in the directory."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Return ``(image, label_idx)`` for the file at position ``idx``.

        Raises:
            ValueError: If the file name has fewer than four ``_``-separated fields.
        """
        file_name = self.image_files[idx]
        image = Image.open(os.path.join(self.image_directory, file_name)).convert('RGB')

        # Label pieces come from the file name: field 0 and field 2.
        fields = file_name.split('_')
        if len(fields) < 4:
            raise ValueError(f"Invalid file name format: {file_name}")

        label_idx = label_to_index[f"{fields[0]}_{fields[2]}"]

        if self.transform:
            image = self.transform(image)
        return image, label_idx

# Rebuild the datasets and loaders so they use the redefined CustomDataset
train_dataset = CustomDataset(train_image_directory, transform=transform)
val_dataset = CustomDataset(valid_image_directory, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Factory function for the ResNet-18 model
def resnet18(img_channels: int, num_classes: int) -> ResNet:
    """Return a ``ResNet`` configured with 18 layers and ``BasicBlock`` blocks."""
    depth = 18
    return ResNet(img_channels, depth, block=BasicBlock, num_classes=num_classes)

# Instantiate the model with one output per distinct label
num_classes = len(label_list)  # total number of labels
model = resnet18(img_channels=3, num_classes=num_classes)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Model training over the full training loader
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Report the mean per-batch loss for this epoch
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader)}')

# Model validation over the full validation loader
model.eval()
all_preds = []
all_labels = []
with torch.no_grad():
    for images, labels in val_loader:
        outputs = model(images)
        # Predicted class = index of the largest score along dim 1
        _, preds = torch.max(outputs, 1)
        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Accuracy over the predictions collected above
accuracy = accuracy_score(all_labels, all_preds)
print(f'Validation Accuracy: {accuracy * 100:.2f}%')

# Convert a prediction back to the {W/T}_{style} label string
def predict_image(image_path, model, transform):
    """Classify one image file and return its label string.

    The image is opened as RGB, passed through ``transform``, given a leading
    batch dimension, and fed to ``model`` in eval mode without gradients. The
    arg-max class index is mapped through the module-level ``index_to_label``.
    """
    rgb = Image.open(image_path).convert('RGB')
    batch = transform(rgb).unsqueeze(0)  # add batch dimension
    model.eval()
    with torch.no_grad():
        scores = model(batch)
        pred = torch.max(scores, 1).indices
    class_index = pred.item()
    return index_to_label[class_index]

In [None]:
# Predict the label of an example image
example_image_path = '../dataset/example.jpg'
predicted_label = predict_image(example_image_path, model, transform)
print(f'Predicted: {predicted_label}')

.