# 2024 데이터 크리에이터 캠프

문제: 인공지능은 사람의 마음을 이해할 수 있을까?

### 구글 드라이브 연결

In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive so that the
# dataset paths used later in this notebook are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Mission1. 패션 스타일 이미지 분류

### Mission 1-2.  
ResNet-18을 활용하여 “성별 & 스타일” 단위로 클래스 분류를 수행하고 Validation 데이터에 대한 정확도를 제시한다.   
 - ResNet-18의 parameters는 무작위로 초기화하여 사용한다. (즉, pretrained weights는 사용할 수 없음)  
 - 성능을 높이기 위해 object detection, image cropping 등의 다양한 데이터 전처리 기법을 활용해도 무방하다.    
 (데이터 전처리 단계에 한해서는 외부 라이브러리 활용 가능)   

### 라이브러리 불러오기

In [None]:
import os
import torch
from torch import Tensor
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from PIL import Image
from sklearn.metrics import accuracy_score
from typing import Type

### Resnet 구현

In [None]:
class BasicBlock(nn.Module):
    """Two-convolution residual block (3x3 conv -> BN -> ReLU -> 3x3 conv -> BN).

    The block output is ``relu(main_path(x) + shortcut(x))`` where the
    shortcut is ``x`` itself, or ``downsample(x)`` when a ``downsample``
    module is supplied.

    Args:
        in_channels: Number of channels of the incoming feature map.
        out_channels: Number of channels produced by the first convolution.
        stride: Stride of the first convolution (the second always uses 1).
        expansion: Multiplier applied to ``out_channels`` for the second
            convolution and its batch norm.
        downsample: Optional module applied to the input before it is added
            to the main path.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        stride: int = 1,
        expansion: int = 1,
        downsample: nn.Module = None
    ) -> None:
        super().__init__()
        self.expansion = expansion
        self.downsample = downsample

        # Channel count leaving the block.
        final_channels = out_channels * self.expansion

        # First 3x3 convolution carries the (optional) spatial stride.
        self.conv1 = nn.Conv2d(
            in_channels, out_channels,
            kernel_size=3, stride=stride, padding=1, bias=False,
        )
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

        # Second 3x3 convolution keeps the spatial size.
        self.conv2 = nn.Conv2d(
            out_channels, final_channels,
            kernel_size=3, padding=1, bias=False,
        )
        self.bn2 = nn.BatchNorm2d(final_channels)

    def forward(self, x: Tensor) -> Tensor:
        """Run the residual block on ``x`` and return the activated sum."""
        features = self.relu(self.bn1(self.conv1(x)))
        features = self.bn2(self.conv2(features))

        # Identity shortcut unless a projection module was provided.
        shortcut = x if self.downsample is None else self.downsample(x)

        features += shortcut
        return self.relu(features)

In [None]:
class ResNet(nn.Module):
    """ResNet backbone with a linear classification head (18-layer layout only).

    The network is: 7x7 stem convolution -> BN -> ReLU -> 3x3 max-pool ->
    four residual stages (64, 128, 256, 512 channels) -> global average
    pooling -> fully connected layer.

    Args:
        img_channels: Number of channels of the input images.
        num_layers: Network depth. Only ``18`` is implemented.
        block: Residual block class. It is called as
            ``block(in_channels, out_channels, stride, expansion, downsample)``.
        num_classes: Size of the output logits vector.

    Raises:
        ValueError: If ``num_layers`` is not 18.
    """

    def __init__(
        self,
        img_channels: int,
        num_layers: int,
        block: Type[nn.Module],
        num_classes: int = 1000
    ) -> None:
        super().__init__()
        # Only ResNet-18 is used in this competition, so only that layout is
        # implemented. Fail early instead of hitting an unbound `layers` later.
        if num_layers != 18:
            raise ValueError(
                f"Unsupported num_layers={num_layers!r}; only 18 is implemented."
            )
        layers = [2, 2, 2, 2]
        self.expansion = 1

        self.in_channels = 64
        self.conv1 = nn.Conv2d(
            in_channels=img_channels,
            out_channels=self.in_channels,
            kernel_size=7,
            stride=2,
            padding=3,
            bias=False
        )
        self.bn1 = nn.BatchNorm2d(self.in_channels)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512*self.expansion, num_classes)

    def _make_layer(
        self,
        block: Type[nn.Module],
        out_channels: int,
        blocks: int,
        stride: int = 1
    ) -> nn.Sequential:
        """Build one residual stage made of ``blocks`` consecutive blocks.

        Only the first block of the stage receives ``stride`` and the
        projection shortcut; the remaining blocks keep the resolution.
        ``self.in_channels`` is updated to the stage's output width.
        """
        downsample = None
        # A projection shortcut is required whenever the residual branch
        # changes the tensor shape: spatially (stride) or in channel count.
        if stride != 1 or self.in_channels != out_channels * self.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(
                    self.in_channels,
                    out_channels*self.expansion,
                    kernel_size=1,
                    stride=stride,
                    bias=False
                ),
                nn.BatchNorm2d(out_channels * self.expansion),
            )
        layers = [
            block(
                self.in_channels, out_channels, stride, self.expansion, downsample
            )
        ]
        self.in_channels = out_channels * self.expansion

        layers.extend(
            block(self.in_channels, out_channels, expansion=self.expansion)
            for _ in range(1, blocks)
        )
        return nn.Sequential(*layers)

    def forward(self, x: Tensor) -> Tensor:
        """Return class logits of shape ``(batch, num_classes)`` for ``x``."""
        # Stem
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        # Residual stages
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        # Classification head
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

### 데이터 전처리

In [None]:
import numpy as np
import random

def seed_everything(seed: int = 42):
    """Seed every random number generator used here for reproducible runs.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices)
    and puts cuDNN into deterministic mode.

    Args:
        seed: Seed value shared by all generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    # NOTE: hash randomization is fixed at interpreter start-up, so this only
    # influences Python processes launched after this point.
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one (no-op without CUDA).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune and pick possibly different
    # (non-deterministic) algorithms per run, which defeats the flag above.
    torch.backends.cudnn.benchmark = False


seed_everything()

# Paths of the directories that contain the image files
train_image_directory = '/content/drive/MyDrive/kict/dataset/training_image'   # <--- set to the path of the training_image folder
valid_image_directory = '/content/drive/MyDrive/kict/dataset/validation_image' # <--- set to the path of the validation_image folder

# CustomDataset class definition
class CustomDataset(Dataset):
    """Image dataset whose labels are encoded in the file names.

    Every ``.jpg`` file in ``image_directory`` is one sample. The label is
    built from the last two underscore-separated fields of the file name
    (extension removed), e.g. ``..._hiphop_W.jpg`` -> ``"hiphop_W"``, and is
    converted to an integer through the module-level ``label_to_index``
    dictionary, which must exist before samples are fetched.

    Args:
        image_directory: Directory that contains the ``.jpg`` images.
        transform: Optional callable applied to the loaded RGB PIL image.
    """

    def __init__(self, image_directory, transform=None):
        self.image_directory = image_directory
        # os.listdir() returns entries in arbitrary order; sort them so that
        # a given index always maps to the same file across runs/machines.
        self.image_files = sorted(
            f for f in os.listdir(image_directory) if f.endswith('.jpg')
        )
        self.transform = transform

    def __len__(self):
        """Return the number of ``.jpg`` files found in the directory."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Return ``(image, label_idx)`` for the sample at position ``idx``."""
        file_name = self.image_files[idx]
        image_path = os.path.join(self.image_directory, file_name)
        # Close the source file deterministically; convert() returns an
        # independent in-memory image.
        with Image.open(image_path) as source:
            image = source.convert('RGB')

        # Extract the "<style>_<gender>" label from the file name
        parts = file_name.split('_')
        style_gender = parts[-2] + '_' + parts[-1].split('.')[0]

        # Map the label string to its integer class index
        label_idx = label_to_index[style_gender]

        if self.transform:
            image = self.transform(image)
        return image, label_idx

# Build the dictionaries that map label strings to integer indices and back.
# Only '.jpg' files are considered, matching what CustomDataset loads, so a
# stray non-image entry can neither crash the parsing nor add a bogus class.
label_set = set()
for directory in (train_image_directory, valid_image_directory):
    for file_name in os.listdir(directory):
        if not file_name.endswith('.jpg'):
            continue
        parts = file_name.split('_')
        style_gender = parts[-2] + '_' + parts[-1].split('.')[0]
        label_set.add(style_gender)

# Sorting makes the label -> index assignment deterministic.
label_list = sorted(label_set)
label_to_index = {label: idx for idx, label in enumerate(label_list)}
index_to_label = {idx: label for label, idx in label_to_index.items()}

In [None]:
# Display the index -> label mapping (the notebook prints it as the cell output).
index_to_label

{0: 'athleisure_W',
 1: 'bodyconscious_W',
 2: 'bold_M',
 3: 'cityglam_W',
 4: 'classic_W',
 5: 'disco_W',
 6: 'ecology_W',
 7: 'feminine_W',
 8: 'genderless_W',
 9: 'grunge_W',
 10: 'hiphop_M',
 11: 'hiphop_W',
 12: 'hippie_M',
 13: 'hippie_W',
 14: 'ivy_M',
 15: 'kitsch_W',
 16: 'lingerie_W',
 17: 'lounge_W',
 18: 'metrosexual_M',
 19: 'military_W',
 20: 'minimal_W',
 21: 'mods_M',
 22: 'normcore_M',
 23: 'normcore_W',
 24: 'oriental_W',
 25: 'popart_W',
 26: 'powersuit_W',
 27: 'punk_W',
 28: 'space_W',
 29: 'sportivecasual_M',
 30: 'sportivecasual_W'}

클래스 총 31개의 범주

In [None]:
# Display the number of distinct classes.
len(index_to_label)

31

이미지 데이터 정규화를 위한 평균과 표준편차 계산 수행

In [None]:
# Compute the per-channel mean and standard deviation of a dataset
def calculate_mean_std(loader):
    """Return the per-channel mean and standard deviation over a loader.

    Statistics are pooled over every pixel of every image. Averaging
    per-image standard deviations instead would ignore the variation
    between images and underestimate the spread.

    Args:
        loader: Iterable yielding ``(images, labels)`` batches, where
            ``images`` is a tensor whose dim 0 is the batch and dim 1 the
            channels; remaining dims are flattened as pixels.

    Returns:
        Tuple ``(mean, std)`` of float32 tensors with one entry per channel.
        ``std`` is the population standard deviation.

    Raises:
        ValueError: If the loader yields no pixels at all.
    """
    channel_sum = None
    channel_sq_sum = None
    pixel_count = 0
    for images, _ in loader:
        batch_samples = images.size(0)  # the last batch may be smaller
        # Accumulate in float64 to limit rounding error over many pixels.
        flat = images.reshape(batch_samples, images.size(1), -1).to(torch.float64)
        if channel_sum is None:
            channel_sum = torch.zeros(
                flat.size(1), dtype=torch.float64, device=flat.device
            )
            channel_sq_sum = torch.zeros_like(channel_sum)
        channel_sum += flat.sum(dim=(0, 2))
        channel_sq_sum += (flat ** 2).sum(dim=(0, 2))
        pixel_count += batch_samples * flat.size(2)

    if pixel_count == 0:
        raise ValueError("Cannot compute mean/std: the loader produced no data.")

    mean = channel_sum / pixel_count
    # Var[x] = E[x^2] - E[x]^2; clamp guards against tiny negative round-off.
    variance = (channel_sq_sum / pixel_count - mean ** 2).clamp(min=0.0)
    std = variance.sqrt()
    return mean.float(), std.float()

# Temporary data loader that applies only Resize + ToTensor (no normalization),
# used to measure the pixel statistics of the training images.
temp_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

temp_train_dataset = CustomDataset(train_image_directory, transform=temp_transform)
temp_train_loader = DataLoader(temp_train_dataset, batch_size=32, shuffle=True)

# Compute the mean and standard deviation of the training set
mean, std = calculate_mean_std(temp_train_loader)
print(f"Calculated mean: {mean}")
print(f"Calculated std: {std}")

Calculated mean: tensor([0.5498, 0.5226, 0.5052])
Calculated std: tensor([0.2600, 0.2582, 0.2620])


데이터 전처리 및 로드

In [None]:
# Training pipeline: resize, random augmentation, tensor conversion and
# normalization with the statistics measured on the training set.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.RandomRotation(degrees=15),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ColorJitter(brightness=0.2,
                           contrast=0.2,
                           saturation=0.2,
                           hue=0.1),
    transforms.ToTensor(), # scales pixel values from the [0, 255] range to [0, 1]
    transforms.Normalize(  # normalizes each channel with the given mean and std
        mean=mean.tolist(),
        std=std.tolist()
        )
])

# Validation pipeline: no random augmentation, so that the reported accuracy
# is deterministic and measured on the unmodified validation images.
val_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=mean.tolist(),
        std=std.tolist()
        )
])

train_dataset = CustomDataset(train_image_directory, transform=transform)
val_dataset = CustomDataset(valid_image_directory, transform=val_transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

.