# 2024 데이터 크리에이터 캠프

문제: 인공지능은 사람의 마음을 이해할 수 있을까?

## Mission3. 패션스타일 선호 여부 예측

## 라이브러리 불러오기

In [1]:
import os
import torch
import json
import numpy as np
import pandas as pd
from torch import Tensor
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from PIL import Image
from sklearn.metrics import accuracy_score
from typing import Type
from collections import defaultdict
import torchvision.transforms as transforms
from sklearn.metrics.pairwise import cosine_similarity

## ResNet

In [2]:
class BasicBlock(nn.Module):
    """Residual block built from two 3x3 convolutions and a skip connection.

    The main path is conv -> batch norm -> ReLU -> conv -> batch norm. The
    input (optionally passed through ``downsample``) is added to the result
    before the final ReLU.

    Args:
        in_channels: Number of channels of the input tensor.
        out_channels: Number of channels produced by the first convolution.
        stride: Stride of the first convolution.
        expansion: Multiplier applied to ``out_channels`` for the second
            convolution's output width.
        downsample: Optional module applied to the input so that it can be
            added to the main-path output; ``None`` uses the input as is.
    """

    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        stride: int = 1,
        expansion: int = 1,
        downsample: nn.Module = None
    ) -> None:
        super().__init__()
        self.expansion = expansion
        self.downsample = downsample

        # Width of the block output after the second convolution.
        widened = out_channels * self.expansion

        self.conv1 = nn.Conv2d(
            in_channels, out_channels,
            kernel_size=3, stride=stride, padding=1, bias=False,
        )
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(
            out_channels, widened,
            kernel_size=3, padding=1, bias=False,
        )
        self.bn2 = nn.BatchNorm2d(widened)

    def forward(self, x: Tensor) -> Tensor:
        """Run the block on ``x`` and return the activated residual sum."""
        # Main path: first conv stage with activation, second without.
        residual = self.relu(self.bn1(self.conv1(x)))
        residual = self.bn2(self.conv2(residual))

        # Skip path: raw input unless a projection module was supplied.
        shortcut = x if self.downsample is None else self.downsample(x)

        residual += shortcut
        return self.relu(residual)

In [3]:
class ResNet(nn.Module):
    """ResNet backbone with a linear classification head (18-layer variant only).

    Only the ResNet-18 configuration (four stages of two blocks each,
    expansion 1) is implemented because that is the only variant used here.

    Args:
        img_channels: Number of channels of the input images.
        num_layers: Network depth; must be ``18``.
        block: Residual block class. It is instantiated as
            ``block(in_channels, out_channels, stride, expansion, downsample)``.
        num_classes: Size of the final linear layer's output.

    Raises:
        ValueError: If ``num_layers`` is not a supported depth.
    """

    def __init__(
        self,
        img_channels: int,
        num_layers: int,
        block: Type[nn.Module],
        num_classes: int = 1000
    ) -> None:
        super(ResNet, self).__init__()
        # Fail early with a clear message instead of hitting an undefined
        # `layers` variable further down.
        if num_layers != 18:
            raise ValueError(
                f'Unsupported num_layers={num_layers!r}; only the 18-layer '
                'configuration is implemented.'
            )
        layers = [2, 2, 2, 2]
        self.expansion = 1

        # Stem: 7x7 stride-2 convolution followed by 3x3 stride-2 max pooling.
        self.in_channels = 64
        self.conv1 = nn.Conv2d(
            in_channels=img_channels,
            out_channels=self.in_channels,
            kernel_size=7,
            stride=2,
            padding=3,
            bias=False
        )
        self.bn1 = nn.BatchNorm2d(self.in_channels)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Four residual stages; every stage after the first halves the
        # spatial resolution through a stride-2 first block.
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512*self.expansion, num_classes)

    def _make_layer(
        self,
        block: Type[nn.Module],
        out_channels: int,
        blocks: int,
        stride: int = 1
    ) -> nn.Sequential:
        """Build one residual stage made of ``blocks`` consecutive blocks.

        The first block applies ``stride`` and, when the skip connection's
        shape would not match the main path, a 1x1 convolution + batch norm
        projection. The remaining blocks keep the shape. ``self.in_channels``
        is updated to the stage's output width.
        """
        downsample = None
        # A projection is needed whenever the skip tensor differs from the
        # main-path output in resolution (stride) or in channel count.
        if stride != 1 or self.in_channels != out_channels * self.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(
                    self.in_channels,
                    out_channels*self.expansion,
                    kernel_size=1,
                    stride=stride,
                    bias=False
                ),
                nn.BatchNorm2d(out_channels * self.expansion),
            )
        layers = [
            block(
                self.in_channels, out_channels, stride, self.expansion, downsample
            )
        ]
        self.in_channels = out_channels * self.expansion

        layers.extend(
            block(self.in_channels, out_channels, expansion=self.expansion)
            for _ in range(1, blocks)
        )
        return nn.Sequential(*layers)

    def forward(self, x: Tensor) -> Tensor:
        """Return the ``num_classes``-wide output of the linear head for ``x``."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        # Diagnostic output kept from the original implementation; it is
        # emitted on every forward pass.
        print('Dimensions of the last convolutional feature map: ', x.shape)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

### ResNet-18 모델 정의

주어진 ResNet-18 모델을 사용하여 각 이미지의 feature vector를 추출  
-> 프리트레인 가중치 사용함

In [4]:
# ResNet-18 feature extractor
class ResNet18FeatureExtractor(nn.Module):
    """Pretrained torchvision ResNet-18 with its final FC layer removed.

    ``forward`` returns one flattened feature vector per input image, taken
    from the output of every child module of the network except the last
    (the classification layer).
    """

    def __init__(self):
        super(ResNet18FeatureExtractor, self).__init__()
        # `pretrained=True` is deprecated since torchvision 0.13 in favour of
        # the `weights=` enum; fall back to the old flag on older releases
        # that do not ship the enum.
        weights_enum = getattr(models, 'ResNet18_Weights', None)
        if weights_enum is not None:
            self.resnet18 = models.resnet18(weights=weights_enum.IMAGENET1K_V1)
        else:
            self.resnet18 = models.resnet18(pretrained=True)
        # Keep every child module except the last one (the FC layer).
        self.features = nn.Sequential(*list(self.resnet18.children())[:-1])

    def forward(self, x):
        """Return the flattened features for the image batch ``x``."""
        x = self.features(x)
        # Flatten everything but the batch dimension.
        return torch.flatten(x, 1)

In [5]:
# Load the CSV file into a DataFrame.
mission2_result = pd.read_csv('../dataset/mission2-2_result_all.csv')

# Print a summary of the DataFrame (columns, non-null counts, dtypes).
mission2_result.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4116 entries, 0 to 4115
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   응답자 ID     4116 non-null   int64 
 1   train 선호   3081 non-null   object
 2   train 비선호  3416 non-null   object
 3   valid 선호   1396 non-null   object
 4   valid 비선호  1595 non-null   object
dtypes: int64(1), object(4)
memory usage: 160.9+ KB


In [6]:
# Show the first rows of the table.
mission2_result.head()

Unnamed: 0,응답자 ID,train 선호,train 비선호,valid 선호,valid 비선호
0,52002,W_24111_70_hippie_M.jpg,"T_00004_90_hiphop_M.jpg, T_03007_10_sportiveca...",,
1,66699,"T_00004_90_hiphop_M.jpg, T_01568_50_ivy_M.jpg,...","T_03643_00_metrosexual_M.jpg, T_06009_10_sport...",,
2,66797,"T_01259_10_sportivecasual_M.jpg, T_16092_10_sp...","T_00004_90_hiphop_M.jpg, W_15467_70_hippie_M.j...","T_08486_10_sportivecasual_M.jpg, W_23958_60_mo...",
3,66684,"T_00047_19_normcore_M.jpg, T_03699_90_hiphop_M...","T_00007_19_normcore_M.jpg, W_51917_00_metrosex...",W_15341_60_mods_M.jpg,
4,66817,"T_00012_19_normcore_M.jpg, T_04506_90_hiphop_M...","T_03624_90_hiphop_M.jpg, T_04522_90_hiphop_M.j...",,W_17135_00_metrosexual_M.jpg


### 이미지 전처리

ResNet-18 모델을 사용하여 이미지의 feature vector를 추출 및 저장

In [7]:
# Image preprocessing: resize to 224x224, convert to a tensor, then normalize
# each channel with the given mean/std (presumably the ImageNet statistics
# expected by the pretrained weights -- confirm against the weights used).
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Extract the feature vector of a single image.
def extract_features(image_path, model, transform):
    """Return the model output for one image as a flat NumPy array.

    Args:
        image_path: Path of the image file to load.
        model: Callable applied to the preprocessed image batch. If it
            exposes ``parameters()`` (e.g. an ``nn.Module``), the input is
            moved to the device of its first parameter.
        transform: Callable that turns an RGB PIL image into a tensor.

    Returns:
        1-D ``numpy.ndarray`` with the flattened model output.
    """
    # Image.open keeps the file handle open; convert() loads the pixel data
    # into a new image, so the handle can be released right away.
    with Image.open(image_path) as source:
        image = source.convert('RGB')
    batch = transform(image).unsqueeze(0)  # Add batch dimension

    # Run on whatever device the model lives on (if it has parameters).
    parameters = getattr(model, 'parameters', None)
    first_param = next(parameters(), None) if callable(parameters) else None
    if first_param is not None:
        batch = batch.to(first_param.device)

    with torch.no_grad():
        features = model(batch)
    # .cpu() is required before .numpy() when the model runs on an accelerator.
    return features.detach().cpu().numpy().flatten()

# Image directory paths
train_image_directory = '../dataset/training_image'
valid_image_directory = '../dataset/validation_image'

# Build the ResNet-18 feature extractor and switch it to evaluation mode
model = ResNet18FeatureExtractor()
model.eval()

# Feature vectors keyed by image id (the second '_'-separated token of the
# file name)
train_features = {}
valid_features = {}

# Same extraction pass for both splits: training images first, then validation.
for image_directory, feature_store in (
    (train_image_directory, train_features),
    (valid_image_directory, valid_features),
):
    for image_name in os.listdir(image_directory):
        if not image_name.endswith('.jpg'):
            continue
        image_path = os.path.join(image_directory, image_name)
        image_id = image_name.split('_')[1]
        feature_store[image_id] = extract_features(image_path, model, transform)


  f"The parameter '{pretrained_param}' is deprecated since 0.13 and may be removed in the future, "


이미지 간 유사도를 계산하고, 이를 바탕으로 Validation 데이터 내 응답자의 스타일 선호 여부를 예측

In [8]:

# Similarity between two feature vectors
def calculate_similarity(feature1, feature2):
    """Return the cosine similarity of ``feature1`` and ``feature2`` as a scalar."""
    # cosine_similarity works on 2-D inputs, so wrap each vector as a
    # one-row matrix and read the single entry of the 1x1 result.
    pairwise = cosine_similarity([feature1], [feature2])
    return pairwise[0][0]

def _normalize_rows(matrix):
    """Scale each row of ``matrix`` to unit L2 norm; all-zero rows stay zero."""
    norms = np.linalg.norm(matrix, axis=1, keepdims=True)
    norms[norms == 0] = 1.0  # avoid 0/0: a zero vector gets similarity 0
    return matrix / norms


def _image_id_of(image_name):
    """Return the second '_'-separated token of ``image_name``, or None."""
    tokens = image_name.split('_')
    return tokens[1] if len(tokens) > 1 else None


# Predict the style preference of each respondent's validation images.
def predict_preference(mission2_result, valid_features, train_features, threshold=0.5, top_k=5):
    """Label every validation image of every respondent as '선호' or '비선호'.

    For each validation image, the cosine similarity to every training
    feature vector is computed, the ``top_k`` largest similarities are
    averaged, and the image is labelled '선호' when that average is strictly
    greater than ``threshold`` (otherwise '비선호').

    NOTE(review): the score only depends on the image, not on the respondent;
    the 'train 선호' / 'train 비선호' columns are never consulted. Confirm
    whether this is the intended prediction rule.

    Args:
        mission2_result: DataFrame with the columns '응답자 ID', 'valid 선호'
            and 'valid 비선호'; the latter two hold ', '-separated image file
            names or NaN.
        valid_features: Mapping of image id -> feature vector for validation
            images. Images whose id is missing here are skipped.
        train_features: Mapping of image id -> feature vector for training
            images. If empty, no prediction can be made and every
            respondent gets an empty result.
        threshold: Decision threshold on the averaged similarity.
        top_k: Number of most similar training images to average (capped at
            the number of training images).

    Returns:
        ``{respondent_id: {image_file_name: '선호' | '비선호'}}`` for every row
        that lists at least one validation image.

    Raises:
        ValueError: If ``top_k`` is smaller than 1.
    """
    if top_k < 1:
        raise ValueError(f'top_k must be at least 1, got {top_k!r}')

    # Stack and normalise the training features once so that every query is
    # a single matrix-vector product instead of one library call per pair.
    train_matrix = None
    if train_features:
        train_matrix = _normalize_rows(np.stack([
            np.asarray(feature, dtype=np.float64).ravel()
            for feature in train_features.values()
        ]))

    # The score is a function of the image alone, so compute it once per id.
    score_cache = {}

    def preference_score(image_id):
        if image_id not in score_cache:
            query = np.asarray(valid_features[image_id], dtype=np.float64)
            query = _normalize_rows(query.reshape(1, -1))[0]
            similarities = train_matrix @ query
            k = min(top_k, similarities.size)
            # Partial selection of the k largest values; no full sort needed.
            cut = similarities.size - k
            top = np.partition(similarities, cut)[cut:]
            score_cache[image_id] = float(top.mean())
        return score_cache[image_id]

    predictions = {}
    for _, row in mission2_result.iterrows():
        liked, disliked = row['valid 선호'], row['valid 비선호']
        if pd.isna(liked) and pd.isna(disliked):
            continue  # respondent has no validation images

        valid_images = []
        if not pd.isna(liked):
            valid_images.extend(liked.split(', '))
        if not pd.isna(disliked):
            valid_images.extend(disliked.split(', '))

        respondent_predictions = {}
        for valid_image in valid_images:
            image_id = _image_id_of(valid_image)
            if train_matrix is None or image_id not in valid_features:
                continue
            score = preference_score(image_id)
            respondent_predictions[valid_image] = '선호' if score > threshold else '비선호'
        predictions[row['응답자 ID']] = respondent_predictions
    return predictions

# Run the prediction for every respondent with a decision threshold of 0.5.
predictions = predict_preference(mission2_result, valid_features, train_features, threshold=0.5)

성능 확인

In [14]:
# Performance measurement (accuracy)
def calculate_accuracy(predictions, valid_labels):
    """Return the fraction of predictions that match the known labels.

    Args:
        predictions: ``{respondent_id: {image_file_name: predicted_label}}``.
        valid_labels: Ground-truth labels. A label is looked up first under
            the per-respondent key ``(respondent_id, image_id)`` and, if that
            is absent, under the plain ``image_id`` key, where ``image_id``
            is the second '_'-separated token of the image file name.
            Per-respondent keys let the same image carry different labels
            for different respondents.

    Returns:
        ``correct / total`` as a float over all predictions that have a
        label; ``0.0`` when none of them has one.
    """
    correct = 0
    total = 0
    for respondent_id, respondent_predictions in predictions.items():
        for image_name, predicted_label in respondent_predictions.items():
            tokens = image_name.split('_')
            if len(tokens) < 2:
                continue  # no image id can be derived from this name
            image_id = tokens[1]

            # Prefer the respondent-specific label, fall back to the
            # image-only label.
            key = (respondent_id, image_id)
            if key not in valid_labels:
                key = image_id
                if key not in valid_labels:
                    continue

            total += 1
            if predicted_label == valid_labels[key]:
                correct += 1
    return correct / total if total > 0 else 0.0

# Load the ground-truth labels of the validation data
valid_label_directory = '../dataset/validation_label'
valid_labels = {}
# NOTE(review): labels are keyed by image id only, so if several label files
# refer to the same image the last one read overwrites the others -- confirm
# whether labels should be kept per respondent.
for filename in os.listdir(valid_label_directory):
    if not filename.endswith('.json'):
        continue
    filepath = os.path.join(valid_label_directory, filename)
    with open(filepath, 'r', encoding='utf-8') as file:
        data = json.load(file)
    # Image id is the second '_'-separated token of the image file name.
    image_id = data['item']['imgName'].split('_')[1]
    Q5 = data['item']['survey']['Q5']
    # A Q5 answer of 2 is mapped to '선호'; any other value to '비선호'.
    if Q5 == 2:
        valid_labels[image_id] = '선호'
    else:
        valid_labels[image_id] = '비선호'

# Compute the accuracy and print it with two decimal places.
accuracy = calculate_accuracy(predictions, valid_labels)
print(f'Accuracy: {accuracy:.2f}')

Accuracy: 0.40


.