In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive; the dataset
# paths configured further down live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from collections import defaultdict
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader
from PIL import ImageFile
from tqdm import tqdm
# Let Pillow load image files whose data is truncated instead of raising,
# so a single damaged image does not abort a whole pass over the dataset.
ImageFile.LOAD_TRUNCATED_IMAGES = True

In [None]:
# Path configuration
# Root of the sample dataset on the mounted Google Drive.
base_path = '/content/drive/MyDrive/fashion_data/sample'
# Directory holding the raw images, one sub-directory per split.
image_root = os.path.join(base_path, 'raw_data')
# Sub-directory names walked under image_root (split, then gender).
splits = ['1.training', '2.validation']
genders = ['man', 'woman']
# Decade folder names accepted by the directory walk: '1950' ... '2010'.
valid_years = list(map(str, range(1950, 2020, 10)))

In [None]:
# Use the GPU when PyTorch reports one as available, otherwise the CPU.
is_cuda = torch.cuda.is_available()
if is_cuda:
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Current device is', device)

Current device is cuda


In [None]:
# Mission 1
#  1 - 1
def count_images_by_gender_style(root=None, split_names=None, gender_names=None, year_names=None):
    """Count ``.jpg`` images per (gender label, style) pair.

    Walks ``<root>/<split>/<gender>/<year>/`` and, for every ``.jpg`` file,
    reads the style and gender label from the file name: the name is split on
    ``'_'``, field 3 is the style and field 4 (up to its first ``'.'``) is the
    gender label. Files with fewer than five fields are skipped.

    Args:
        root: Directory containing the split folders. Defaults to the
            module-level ``image_root``.
        split_names: Split folder names to visit. Defaults to ``splits``.
        gender_names: Gender folder names to visit. Defaults to ``genders``.
        year_names: Year folder names to accept. Defaults to ``valid_years``.

    Returns:
        pandas.DataFrame with columns ``성별`` (gender label), ``스타일``
        (style) and ``이미지 수`` (image count), sorted by count descending and
        then by (gender label, style) ascending. The columns are present even
        when no image matched.
    """
    # Fall back to the notebook-level configuration for anything not supplied.
    root = image_root if root is None else root
    split_names = splits if split_names is None else split_names
    gender_names = genders if gender_names is None else gender_names
    year_names = valid_years if year_names is None else year_names

    counter = defaultdict(int)

    for split in split_names:
        for gender in gender_names:
            gender_path = os.path.join(root, split, gender)
            # isdir (rather than exists) so a stray file with this name is
            # skipped instead of making os.listdir raise.
            if not os.path.isdir(gender_path):
                continue

            for year in os.listdir(gender_path):
                if year not in year_names:
                    continue
                year_path = os.path.join(gender_path, year)
                if not os.path.isdir(year_path):
                    continue

                for fname in os.listdir(year_path):
                    if not fname.endswith('.jpg'):
                        continue
                    parts = fname.split('_')
                    # Explicit length check instead of a bare ``except``:
                    # only names lacking the style/gender fields are skipped,
                    # and unrelated errors are no longer silently swallowed.
                    if len(parts) < 5:
                        continue
                    style = parts[3]
                    gender_label = parts[4].split('.')[0]
                    counter[(gender_label, style)] += 1

    # Most frequent first; ties broken by (gender label, style).
    ordered = sorted(counter.items(), key=lambda item: (-item[1], item[0]))
    return pd.DataFrame(
        [{"성별": g, "스타일": s, "이미지 수": n} for (g, s), n in ordered],
        columns=["성별", "스타일", "이미지 수"],
    )

# Run the count once (it walks the dataset directories, so calling it twice
# doubles the wait) and show the 20 most frequent (gender, style) pairs.
stat_df = count_images_by_gender_style()

print("1-1: 성별 & 스타일 통계")
display(stat_df.head(20))

1-1: 성별 & 스타일 통계


Unnamed: 0,성별,스타일,이미지 수
0,M,hippie,150
1,M,bold,138
2,M,metrosexual,138
3,M,mods,134
4,M,hiphop,133
5,M,ivy,132
6,M,sportivecasual,129
7,W,minimal,122
8,W,feminine,113
9,W,bodyconscious,110


In [None]:
# 1 - 2 (resnet18)
def train_resnet18(num_epochs=5, batch_size=128, learning_rate=0.001, image_size=128, seed=None):
    """Train a randomly initialised ResNet-18 and print validation top-1 accuracy.

    Images are loaded with ``torchvision.datasets.ImageFolder`` from
    ``<image_root>/1.training`` and ``<image_root>/2.validation``, so the class
    labels are the names of the immediate sub-directories of those folders.
    NOTE(review): the directory walk in the counting cell treats those
    sub-directories as gender folders, which would make this a gender
    classifier rather than a style classifier - confirm the intended target.

    Args:
        num_epochs: Number of passes over the training set.
        batch_size: Batch size for both the training and validation loaders.
        learning_rate: Learning rate passed to Adam.
        image_size: Images are resized to ``image_size x image_size``.
        seed: If given, passed to ``torch.manual_seed`` before the model and
            loaders are created. ``None`` leaves the RNG state untouched.

    Returns:
        The trained ``torch.nn.Module`` (on the selected device), so it can be
        evaluated or saved after training instead of being discarded.

    Raises:
        ValueError: If the training and validation folders do not yield the
            same class-to-index mapping; accuracy would otherwise compare
            mismatched label ids.
    """
    if seed is not None:
        torch.manual_seed(seed)

    train_path = os.path.join(image_root, '1.training')
    val_path = os.path.join(image_root, '2.validation')

    # ToTensor yields values in [0, 1]; Normalize(0.5, 0.5) maps them to [-1, 1].
    transform = transforms.Compose([
        transforms.Resize((image_size, image_size)),
        transforms.ToTensor(),
        transforms.Normalize([0.5] * 3, [0.5] * 3),
    ])

    train_dataset = datasets.ImageFolder(train_path, transform=transform)
    val_dataset = datasets.ImageFolder(val_path, transform=transform)
    if val_dataset.class_to_idx != train_dataset.class_to_idx:
        raise ValueError(
            "Training and validation class folders differ: "
            f"{train_dataset.class_to_idx} vs {val_dataset.class_to_idx}"
        )

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # No arguments -> no pretrained weights. This avoids the deprecated
    # ``pretrained=`` keyword while staying valid on older torchvision too.
    model = models.resnet18()
    # Replace the classification head so it matches the number of classes.
    model.fc = nn.Linear(model.fc.in_features, len(train_dataset.classes))
    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    def train(model, loader):
        """Run one training epoch and return the mean per-batch loss."""
        model.train()
        total_loss = 0
        for images, labels in tqdm(loader):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        return total_loss / len(loader)

    def evaluate(model, loader):
        """Return top-1 accuracy over ``loader`` (0.0 if it yields no samples)."""
        model.eval()
        correct = total = 0
        with torch.no_grad():
            for images, labels in loader:
                images, labels = images.to(device), labels.to(device)
                preds = model(images).argmax(dim=1)
                correct += (preds == labels).sum().item()
                total += labels.size(0)
        return correct / total if total > 0 else 0.0

    # Print the loss and top-1 validation accuracy after every epoch.
    for epoch in range(num_epochs):
        loss = train(model, train_loader)
        acc = evaluate(model, val_loader)
        print(f"[epoch {epoch+1}] loss: {loss:.4f}, Accuracy: {acc * 100:.2f}%", flush=True)

    return model


# Keep the trained model; assigning also stops the cell from echoing its repr.
resnet18_model = train_resnet18()

100%|██████████| 10/10 [06:20<00:00, 38.09s/it]


[epoch 1] loss: 1.0809, Accuracy: 56.72%


100%|██████████| 10/10 [06:23<00:00, 38.31s/it]


[epoch 2] loss: 0.6656, Accuracy: 56.72%


100%|██████████| 10/10 [06:18<00:00, 37.90s/it]


[epoch 3] loss: 0.6051, Accuracy: 62.38%


100%|██████████| 10/10 [06:19<00:00, 37.99s/it]


[epoch 4] loss: 0.5531, Accuracy: 62.13%


100%|██████████| 10/10 [06:20<00:00, 38.05s/it]


[epoch 5] loss: 0.5375, Accuracy: 59.43%
