In [None]:
# Mount Google Drive so the dataset and checkpoint paths under /content/drive resolve.
from google.colab import drive

drive.mount('/content/drive', force_remount=True)


Mounted at /content/drive


1. Import core libraries

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, models, transforms
import os

2. Load the datasets and build the train / validation / test loaders

In [None]:
import os
import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset, ConcatDataset

# Training-set directories; both folders are merged into a single training set below.
train_dir_1 = '/content/drive/MyDrive/mission_data/gender_processed_images'
train_dir_2 = '/content/drive/MyDrive/mission_data/gender_training_images'

# Held-out directory whose images are split into the validation and test subsets.
input_dir = '/content/drive/MyDrive/mission_data/gender_validation_images'

# Image preprocessing shared by every dataset.
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # ResNet expects 224x224 inputs
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # ImageNet channel mean / std
])

# Load each training folder, then concatenate them into one training set.
train_dataset_1 = datasets.ImageFolder(root=train_dir_1, transform=transform)
train_dataset_2 = datasets.ImageFolder(root=train_dir_2, transform=transform)
train_dataset = ConcatDataset([train_dataset_1, train_dataset_2])

# Full held-out dataset (source of both the validation and the test subset).
dataset = datasets.ImageFolder(root=input_dir, transform=transform)

# Sample indices and their class labels, used for stratification.
image_indices = np.arange(len(dataset))
labels = np.asarray(dataset.targets)

# Stratified 50/50 split of the held-out data into validation and test.
# test_size is a fraction rather than an absolute image count so the split
# adapts to however many images the folder contains.
splitter = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=42)

val_indices, test_indices = next(splitter.split(image_indices, labels))

# Wrap the index arrays in Subset views over the held-out dataset.
val_dataset = Subset(dataset, val_indices)
test_dataset = Subset(dataset, test_indices)

# DataLoaders: only the training data needs shuffling; evaluation order is kept
# fixed so validation / test passes are repeatable.
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

print(f"Train size: {len(train_loader.dataset)}, Val size: {len(val_loader.dataset)}, Test size: {len(test_loader.dataset)}")

Train size: 8140, Val size: 475, Test size: 476


3. Inspect the class distribution of each split

In [None]:
from collections import Counter
import pandas as pd

def count_class_images_in_concat(dataset, dataset_name):
    """Count images per class across every member of a ConcatDataset.

    Args:
        dataset: object exposing a ``datasets`` list (e.g. ``ConcatDataset``).
            Each member is either a ``Subset`` whose wrapped dataset has
            ``targets``, or a dataset that has ``targets`` itself.
        dataset_name: value written into the ``Dataset`` column of every row.

    Returns:
        DataFrame with columns ``Class``, ``Number of Images`` and ``Dataset``.
    """
    def _member_labels(member):
        # A Subset only stores indices; its labels live on the wrapped dataset.
        if isinstance(member, torch.utils.data.Subset):
            source_targets = member.dataset.targets
            return [source_targets[idx] for idx in member.indices]
        return member.targets

    tally = Counter()
    for member in dataset.datasets:
        # Accumulate the per-class counts of this member into the running total.
        tally.update(_member_labels(member))

    frame = pd.DataFrame(tally.items(), columns=['Class', 'Number of Images'])
    frame['Dataset'] = dataset_name
    return frame

def _collect_labels(dataset):
    """Return the class labels of ``dataset`` as a list, unwrapping torch wrappers."""
    if isinstance(dataset, torch.utils.data.Subset):
        # A Subset only stores indices; resolve them against the wrapped
        # dataset, which may itself be another Subset or a ConcatDataset.
        parent_labels = _collect_labels(dataset.dataset)
        return [parent_labels[i] for i in dataset.indices]
    if isinstance(dataset, torch.utils.data.ConcatDataset):
        # Members are laid out back to back, in the order given to ConcatDataset.
        return [label for part in dataset.datasets for label in _collect_labels(part)]
    return list(dataset.targets)


def count_class_images_in_dataset(dataset, dataset_name: str) -> pd.DataFrame:
    """Count images per class in a dataset.

    Accepts a dataset exposing ``targets``, a ``Subset``, a ``ConcatDataset``,
    or any nesting of those wrappers.

    Args:
        dataset: dataset (or wrapper around one) whose labels are counted.
        dataset_name: value written into the ``Dataset`` column of every row.

    Returns:
        DataFrame with columns ``Class``, ``Number of Images`` and ``Dataset``,
        one row per class in order of first appearance.
    """
    class_counts = Counter(_collect_labels(dataset))

    class_distribution = pd.DataFrame(class_counts.items(), columns=['Class', 'Number of Images'])
    class_distribution['Dataset'] = dataset_name

    return class_distribution

# Pick the counter that matches the training set's type (ConcatDataset vs. plain / Subset).
train_distribution = (
    count_class_images_in_concat
    if isinstance(train_dataset, torch.utils.data.ConcatDataset)
    else count_class_images_in_dataset
)(train_dataset, "Train")

# Per-class counts for the validation and test subsets.
val_distribution = count_class_images_in_dataset(val_dataset, "Validation")
test_distribution = count_class_images_in_dataset(test_dataset, "Test")

# Stack the three long-format tables into one.
combined_distribution = pd.concat(
    [train_distribution, val_distribution, test_distribution],
    ignore_index=True,
)

# Pivot so each split becomes a column, fill absent classes with 0,
# and fix the column order to Train / Validation / Test.
pivot_distribution = (
    combined_distribution
    .pivot(index='Class', columns='Dataset', values='Number of Images')
    .fillna(0)[['Train', 'Validation', 'Test']]
)

# Show the result (the bare expression on the last line is the cell output).
print("Combined Class Distribution (Pivoted):")
pivot_distribution


Combined Class Distribution (Pivoted):


Dataset,Train,Validation,Test
Class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,4496,262,263
1,3644,213,213


4. Define the model, checkpoint helpers and training loop, then train

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models
from tqdm import tqdm

# Hyperparameters
num_epochs = 30
learning_rate = 0.001

# Select the compute device: GPU when CUDA is available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build a ResNet-18 and adapt its head to this task.
model = models.resnet18(weights=None)  # weights=None: no pretrained weights are loaded
num_features = model.fc.in_features  # input width of the final fully connected layer
model.fc = nn.Linear(num_features, 2)  # replace the head with a 2-output layer
model = model.to(device)

# Loss function and optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Learning-rate scheduler: multiply the LR by 0.5 every 10 scheduler steps.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

In [None]:
# Checkpoint save helper
def save_checkpoint(state, filename="/content/drive/MyDrive/GSW/checkpoint.pth", weights_only=False):
    """Serialize ``state`` to ``filename`` with ``torch.save``.

    The parent directory is created when missing, and the data is first
    written to ``<filename>.tmp`` and then moved into place, so an interrupted
    save cannot leave a truncated checkpoint under ``filename``.

    Args:
        state: object to serialize (here, a dict of training state).
        filename: destination path of the checkpoint file.
        weights_only: unused; kept so existing callers keep working.
    """
    target = os.fspath(filename)

    parent_dir = os.path.dirname(target)
    if parent_dir:  # empty for a bare file name; makedirs('') would raise
        os.makedirs(parent_dir, exist_ok=True)

    partial = target + ".tmp"
    torch.save(state, partial)
    os.replace(partial, target)


# Checkpoint load helper
def load_checkpoint(filename="/content/drive/MyDrive/GSW/checkpoint.pth", weights_only=False):
    """Restore the training state stored in ``filename``.

    Loads the state dicts into the module-level ``model``, ``optimizer`` and
    ``scheduler`` objects.

    Args:
        filename: path of a checkpoint written by ``save_checkpoint``.
        weights_only: forwarded to ``torch.load``. With ``False`` the file is
            unpickled without restrictions, so only load checkpoints you
            created yourself.

    Returns:
        Tuple ``(epoch, best_accuracy)`` read from the checkpoint.

    Raises:
        FileNotFoundError: if ``filename`` does not exist.
    """
    # map_location lets a checkpoint saved on a GPU runtime be resumed on a
    # CPU-only runtime (and vice versa).
    checkpoint = torch.load(filename, map_location=device, weights_only=weights_only)

    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    scheduler.load_state_dict(checkpoint['scheduler_state_dict'])

    return checkpoint['epoch'], checkpoint['best_accuracy']

In [None]:
# Training and validation helpers
def _run_epoch(model, batches, criterion, optimizer=None):
    """Run one pass over ``batches`` and return ``(mean_loss, accuracy)``.

    With an ``optimizer`` the model is put in train mode and updated after
    every batch; without one it is put in eval mode and gradients are disabled.
    """
    is_training = optimizer is not None
    model.train(is_training)

    loss_sum, correct, seen = 0.0, 0, 0
    with torch.set_grad_enabled(is_training):
        for images, labels in batches:
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            if is_training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            n_batch = labels.size(0)
            # Weight by batch size so a short final batch does not skew the
            # epoch mean. Assumes the criterion returns a per-batch mean
            # (the default for nn.CrossEntropyLoss).
            loss_sum += loss.item() * n_batch
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            seen += n_batch

    return loss_sum / seen, correct / seen


def train_and_validate(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs, start_epoch=0, best_accuracy=0.0, best_model_path='best_model.pth'):
    """Train ``model`` and validate it after every epoch.

    After each epoch the scheduler is stepped, the model weights are written
    to ``best_model_path`` if validation accuracy improved, and a resumable
    checkpoint is written with ``save_checkpoint``.

    Args:
        model: network to train; batches are moved to the module-level ``device``.
        train_loader: iterable of ``(images, labels)`` training batches.
        val_loader: iterable of ``(images, labels)`` validation batches.
        criterion: loss function called as ``criterion(outputs, labels)``.
        optimizer: optimizer updating ``model``.
        scheduler: learning-rate scheduler, stepped once per epoch.
        num_epochs: total number of epochs (exclusive upper bound of the loop).
        start_epoch: zero-based epoch to start from (non-zero when resuming).
        best_accuracy: best validation accuracy seen so far.
        best_model_path: file that receives the best model's ``state_dict``.

    Returns:
        Dict with lists ``train_loss``, ``train_accuracy``, ``val_loss`` and
        ``val_accuracy``; it only covers the epochs run by this call.
    """
    history = {'train_loss': [], 'train_accuracy': [], 'val_loss': [], 'val_accuracy': []}

    for epoch in range(start_epoch, num_epochs):
        # Training phase (progress bar only on the training batches).
        train_batches = tqdm(train_loader, desc=f"Training Epoch {epoch+1}/{num_epochs}")
        avg_loss, accuracy = _run_epoch(model, train_batches, criterion, optimizer)
        history['train_loss'].append(avg_loss)
        history['train_accuracy'].append(accuracy)

        print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_loss:.4f}, Train Accuracy: {accuracy:.4f}")

        # Validation phase
        avg_val_loss, val_accuracy = _run_epoch(model, val_loader, criterion)
        history['val_loss'].append(avg_val_loss)
        history['val_accuracy'].append(val_accuracy)

        print(f"Validation Loss: {avg_val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")

        scheduler.step()

        # Keep the weights of the model with the highest validation accuracy.
        if val_accuracy > best_accuracy:
            best_accuracy = val_accuracy
            torch.save(model.state_dict(), best_model_path)

        # Save a resumable checkpoint; 'epoch' is the next epoch to run.
        save_checkpoint({
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'best_accuracy': best_accuracy
        })

    return history


In [None]:
# Resume from an existing checkpoint when there is one; otherwise start fresh.
try:
    resume_state = load_checkpoint()
except FileNotFoundError:
    print("No checkpoint found, starting from scratch.")
    start_epoch, best_accuracy = 0, 0.0
else:
    start_epoch, best_accuracy = resume_state
    print(f"Checkpoint loaded. Resuming from epoch {start_epoch} with best accuracy {best_accuracy:.4f}.")

# Run (or continue) training.
history = train_and_validate(
    model,
    train_loader,
    val_loader,
    criterion,
    optimizer,
    scheduler,
    num_epochs,
    start_epoch=start_epoch,
    best_accuracy=best_accuracy,
)


  checkpoint = torch.load(filename)


Checkpoint loaded. Resuming from epoch 6 with best accuracy 0.6695.


Training Epoch 7/30: 100%|██████████| 128/128 [2:25:49<00:00, 68.36s/it]


Epoch [7/30], Train Loss: 0.5927, Train Accuracy: 0.6826
Validation Loss: 0.6272, Validation Accuracy: 0.6316


Training Epoch 8/30: 100%|██████████| 128/128 [52:16<00:00, 24.50s/it]


Epoch [8/30], Train Loss: 0.5617, Train Accuracy: 0.7136
Validation Loss: 0.6523, Validation Accuracy: 0.6379


Training Epoch 9/30: 100%|██████████| 128/128 [52:11<00:00, 24.46s/it]


Epoch [9/30], Train Loss: 0.5211, Train Accuracy: 0.7479
Validation Loss: 0.6514, Validation Accuracy: 0.6611


Training Epoch 10/30: 100%|██████████| 128/128 [52:29<00:00, 24.61s/it]


Epoch [10/30], Train Loss: 0.4952, Train Accuracy: 0.7619
Validation Loss: 0.4877, Validation Accuracy: 0.7705


Training Epoch 11/30: 100%|██████████| 128/128 [53:00<00:00, 24.85s/it]


Epoch [11/30], Train Loss: 0.3834, Train Accuracy: 0.8240
Validation Loss: 0.4609, Validation Accuracy: 0.7895


Training Epoch 12/30: 100%|██████████| 128/128 [53:05<00:00, 24.88s/it]


Epoch [12/30], Train Loss: 0.3123, Train Accuracy: 0.8655
Validation Loss: 0.4639, Validation Accuracy: 0.8400


Training Epoch 13/30: 100%|██████████| 128/128 [52:38<00:00, 24.68s/it]


Epoch [13/30], Train Loss: 0.2379, Train Accuracy: 0.9010
Validation Loss: 0.6092, Validation Accuracy: 0.7726


Training Epoch 14/30: 100%|██████████| 128/128 [52:32<00:00, 24.63s/it]


Epoch [14/30], Train Loss: 0.1562, Train Accuracy: 0.9398
Validation Loss: 0.5489, Validation Accuracy: 0.8274


Training Epoch 15/30: 100%|██████████| 128/128 [52:47<00:00, 24.75s/it]


Epoch [15/30], Train Loss: 0.1206, Train Accuracy: 0.9529
Validation Loss: 0.5356, Validation Accuracy: 0.8547


Training Epoch 16/30: 100%|██████████| 128/128 [52:36<00:00, 24.66s/it]


Epoch [16/30], Train Loss: 0.0836, Train Accuracy: 0.9699
Validation Loss: 0.3838, Validation Accuracy: 0.8674


Training Epoch 17/30: 100%|██████████| 128/128 [52:32<00:00, 24.63s/it]


Epoch [17/30], Train Loss: 0.0431, Train Accuracy: 0.9845
Validation Loss: 0.4683, Validation Accuracy: 0.8758


Training Epoch 18/30: 100%|██████████| 128/128 [52:46<00:00, 24.74s/it]


Epoch [18/30], Train Loss: 0.0505, Train Accuracy: 0.9834
Validation Loss: 0.5610, Validation Accuracy: 0.8632


Training Epoch 19/30: 100%|██████████| 128/128 [52:41<00:00, 24.70s/it]


Epoch [19/30], Train Loss: 0.0366, Train Accuracy: 0.9876
Validation Loss: 0.4967, Validation Accuracy: 0.8926


Training Epoch 20/30: 100%|██████████| 128/128 [52:30<00:00, 24.61s/it]


Epoch [20/30], Train Loss: 0.0345, Train Accuracy: 0.9873
Validation Loss: 0.5469, Validation Accuracy: 0.8884


Training Epoch 21/30: 100%|██████████| 128/128 [52:25<00:00, 24.57s/it]


Epoch [21/30], Train Loss: 0.0110, Train Accuracy: 0.9972
Validation Loss: 0.4562, Validation Accuracy: 0.8926


Training Epoch 22/30: 100%|██████████| 128/128 [52:42<00:00, 24.70s/it]


Epoch [22/30], Train Loss: 0.0062, Train Accuracy: 0.9980
Validation Loss: 0.4418, Validation Accuracy: 0.8989


Training Epoch 23/30: 100%|██████████| 128/128 [52:56<00:00, 24.82s/it]


Epoch [23/30], Train Loss: 0.0023, Train Accuracy: 0.9998
Validation Loss: 0.4733, Validation Accuracy: 0.8926


Training Epoch 24/30: 100%|██████████| 128/128 [52:31<00:00, 24.62s/it]


Epoch [24/30], Train Loss: 0.0010, Train Accuracy: 1.0000
Validation Loss: 0.4427, Validation Accuracy: 0.8905


Training Epoch 25/30: 100%|██████████| 128/128 [55:22<00:00, 25.96s/it]


Epoch [25/30], Train Loss: 0.0008, Train Accuracy: 0.9999
Validation Loss: 0.4577, Validation Accuracy: 0.8863


Training Epoch 26/30: 100%|██████████| 128/128 [55:01<00:00, 25.79s/it]


Epoch [26/30], Train Loss: 0.0008, Train Accuracy: 1.0000
Validation Loss: 0.4933, Validation Accuracy: 0.8884


Training Epoch 27/30: 100%|██████████| 128/128 [52:58<00:00, 24.83s/it]


Epoch [27/30], Train Loss: 0.0015, Train Accuracy: 0.9999
Validation Loss: 0.5433, Validation Accuracy: 0.8989


Training Epoch 28/30: 100%|██████████| 128/128 [52:50<00:00, 24.77s/it]


Epoch [28/30], Train Loss: 0.0291, Train Accuracy: 0.9942
Validation Loss: 0.5506, Validation Accuracy: 0.8989


Training Epoch 29/30: 100%|██████████| 128/128 [52:47<00:00, 24.74s/it]


Epoch [29/30], Train Loss: 0.0335, Train Accuracy: 0.9870
Validation Loss: 0.6011, Validation Accuracy: 0.8968


Training Epoch 30/30: 100%|██████████| 128/128 [52:39<00:00, 24.68s/it]


Epoch [30/30], Train Loss: 0.0044, Train Accuracy: 0.9994
Validation Loss: 0.5507, Validation Accuracy: 0.8968


5. Load the best model for evaluation

In [None]:
# Load the best model's weights.
# map_location keeps this working when the file was saved on a GPU runtime but
# is loaded on a CPU-only one; weights_only=True is sufficient because the file
# holds a plain state_dict.
model.load_state_dict(torch.load('best_model.pth', map_location=device, weights_only=True))
model.eval()  # switch to evaluation mode