In [2]:
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

labels = {
    0: "Angry",
    1: "Disgust",
    2: "Fear",
    3: "Happy",
    4: "Neutral",
    5: "Sad",
    6: "Surprise",
    
}

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)  # RGB 정규화
])

train_dataset = datasets.ImageFolder('/kaggle/input/rafdataset/RAF dataset/train', transform=transform)
test_dataset = datasets.ImageFolder('/kaggle/input/rafdataset/RAF dataset/test', transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=32)

In [2]:
from facenet_pytorch import MTCNN
from PIL import Image, ImageEnhance, ImageStat
import torch
from torchvision import transforms

# 얼굴 탐지기 초기화
mtcnn = MTCNN(keep_all=False, device='cpu')  # 단일 얼굴만

def auto_brightness(image, target_mean=250):
    """
    현재 이미지의 밝기 평균을 측정해서,
    target_mean(예: 130)에 맞게 밝기 비율을 조정해주는 함수.
    """
    stat = ImageStat.Stat(image)
    mean = stat.mean[0]  # 흑백 이미지일 때는 채널이 1개

    # 밝기 보정 비율 계산
    brightness_factor = target_mean / (mean + 1e-5)

    # 너무 과한 보정은 방지 (안정화 범위 지정)
    brightness_factor = max(0.7, brightness_factor)

    enhancer = ImageEnhance.Brightness(image)
    return enhancer.enhance(brightness_factor)


image_path = 'Example.jpg' # 확인할 이미지
image = Image.open(image_path).convert("RGB")

image = auto_brightness(image, target_mean=250)


# 얼굴 crop
face = mtcnn(image)  # 결과: torch.Tensor [3, H, W]

if face is not None:
    # ⬇️ 전처리: Grayscale + Resize(48x48)
    transform = transforms.Compose([
        # transforms.Grayscale(num_output_channels=1),
        transforms.Resize((224, 224))
    ])

    # 이미지 값 스케일링
    face = (face * 255).clamp(0, 255).byte()

    # Tensor → PIL 이미지로 변환 후 전처리
    face_pil = transforms.ToPILImage()(face)
    face_gray_resized = transform(face_pil)

    # 저장
    face_gray_resized.save("224x224.jpg")
    print("얼굴 저장 완료: 2424x224.jpg")

else:
    print("얼굴을 찾을 수 없습니다.")

얼굴 저장 완료: 2424x224.jpg


In [3]:
import torch
from torchvision import transforms
from PIL import Image
import torch.nn.functional as F
from facenet_pytorch import MTCNN
from model import EmotionSwin  # Swin 모델 정의

# 감정 라벨 정의
emotion_labels = {
    0: "Angry", 1: "Disgust", 2: "Fear", 3: "Happy",
    4: "Neutral", 5: "Sad", 6: "Surprise",
}

# 디바이스 설정
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 모델 로딩
model = EmotionSwin(num_classes=7).to(device)
model.load_state_dict(torch.load('emotion_swin_last.pth', map_location=device))
model.eval()

# 이미지 경로 (48x48 grayscale 이미지)
image_path = '224x224.jpg'

# 🔄 전처리: 1채널 → 3채널 복제 → Resize → Tensor → Normalize
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),         # 흑백 → RGB 채널 복제
    transforms.Resize((224, 224)),                       # Swin 입력 크기 맞춤
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5]*3, std=[0.5]*3)      # [-1, 1] 범위 정규화
])

# 이미지 불러오기 & 전처리
image = Image.open(image_path).convert('L')
image = transform(image).unsqueeze(0).to(device)  # [1, 3, 224, 224]

# 예측
with torch.no_grad():
    outputs = model(image)
    probs = F.softmax(outputs, dim=1)
    predicted = torch.argmax(probs, dim=1).item()
    confidence = probs[0][predicted].item()

# 출력
print(f"🧠 감정 예측 결과: {emotion_labels[predicted]} ({confidence * 100:.2f}%)")

🧠 감정 예측 결과: Neutral (99.43%)
