In [31]:
import json
import os
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from tqdm import tqdm

# --- 1. 설정 (Configuration) ---
CONFIG = {
    "image_dir": "sleepy/dataset/Trainning/image_trainning/all_image_trainning",
    "label_dir": "sleepy/dataset/Trainning/label_trainning/all_label_trainning",
    "test_image_dir": "sleepy/dataset/Test/image_test/all_image_test",
    "test_label_dir": "sleepy/dataset/Test/label_test/all_label_test",
    "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
    "img_size": 90,
    "batch_size": 32,
    "epochs": 5,
    "learning_rate": 0.001,
    "max_samples": 9000,
    "model_path": "eye_state_classifier.pth"
}

# --- 2. 유틸리티 함수 ---
def str2bool(val: str) -> bool:
    return str(val).lower() in ("true", "1")

def crop_and_resize_eye(frame: np.ndarray, bbox: tuple, size: int = 90) -> np.ndarray | None:
    if bbox is None:
        return None
    x, y, w, h = bbox
    x1 = max(x - int(w * 0.5), 0)
    y1 = max(y - int(h * 0.5), 0)
    x2 = min(x + w + int(w * 0.5), frame.shape[1])
    y2 = min(y + h + int(h * 0.5), frame.shape[0])
    eye_crop = frame[y1:y2, x1:x2]
    if eye_crop.size == 0:
        return None
    return cv2.resize(eye_crop, (size, size))

# --- 3. Dataset 클래스 ---
class EyeDataset(Dataset):
    def __init__(self, image_dir, label_dir, transform=None, max_samples=None):
        self.image_dir = Path(image_dir)
        self.label_dir = Path(label_dir)
        self.transform = transform
        self.samples = []

        image_files = sorted([f for f in os.listdir(image_dir) if f.lower().endswith(('.jpg', '.jpeg', '.png'))])
        if max_samples:
            image_files = image_files[:max_samples]

        print("🔍 유효한 눈 데이터를 수집 중...")
        for img_name in tqdm(image_files):
            img_path = self.image_dir / img_name
            label_path = self.label_dir / (img_name.rsplit('.', 1)[0] + '.json')
            if not label_path.exists():
                continue

            try:
                with open(label_path, 'r', encoding='utf-8') as f:
                    label_data = json.load(f)
            except Exception:
                continue

            object_info = label_data.get('ObjectInfo', {})
            keypoints_info = object_info.get('KeyPoints', {})
            points = keypoints_info.get('Points')
            bounding_boxes = object_info.get('BoundingBox', {})

            if not points or len(points) < (47 * 2):
                continue
            try:
                points_arr = np.array(points, dtype=np.float32).reshape(-1, 2)
            except (ValueError, TypeError):
                continue

            leye_indices = [36, 37, 38, 39, 40, 41]
            reye_indices = [42, 43, 44, 45, 46, 47]

            leye_points = points_arr[leye_indices]
            reye_points = points_arr[reye_indices]

            lx_min, ly_min = np.min(leye_points, axis=0)
            lx_max, ly_max = np.max(leye_points, axis=0)
            rx_min, ry_min = np.min(reye_points, axis=0)
            rx_max, ry_max = np.max(reye_points, axis=0)

            leye_bbox = (int(lx_min), int(ly_min), int(lx_max - lx_min), int(ly_max - ly_min))
            reye_bbox = (int(rx_min), int(ry_min), int(rx_max - rx_min), int(ry_max - ry_min))

            leye_opened = str2bool(bounding_boxes.get('Leye', {}).get('Opened', False))
            reye_opened = str2bool(bounding_boxes.get('Reye', {}).get('Opened', False))

            if leye_bbox[2] > 0 and leye_bbox[3] > 0:
                self.samples.append((str(img_path), leye_bbox, int(leye_opened)))
            if reye_bbox[2] > 0 and reye_bbox[3] > 0:
                self.samples.append((str(img_path), reye_bbox, int(reye_opened)))

        print(f"✅ 총 {len(self.samples)}개의 눈 이미지 수집 완료")

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        img_path, bbox, label = self.samples[idx]
        image_cv = cv2.imread(img_path)
        if image_cv is None:
            return None, None
        eye_img = crop_and_resize_eye(image_cv, bbox, size=CONFIG['img_size'])
        if eye_img is None:
            return None, None
        image_pil = Image.fromarray(cv2.cvtColor(eye_img, cv2.COLOR_BGR2RGB))
        if self.transform:
            image = self.transform(image_pil)
        return image, torch.tensor(label, dtype=torch.float)


In [35]:
train_dataset = EyeDataset(CONFIG['image_dir'], CONFIG['label_dir'], transform=transform, max_samples=CONFIG['max_samples'])
len(train_dataset)

🔍 유효한 눈 데이터를 수집 중...


  0%|          | 0/9000 [00:00<?, ?it/s]

100%|██████████| 9000/9000 [00:03<00:00, 2420.63it/s]

✅ 총 17990개의 눈 이미지 수집 완료





17990

In [42]:
train_dataset[0][1]

tensor(0.)

In [28]:
# --- 4. 모델 정의 ---
class EyeCNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(3, 16, 3, padding=1),  # 90x90 -> 90x90
            nn.ReLU(),
            nn.MaxPool2d(2),                 # 90x90 -> 45x45

            nn.Conv2d(16, 32, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),                 # 45x45 -> 22x22

            nn.Conv2d(32, 64, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),                 # 22x22 -> 11x11

            nn.Conv2d(64, 128, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),   
        )

        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 5 * 5, 128),
            nn.ReLU(),
            nn.Linear(128, 1),  # 2 classes: open/closed
            nn.Sigmoid()  # Output between 0 and 1
        )

    def forward(self, x):
        x = self.conv(x)
        x = self.fc(x)
        return x

In [29]:
# --- 5. 학습 및 평가 ---
def train_model(model, train_loader, criterion, optimizer, device, epochs):
    model.train()
    losses = []
    print("\n=== 🚀 학습 시작 ===")
    for epoch in range(5):
        total_loss = 0
        for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/1"):
            if images is None: continue
            images = images.to(device)
            labels = labels.float().unsqueeze(1).to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        avg_loss = total_loss / len(train_loader)
        print(f"Epoch {epoch+1}: Loss={avg_loss:.4f}")
        losses.append(avg_loss)
    return losses

def evaluate_model(model, test_loader, device):
    model.eval()
    preds, trues = [], []
    print("\n=== 📊 테스트 평가 중 ===")
    with torch.no_grad():
        for images, labels in tqdm(test_loader):
            if images is None: continue
            images = images.to(device)
            outputs = model(images)
            predicted = (outputs > 0.5).int().cpu().squeeze()
            preds.append(predicted)
            trues.append(labels)

    y_pred = torch.cat([p.view(-1) for p in preds]).numpy()
    y_true = torch.cat([l.view(-1) for l in trues]).numpy()

    print("\n[평가 결과]")
    print(classification_report(y_true, y_pred, target_names=["Closed (0)", "Opened (1)"], zero_division=0))
    print("Confusion Matrix:")
    print(confusion_matrix(y_true, y_pred))
    print(f"\nAccuracy: {accuracy_score(y_true, y_pred):.4f}")

In [38]:
# --- 6. 메인 블록 ---

print(f"💻 디바이스: {CONFIG['device']}")
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,)*3, (0.5,)*3)
])

# 학습/테스트 데이터셋 준비
train_dataset = EyeDataset(CONFIG['image_dir'], CONFIG['label_dir'], transform=transform, max_samples=CONFIG['max_samples'])
test_dataset = EyeDataset(CONFIG['test_image_dir'], CONFIG['test_label_dir'], transform=transform)

def collate_fn(batch):
    batch = list(filter(lambda x: x[0] is not None, batch))
    if not batch:
        return torch.Tensor(), torch.Tensor()
    return torch.utils.data.dataloader.default_collate(batch)

train_loader = DataLoader(train_dataset, batch_size=CONFIG['batch_size'], shuffle=True, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=CONFIG['batch_size'], shuffle=False, collate_fn=collate_fn)

print(f"\n학습 샘플 수: {len(train_dataset)}")
print(f"테스트 샘플 수: {len(test_dataset)}")


💻 디바이스: cuda
🔍 유효한 눈 데이터를 수집 중...


  0%|          | 0/9000 [00:00<?, ?it/s]

100%|██████████| 9000/9000 [00:03<00:00, 2672.76it/s]


✅ 총 17990개의 눈 이미지 수집 완료
🔍 유효한 눈 데이터를 수집 중...


100%|██████████| 500/500 [00:00<00:00, 2824.06it/s]

✅ 총 1000개의 눈 이미지 수집 완료

학습 샘플 수: 17990
테스트 샘플 수: 1000





In [39]:

# 모델 정의 및 학습
model = EyeCNN().to(CONFIG['device'])
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=CONFIG['learning_rate'])


In [40]:
train_model(model, train_loader, criterion, optimizer, device='cuda:0', epochs=1)



=== 🚀 학습 시작 ===


Epoch 1/1: 100%|██████████| 563/563 [02:04<00:00,  4.54it/s]


Epoch 1: Loss=0.0046


Epoch 2/1:  21%|██▏       | 120/563 [00:25<01:34,  4.71it/s]


KeyboardInterrupt: 

In [41]:
# 평가 함수에서 결과 반환하도록 수정
def evaluate_model(model, test_loader, device):
    model.eval()
    preds, trues = [], []
    with torch.no_grad():
        for images, labels in tqdm(test_loader):
            if images is None: continue
            images = images.to(device)
            outputs = model(images)
            predicted = (outputs > 0.5).int().cpu().squeeze()
            preds.append(predicted)
            trues.append(labels)
    y_pred = torch.cat([p.view(-1) for p in preds]).numpy()
    y_true = torch.cat([l.view(-1) for l in trues]).numpy()
    return y_true, y_pred

# 메인에서 평가 결과 출력
y_true, y_pred = evaluate_model(model, test_loader, CONFIG['device'])
print(classification_report(y_true, y_pred, target_names=['Closed (0)', 'Opened (1)'], labels=[0, 1], zero_division=0))


100%|██████████| 32/32 [00:05<00:00,  5.82it/s]

              precision    recall  f1-score   support

  Closed (0)       1.00      1.00      1.00      1000
  Opened (1)       0.00      0.00      0.00         0

    accuracy                           1.00      1000
   macro avg       0.50      0.50      0.50      1000
weighted avg       1.00      1.00      1.00      1000






In [43]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_true, y_pred)



array([[1000]])

In [45]:
y_true.sum()

np.float32(0.0)

In [None]:

plt.plot(losses, label='Train Loss')
plt.title("Training Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.grid(True)
plt.legend()
plt.show()