# 음료 검증 모델 학습

## 목표
다회용기에 음료가 담겨있는지 확인하는 경량 이진 분류 모델

## 모델
- **백본**: MobileNetV3-Small (경량화)
- **헤드**: 2-class 분류기
- **출력**: has_beverage (bool), confidence (float)
- **목표 추론 속도**: < 100ms

## 데이터 구조
```
data/beverage_detection/
├── train/
│   ├── with_beverage/    # 음료 있음
│   └── without_beverage/ # 빈 용기
└── val/
    ├── with_beverage/
    └── without_beverage/
```

## 1. 환경 설정

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import models, transforms
from PIL import Image
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import seaborn as sns
from tqdm import tqdm
import time

# Pick the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU
_device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(_device_name)
print(f"Using device: {device}")

## 2. 하이퍼파라미터

In [None]:
# Class names; the list index of a name is the integer label used for training
# (0 = without_beverage, 1 = with_beverage)
CLASS_NAMES = ['without_beverage', 'with_beverage']
NUM_CLASSES = len(CLASS_NAMES)

# Training hyperparameters
BATCH_SIZE = 32
EPOCHS = 15
LEARNING_RATE = 0.001
IMG_SIZE = 224  # images are resized to IMG_SIZE x IMG_SIZE

# Filesystem locations (relative to the notebook's working directory)
_DATA_ROOT = '../data/beverage_detection'
TRAIN_DIR = f'{_DATA_ROOT}/train'
VAL_DIR = f'{_DATA_ROOT}/val'
MODEL_SAVE_PATH = '../models/weights/beverage_detector.pth'

## 3. 데이터 전처리

In [None]:
# Per-channel mean/std used for normalization (the standard ImageNet statistics)
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]
_TARGET_SIZE = (IMG_SIZE, IMG_SIZE)

# Training pipeline: resize, light geometric/photometric augmentation,
# then tensor conversion and normalization
_train_steps = [
    transforms.Resize(_TARGET_SIZE),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.3, contrast=0.3),  # vary brightness/contrast
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
]
train_transform = transforms.Compose(_train_steps)

# Validation pipeline: deterministic resize + normalization only
_val_steps = [
    transforms.Resize(_TARGET_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
]
val_transform = transforms.Compose(_val_steps)

## 4. 데이터셋 클래스

In [None]:
class BeverageDataset(Dataset):
    """Image dataset for binary beverage-presence classification.

    ``root_dir`` is expected to contain one sub-directory per entry of the
    module-level ``CLASS_NAMES`` list. The position of a name in that list is
    used as the integer label of every image found in its directory.

    Args:
        root_dir: Directory holding the per-class sub-directories.
        transform: Optional callable applied to each PIL image in
            ``__getitem__``.
    """

    # Lower-case file suffixes that are treated as images
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.images = []  # image file paths
        self.labels = []  # integer class index for each path in self.images

        for class_idx, class_name in enumerate(CLASS_NAMES):
            class_dir = os.path.join(root_dir, class_name)
            # isdir (not exists) so a stray file with the class name is also
            # reported instead of crashing in os.listdir
            if not os.path.isdir(class_dir):
                print(f"Warning: {class_dir} not found")
                continue

            # os.listdir returns entries in arbitrary order; sorting keeps the
            # sample order (and therefore seeded shuffles) reproducible
            for img_name in sorted(os.listdir(class_dir)):
                if img_name.lower().endswith(self.IMAGE_EXTENSIONS):
                    self.images.append(os.path.join(class_dir, img_name))
                    self.labels.append(class_idx)

        print(f"Loaded {len(self.images)} images from {root_dir}")
        print(f"  - Without beverage: {self.labels.count(0)}")
        print(f"  - With beverage: {self.labels.count(1)}")

    def __len__(self):
        """Return the number of images that were found."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        The image is loaded as RGB and passed through ``self.transform`` when
        one was supplied.
        """
        img_path = self.images[idx]
        label = self.labels[idx]

        # Close the underlying file as soon as the pixel data has been copied
        with Image.open(img_path) as raw:
            image = raw.convert('RGB')

        if self.transform is not None:
            image = self.transform(image)

        return image, label

## 5. 데이터 로더

In [None]:
# Build the train/validation datasets with their respective preprocessing
train_dataset = BeverageDataset(TRAIN_DIR, transform=train_transform)
val_dataset = BeverageDataset(VAL_DIR, transform=val_transform)

# Options shared by both loaders; only the training loader shuffles
_loader_options = {'batch_size': BATCH_SIZE, 'num_workers': 4}
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_options)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_options)

print(f"\nTrain batches: {len(train_loader)}")
print(f"Val batches: {len(val_loader)}")

## 6. 샘플 이미지 확인

In [None]:
def show_samples(loader, num_samples=8):
    """Plot up to ``num_samples`` images from the first batch of ``loader``.

    Each image is de-normalized with the same mean/std used by the
    preprocessing transforms and titled with its class name.

    Args:
        loader: Iterable yielding ``(images, labels)`` batches; images are
            expected as CPU tensors in (C, H, W) layout.
        num_samples: Maximum number of images to draw.
    """
    images, labels = next(iter(loader))

    count = min(num_samples, len(images))
    if count <= 0:
        return

    # Size the grid from the number of images actually drawn instead of a
    # fixed 2x4 layout, so any num_samples / batch size works
    cols = 4
    rows = -(-count // cols)  # ceiling division
    fig, axes = plt.subplots(rows, cols, figsize=(15, 4 * rows), squeeze=False)
    axes = axes.flatten()

    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])

    for i in range(count):
        # (C, H, W) -> (H, W, C), then undo the normalization for display
        img = images[i].permute(1, 2, 0).numpy()
        img = np.clip(img * std + mean, 0, 1)

        axes[i].imshow(img)
        axes[i].set_title(CLASS_NAMES[int(labels[i])])
        axes[i].axis('off')

    # Hide grid cells that have no image
    for ax in axes[count:]:
        ax.axis('off')

    plt.tight_layout()
    plt.show()

show_samples(train_loader)

## 7. 경량 모델 정의

In [None]:
class BeverageDetector(nn.Module):
    """Beverage-presence classifier built on a MobileNetV3-Small backbone."""

    def __init__(self, num_classes=2, pretrained=True):
        super().__init__()

        # MobileNetV3-Small network (chosen for its small size)
        net = models.mobilenet_v3_small(pretrained=pretrained)

        # Replace the stock classifier with a compact head for `num_classes`
        in_dim = net.classifier[0].in_features
        head_layers = [
            nn.Linear(in_dim, 256),
            nn.Hardswish(),  # same activation family MobileNetV3 uses
            nn.Dropout(0.2),
            nn.Linear(256, num_classes),
        ]
        net.classifier = nn.Sequential(*head_layers)

        self.backbone = net

    def forward(self, x):
        """Return the class logits produced by the backbone for batch ``x``."""
        logits = self.backbone(x)
        return logits

# Instantiate the detector and move it to the selected device
model = BeverageDetector(num_classes=NUM_CLASSES, pretrained=True).to(device)

# Count all parameters and the subset that will receive gradients
total_params = 0
trainable_params = 0
for _param in model.parameters():
    _n = _param.numel()
    total_params += _n
    if _param.requires_grad:
        trainable_params += _n

print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")
print(f"\n✓ Lightweight model for fast inference")

## 8. 손실 함수 및 옵티마이저

In [None]:
# Classification loss over the class logits
criterion = nn.CrossEntropyLoss()

# Adam over every model parameter
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

# Halve the learning rate when the monitored value (stepped with the
# validation loss) has not improved for 2 consecutive epochs
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.5,
    patience=2,
    verbose=True,
)

## 9. 학습 및 평가 함수

In [None]:
def train_epoch(model, loader, criterion, optimizer, device):
    """Run one training pass over ``loader``, updating ``model`` in place.

    Args:
        model: Network to train; switched to training mode here.
        loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable ``criterion(outputs, labels)``. The epoch
            loss assumes it returns the mean over the batch (the default
            reduction of ``nn.CrossEntropyLoss``).
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to.

    Returns:
        tuple: ``(mean loss per sample, accuracy in percent)``.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.train()
    loss_sum = 0.0
    correct = 0
    total = 0

    pbar = tqdm(loader, desc='Training')
    for images, labels in pbar:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        batch_loss = loss.item()
        # Weight each batch mean by its size so a short final batch does not
        # count as much as a full one
        loss_sum += batch_loss * batch_size
        total += batch_size
        correct += (outputs.argmax(dim=1) == labels).sum().item()

        pbar.set_postfix({'loss': batch_loss, 'acc': 100 * correct / total})

    if total == 0:
        raise ValueError("loader yielded no samples; cannot compute training metrics")

    return loss_sum / total, 100 * correct / total

def validate(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without updating any weights.

    Args:
        model: Network to evaluate; switched to eval mode here.
        loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable ``criterion(outputs, labels)``. The reported
            loss assumes it returns the mean over the batch (the default
            reduction of ``nn.CrossEntropyLoss``).
        device: Device the batches are moved to.

    Returns:
        tuple: ``(mean loss per sample, accuracy in percent,
        predicted class indices, true class indices)``; the last two are
        lists in loader order.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for images, labels in tqdm(loader, desc='Validation'):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Weight each batch mean by its size so a short final batch does
            # not count as much as a full one
            loss_sum += loss.item() * labels.size(0)
            all_preds.extend(outputs.argmax(dim=1).cpu().tolist())
            all_labels.extend(labels.cpu().tolist())

    if not all_labels:
        raise ValueError("loader yielded no samples; cannot compute validation metrics")

    mean_loss = loss_sum / len(all_labels)
    accuracy = 100 * accuracy_score(all_labels, all_preds)
    return mean_loss, accuracy, all_preds, all_labels

## 10. 학습 실행

In [None]:
# Per-epoch metric history (plotted later) and the best accuracy seen so far
history = {name: [] for name in ('train_loss', 'train_acc', 'val_loss', 'val_acc')}
best_val_acc = 0.0

for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")
    print("-" * 50)

    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
    val_loss, val_acc, _, _ = validate(model, val_loader, criterion, device)

    # Record this epoch's metrics
    _epoch_metrics = {
        'train_loss': train_loss,
        'train_acc': train_acc,
        'val_loss': val_loss,
        'val_acc': val_acc,
    }
    for _name, _value in _epoch_metrics.items():
        history[_name].append(_value)

    print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%")
    print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.2f}%")

    # The plateau scheduler monitors the validation loss
    scheduler.step(val_loss)

    # Only checkpoint when validation accuracy strictly improves
    if not val_acc > best_val_acc:
        continue

    best_val_acc = val_acc
    os.makedirs(os.path.dirname(MODEL_SAVE_PATH), exist_ok=True)
    torch.save(model.state_dict(), MODEL_SAVE_PATH)
    print(f"✓ Best model saved (Val Acc: {val_acc:.2f}%)")

print(f"\n{'='*50}")
print(f"Training completed! Best Val Acc: {best_val_acc:.2f}%")

## 11. 학습 곡선

In [None]:
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

# One panel per metric: (axis, train key, val key, train label, val label, y label, title)
_panels = (
    (ax1, 'train_loss', 'val_loss', 'Train Loss', 'Val Loss', 'Loss', 'Loss Curve'),
    (ax2, 'train_acc', 'val_acc', 'Train Acc', 'Val Acc', 'Accuracy (%)', 'Accuracy Curve'),
)

for _ax, _train_key, _val_key, _train_label, _val_label, _ylabel, _title in _panels:
    _ax.plot(history[_train_key], label=_train_label)
    _ax.plot(history[_val_key], label=_val_label)
    _ax.set_xlabel('Epoch')
    _ax.set_ylabel(_ylabel)
    _ax.set_title(_title)
    _ax.legend()
    _ax.grid(True)

plt.tight_layout()
plt.show()

## 12. 최종 평가

In [None]:
# Reload the best checkpoint. map_location remaps the stored tensors onto the
# current device, so weights saved from a GPU run also load on a CPU-only host.
model.load_state_dict(torch.load(MODEL_SAVE_PATH, map_location=device))
model.eval()

val_loss, val_acc, preds, labels = validate(model, val_loader, criterion, device)

print(f"Final Validation Accuracy: {val_acc:.2f}%")
print(f"Final Validation Loss: {val_loss:.4f}")

# Pin the label set so the matrix and the report always have one row per
# class, even if a class never appears in the validation labels/predictions
# (otherwise their shape would not match CLASS_NAMES).
class_ids = list(range(len(CLASS_NAMES)))

# Confusion matrix (rows: true class, columns: predicted class)
cm = confusion_matrix(labels, preds, labels=class_ids)

plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=CLASS_NAMES, yticklabels=CLASS_NAMES)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

print("\nClassification Report:")
print(classification_report(labels, preds, labels=class_ids, target_names=CLASS_NAMES))

## 13. 추론 속도 벤치마크

In [None]:
# Measure single-image inference latency
model.eval()
dummy_input = torch.randn(1, 3, IMG_SIZE, IMG_SIZE).to(device)
num_runs = 100

with torch.no_grad():
    # Warm-up iterations (not timed)
    for _ in range(10):
        _ = model(dummy_input)

    # CUDA kernels are launched asynchronously: wait for the warm-up work to
    # finish so it is not billed to the timed section
    if device.type == 'cuda':
        torch.cuda.synchronize()

    # perf_counter is monotonic and high-resolution, unlike time.time
    start_time = time.perf_counter()

    for _ in range(num_runs):
        _ = model(dummy_input)

    # Wait for all timed kernels to complete before stopping the clock
    if device.type == 'cuda':
        torch.cuda.synchronize()

    end_time = time.perf_counter()

avg_time = (end_time - start_time) / num_runs * 1000  # ms

print(f"Inference Speed:")
print(f"  Average time: {avg_time:.2f} ms")
print(f"  Throughput: {1000/avg_time:.1f} images/sec")
print(f"\n{'✓' if avg_time < 100 else '✗'} Target < 100ms: {'Achieved' if avg_time < 100 else 'Not achieved'}")

## 14. 추론 함수

In [None]:
def detect_beverage(model, image_path, transform, device):
    """
    Classify a single image as containing a beverage or not.

    Args:
        model: Trained model; switched to eval mode here
        image_path: Path of the image to classify
        transform: Preprocessing transform applied to the RGB image
        device: Device the input tensor is moved to

    Returns:
        dict: {
            'has_beverage': bool, True when the predicted class index is 1,
            'confidence': float, softmax probability of the predicted class,
            'class': str, CLASS_NAMES entry of the predicted class,
            'probabilities': per-class softmax probabilities as a numpy array
        }
    """
    model.eval()

    rgb_image = Image.open(image_path).convert('RGB')
    batch = transform(rgb_image).unsqueeze(0).to(device)  # add the batch dimension

    with torch.no_grad():
        probabilities = torch.softmax(model(batch), dim=1)
        confidence, predicted = torch.max(probabilities, 1)

    class_idx = predicted.item()

    return {
        'has_beverage': class_idx == 1,
        'confidence': confidence.item(),
        'class': CLASS_NAMES[class_idx],
        'probabilities': probabilities.cpu().numpy()[0]
    }

def visualize_detection(image_path, result):
    """Show the image next to a bar chart of its class probabilities.

    Args:
        image_path: Path of the image that was classified
        result: Dict providing the 'class', 'confidence' and 'probabilities'
            entries read below
    """
    picture = Image.open(image_path)
    heading = f"Detection: {result['class']}\nConfidence: {result['confidence']*100:.1f}%"

    fig = plt.figure(figsize=(10, 5))

    # Left panel: the input image with the predicted class as title
    image_ax = fig.add_subplot(1, 2, 1)
    image_ax.imshow(picture)
    image_ax.axis('off')
    image_ax.set_title(heading)

    # Right panel: probability of each class
    prob_ax = fig.add_subplot(1, 2, 2)
    prob_ax.bar(CLASS_NAMES, result['probabilities'])
    prob_ax.set_ylabel('Probability')
    prob_ax.set_title('Class Probabilities')
    prob_ax.set_ylim(0, 1)

    plt.tight_layout()
    plt.show()

## 15. 테스트 이미지로 추론

In [None]:
# Sample images: one with a beverage, one without
test_image_with = '../data/beverage_detection/val/with_beverage/test1.jpg'
test_image_without = '../data/beverage_detection/val/without_beverage/test2.jpg'

# Run detection on each sample that exists on disk (numbered from 1)
for _case_no, _case_path in enumerate((test_image_with, test_image_without), start=1):
    if not os.path.exists(_case_path):
        continue
    result = detect_beverage(model, _case_path, val_transform, device)
    print(f"Test {_case_no}: {result['class']} (confidence: {result['confidence']*100:.2f}%)")
    visualize_detection(_case_path, result)

## 16. 모델 최적화 (선택사항)

In [None]:
# Dynamic INT8 quantization of the Linear layers
import copy

# Dynamic quantization only provides CPU kernels, so quantize a CPU copy and
# leave `model` itself on its current device for the cells that follow.
model_cpu = copy.deepcopy(model).to('cpu').eval()
model_quantized = torch.quantization.quantize_dynamic(
    model_cpu, {nn.Linear}, dtype=torch.qint8
)

# Save the quantized weights next to the original checkpoint
quantized_path = MODEL_SAVE_PATH.replace('.pth', '_quantized.pth')
torch.save(model_quantized.state_dict(), quantized_path)

print(f"Quantized model saved: {quantized_path}")

# Compare on-disk sizes of the two checkpoints
original_size = os.path.getsize(MODEL_SAVE_PATH) / (1024 * 1024)  # MB
quantized_size = os.path.getsize(quantized_path) / (1024 * 1024)  # MB

print(f"\nModel size:")
print(f"  Original: {original_size:.2f} MB")
print(f"  Quantized: {quantized_size:.2f} MB")
print(f"  Reduction: {(1 - quantized_size/original_size)*100:.1f}%")

## 17. ONNX 변환

In [None]:
# Export the model to ONNX for deployment
onnx_path = MODEL_SAVE_PATH.replace('.pth', '.onnx')
dummy_input = torch.randn(1, 3, IMG_SIZE, IMG_SIZE).to(device)

# Mark dimension 0 of both the input and the output as a variable batch size
_export_options = {
    'export_params': True,
    'opset_version': 11,
    'input_names': ['input'],
    'output_names': ['output'],
    'dynamic_axes': {'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}},
}
torch.onnx.export(model, dummy_input, onnx_path, **_export_options)

print(f"Model exported to ONNX: {onnx_path}")

## 18. 실전 시나리오 테스트

In [None]:
# Real-world scenario: usage-verification workflow
def verify_usage(image_path, model, transform, device, confidence_threshold=0.7):
    """
    Usage verification: passes only when a beverage is detected with
    sufficient confidence.

    Args:
        image_path: Image submitted for verification
        model: Trained model
        transform: Preprocessing transform
        device: Device used for inference
        confidence_threshold: Minimum confidence required to pass

    Returns:
        dict: 'is_valid', 'has_beverage', 'confidence' and 'message'
    """
    detection = detect_beverage(model, image_path, transform, device)
    has_beverage = detection['has_beverage']
    confidence = detection['confidence']

    # Valid only when a beverage was predicted AND the confidence clears the bar
    if has_beverage and confidence >= confidence_threshold:
        is_valid = True
        message = 'OK'
    else:
        is_valid = False
        message = 'Beverage not detected or low confidence'

    return {
        'is_valid': is_valid,
        'has_beverage': has_beverage,
        'confidence': confidence,
        'message': message
    }

# Scenario images: (path, description printed with the result)
test_cases = [
    ('../data/usage_test/coffee_tumbler.jpg', '커피 담긴 텀블러'),
    ('../data/usage_test/empty_tumbler.jpg', '빈 텀블러'),
    ('../data/usage_test/water_bottle.jpg', '물 담긴 보틀'),
]

for img_path, description in test_cases:
    # Skip scenario images that are not present on disk
    if not os.path.exists(img_path):
        continue

    result = verify_usage(img_path, model, val_transform, device)
    print(f"{description}:")
    print(f"  Valid: {result['is_valid']}")
    print(f"  Has beverage: {result['has_beverage']}")
    print(f"  Confidence: {result['confidence']*100:.2f}%")
    print(f"  Message: {result['message']}")
    print()

## 요약

### 모델 성능
- **최고 검증 정확도**: 10번 셀(학습 실행)에서 출력되는 `best_val_acc` 값 (%)
- **추론 속도**: 13번 셀(추론 속도 벤치마크)에서 측정되는 `avg_time` 값 (ms, 목표 < 100ms)
- **모델 크기**: 16번 셀(모델 최적화)에서 출력되는 `original_size` 값 (MB)

### 검증 기준
- **음료 있음** → ✅ 통과
- **음료 없음** → ❌ 거부
- **신뢰도 임계값**: 0.7 (조정 가능)

### 다음 단계
1. FastAPI 서버에 모델 통합
2. 실전 데이터로 추가 테스트
3. 오분류 케이스 분석 및 개선
4. 모델 최적화 (양자화, ONNX)