In [None]:
import cv2
import numpy as np
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torchvision.models import resnet50
from gradcam.utils import visualize_cam
from gradcam import GradCAM
from PIL import Image
import warnings
warnings.filterwarnings(action='ignore')

# Notebook-wide settings, collected in one place so they are easy to tune.
# IMG_SIZE is the side length (in pixels) that the preprocessing pipelines
# resize/crop images to.
CFG = dict(
    IMG_SIZE=224,
    EPOCHS=100,
    LEARNING_RATE=1e-6,
    BATCH_SIZE=64,
    SEED=41,
)

# Device selection and model loading
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ResNet-50 with its final fully connected layer replaced by a 29-output
# classifier head (presumably matching the head stored in the checkpoint).
model = resnet50()
model.fc = nn.Linear(model.fc.in_features, 29)

# map_location remaps the stored tensors onto the selected device, so a
# checkpoint saved from a CUDA run still loads on a CPU-only machine.
model.load_state_dict(
    torch.load('data/save_data/saved/best_model2.pth', map_location=device)
)
model.to(device)
model.eval()  # evaluation mode: batch norm uses its running statistics

# Grad-CAM setup: layer4 of the ResNet is used as the target layer
gradcam = GradCAM(model, model.layer4)


# Image preprocessing pipelines
_side = CFG['IMG_SIZE']
# Per-channel mean / std passed to Normalize (the standard ImageNet statistics).
_channel_mean = (0.485, 0.456, 0.406)
_channel_std = (0.229, 0.224, 0.225)

# Deterministic pipeline: resize to a square, convert to a tensor, normalize
# each channel.
_eval_steps = [
    transforms.Resize((_side, _side)),
    transforms.ToTensor(),
    transforms.Normalize(_channel_mean, _channel_std),
]
transform = transforms.Compose(_eval_steps)

# Augmenting pipeline: random flip, small rotation, random resized crop and
# colour jitter, followed by the same tensor conversion and normalization.
_augment_steps = [
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.RandomResizedCrop(_side, scale=(0.8, 1.0)),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(_channel_mean, _channel_std),
]
train_transforms = transforms.Compose(_augment_steps)
# ImageFolder indexes the directory tree when it is constructed, so the class
# names and the per-sample labels are available without decoding any image.
dataset = ImageFolder(root='./INSECT_CLASSIFICATION/FINAL/', transform=train_transforms)
class_names = dataset.classes

# Count samples per class from the label index (dataset.targets) rather than
# iterating the dataset itself, which would load and randomly augment every
# image on disk just to read its label.
class_counts = [0] * 29
for label in dataset.targets:
    class_counts[label] += 1

In [None]:
ESC_KEY = 27
LABEL_FONT = cv2.FONT_HERSHEY_SIMPLEX


def _saliency_to_uint8(saliency):
    """Rescale a saliency map to a uint8 array spanning 0..255.

    Parameters
    ----------
    saliency : torch.Tensor
        Saliency values; singleton dimensions are squeezed away.
        (assumes the result is 2-D after squeezing -- TODO confirm against
        the Grad-CAM implementation in use)

    Returns
    -------
    numpy.ndarray
        uint8 array. A constant or non-finite map yields all zeros instead of
        the NaN values a plain (x - min) / (max - min) would produce.
    """
    values = saliency.detach().cpu().numpy().astype(np.float32)
    values = np.nan_to_num(values, nan=0.0, posinf=0.0, neginf=0.0)
    low = values.min()
    span = values.max() - low
    if span <= 0:
        # Flat map: nothing to highlight, and dividing by zero would give NaN.
        return np.zeros(values.squeeze().shape, dtype=np.uint8)
    return np.uint8(255 * (values - low) / span).squeeze()


# Open the default webcam
cap = cv2.VideoCapture(0)
try:
    if not cap.isOpened():
        print("Could not open webcam (device index 0)")

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Convert the BGR frame from OpenCV into an RGB PIL image
        pil_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        # Preprocess and add a batch dimension
        img_tensor = transform(pil_image).unsqueeze(0).to(device)

        # Prediction only needs the forward pass, so skip building an
        # autograd graph here; Grad-CAM is called separately below.
        with torch.no_grad():
            preds = model(img_tensor)
        predicted_idx = preds.argmax(dim=1).item()
        predicted_class = class_names[predicted_idx]

        # Grad-CAM saliency for the predicted class
        mask = gradcam(img_tensor, class_idx=predicted_idx)

        # Colourize the saliency map and resize it to the frame size
        heatmap = cv2.applyColorMap(_saliency_to_uint8(mask[0]), cv2.COLORMAP_JET)
        heatmap_resized = cv2.resize(heatmap, (frame.shape[1], frame.shape[0]))

        # Blend the heatmap over the original frame (both are uint8 BGR)
        cam_img = cv2.addWeighted(frame, 0.6, heatmap_resized, 0.4, 0)

        # Draw the class name as black text on a white box in the top-left corner
        (text_w, text_h), _baseline = cv2.getTextSize(predicted_class, LABEL_FONT, 0.5, 1)
        cam_img[10:10 + text_h + 8, 10:10 + text_w + 8] = (255, 255, 255)
        cv2.putText(cam_img, predicted_class, (15, 10 + text_h + 3),
                    LABEL_FONT, 0.5, (0, 0, 0), 1, cv2.LINE_AA)

        # Show the result
        cv2.imshow('Grad-CAM', cam_img)

        # Exit on ESC; mask to the low byte because some backends set high bits
        if cv2.waitKey(1) & 0xFF == ESC_KEY:
            break
finally:
    # Always free the camera and close the window, even if a frame failed
    cap.release()
    cv2.destroyAllWindows()

In [None]:
# To snapshot the environment, run in a shell: pip freeze > requirements.txt