In [5]:
import torch
import torch.nn as nn
from PIL import Image
import cv2
import time
import numpy as np
from torchinfo import torchinfo
from thop import profile

In [6]:
class MaskClassifier(nn.Module):
    """Small CNN that maps an RGB image batch to two class logits.

    The feature extractor is three identical stages that only differ in
    channel width (3 -> 16 -> 32 -> 64). Each stage is
    Conv2d(3x3, padding=1) -> BatchNorm2d -> ReLU -> MaxPool2d(2, 2) ->
    Dropout2d(0.2). The head is global average pooling, a flatten and one
    Linear(64, 2) layer.
    """

    def __init__(self):
        super().__init__()

        # Feature extraction (pruned architecture). All stages are flattened
        # into a single Sequential so the sub-module indices run 0..14.
        channel_widths = (3, 16, 32, 64)
        stage_layers = []
        for in_channels, out_channels in zip(channel_widths[:-1], channel_widths[1:]):
            stage_layers += [
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(2, 2),
                nn.Dropout2d(0.2),
            ]
        self.features = nn.Sequential(*stage_layers)

        # Classifier head
        head_layers = [
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.Linear(64, 2),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run the feature extractor and the head; returns the raw logits."""
        feature_maps = self.features(x)
        return self.classifier(feature_maps)

In [7]:
# 1. Load the model
# Inference is pinned to the CPU here; the commented-out line below is the
# alternative that picks CUDA automatically when it is available.
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device('cpu')
model = MaskClassifier().to(device)

In [8]:
# Load the trained weights
# NOTE(review): torch.load unpickles the file, so only load checkpoints that
# come from a trusted source.
model.load_state_dict(torch.load("pt_mask_classifier.pth", map_location=device))
# Switch to evaluation mode; as the last expression of the cell this also
# displays the module structure.
model.eval()

MaskClassifier(
  (features): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Dropout2d(p=0.2, inplace=False)
    (5): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): ReLU(inplace=True)
    (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (9): Dropout2d(p=0.2, inplace=False)
    (10): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Dropout2d(p=0.2, inplace=False)
  )

In [9]:
# 이미지 전처리 함수
def preprocess_image(image):
    """Convert a BGR image array into a normalized (1, 3, 128, 128) float tensor.

    Steps: resize to 128x128, convert BGR -> RGB, scale to [0, 1], normalize
    each channel with the ImageNet mean / std, move channels first and add a
    leading batch dimension.
    """
    # Per-channel ImageNet statistics, in RGB order.
    imagenet_mean = np.array([0.485, 0.456, 0.406])
    imagenet_std = np.array([0.229, 0.224, 0.225])

    resized_bgr = cv2.resize(image, (128, 128))
    rgb = cv2.cvtColor(resized_bgr, cv2.COLOR_BGR2RGB)

    unit_range = rgb.astype(np.float32) / 255.0
    normalized = (unit_range - imagenet_mean) / imagenet_std

    # (H, W, C) -> (C, H, W), then numpy -> tensor with a batch axis in front.
    channels_first = normalized.transpose(2, 0, 1)
    return torch.FloatTensor(channels_first).unsqueeze(0)

In [10]:
def infer_webcam():
    """Run live mask classification on the default webcam (device index 0).

    Loads the classifier weights from "pt_mask_classifier.pth", detects faces
    in every horizontally flipped frame with a Haar cascade, classifies each
    detected face (cropped with a 30-pixel margin) as "With Mask" or
    "Without Mask", and shows the annotated frames until 'q' is pressed or a
    frame cannot be read. Timing statistics are printed afterwards.

    The capture device and the display window are released even if an error
    is raised inside the loop. Returns None; returns early with a message when
    the cascade file or the camera cannot be opened.
    """
    # Load the model
    model = MaskClassifier().to(device)
    model.load_state_dict(torch.load("pt_mask_classifier.pth", map_location=device))
    model.eval()

    # Load the cascade classifier used for face detection
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
    # cascade_path = "/usr/share/opencv4/haarcascades/haarcascade_frontalface_default.xml"  # common Linux install path
    # face_cascade = cv2.CascadeClassifier(cascade_path)
    if face_cascade.empty():
        # An unloaded cascade would make detectMultiScale fail on the first frame.
        print("Could not load the face cascade classifier.")
        return

    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Could not open the webcam.")
        cap.release()
        return

    dataset_classes = ["With Mask", "Without Mask"]
    # Value subtracted from every pixel to darken the frame (0-255); 0 disables it.
    brightness_offset = 0

    print("Press 'q' to quit.")

    # Performance counters. perf_counter is monotonic and high resolution, so
    # consecutive readings are suitable for measuring short intervals.
    frame_count = 0
    inference_count = 0
    total_time = 0.0
    total_inference_time = 0.0
    prev_time = time.perf_counter()
    fps = 0.0

    try:
        while True:
            frame_start_time = time.perf_counter()
            ret, frame = cap.read()
            if not ret:
                break

            # Mirror the frame horizontally
            frame = cv2.flip(frame, 1)

            # Brightness adjustment (darken the image); skipped when the offset is 0
            if brightness_offset:
                frame = cv2.convertScaleAbs(frame, alpha=1, beta=-brightness_offset)

            # Face detection
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            faces = face_cascade.detectMultiScale(
                gray,
                scaleFactor=1.1,
                minNeighbors=5,
                minSize=(60, 60)
            )

            # Instantaneous FPS; keep the previous value if the clock did not advance
            current_time = time.perf_counter()
            elapsed = current_time - prev_time
            if elapsed > 0:
                fps = 1 / elapsed
            prev_time = current_time

            # Classify every detected face
            for (x, y, w, h) in faces:
                # Crop the face with a 30-pixel margin, clamped to the frame borders
                face_roi = frame[max(0, y-30):min(frame.shape[0], y+h+30),
                                 max(0, x-30):min(frame.shape[1], x+w+30)]
                if face_roi.size == 0:
                    continue

                # Time preprocessing + forward pass for this face
                inference_start = time.perf_counter()
                input_tensor = preprocess_image(face_roi).to(device)
                with torch.no_grad():
                    output = model(input_tensor)
                    _, pred = torch.max(output, 1)
                inference_time = time.perf_counter() - inference_start
                total_inference_time += inference_time
                inference_count += 1

                # Green for "With Mask", red for "Without Mask" (BGR colors)
                label = dataset_classes[pred.item()]
                color = (0, 255, 0) if "With" in label else (0, 0, 255)

                # Draw the detection box, the label and the inference time
                cv2.rectangle(frame, (x, y), (x+w, y+h), color, 2)
                cv2.putText(frame, f"{label} ({inference_time*1000:.1f}ms)",
                            (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)

            # Total processing time of this frame
            frame_time = time.perf_counter() - frame_start_time
            total_time += frame_time
            frame_count += 1

            # Overlay the performance indicators
            cv2.putText(frame, f"FPS: {fps:.1f}", (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.putText(frame, f"Frame Time: {frame_time*1000:.1f}ms", (10, 70),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

            # Show the result
            cv2.imshow("Mask Detection", frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close the window, even after an error.
        cap.release()
        cv2.destroyAllWindows()

    # Final statistics; nothing to average when no frame was processed
    if frame_count == 0 or total_time <= 0:
        print("\nNo frames were processed.")
        return

    avg_fps = frame_count / total_time
    avg_frame_time = total_time / frame_count
    # Average over the inferences actually run, not over the frames: frames
    # without a face run no inference and frames with several faces run many.
    avg_inference_time = total_inference_time / inference_count if inference_count > 0 else 0

    print("\n=== Performance Statistics ===")
    print(f"Total Frames: {frame_count}")
    print(f"Average FPS: {avg_fps:.1f}")
    print(f"Average Frame Time: {avg_frame_time*1000:.1f}ms")
    print(f"Average Inference Time: {avg_inference_time*1000:.1f}ms")

In [11]:
# Run real-time webcam inference
infer_webcam()

Press 'q' to quit.

=== Performance Statistics ===
Total Frames: 162
Average FPS: 17.7
Average Frame Time: 56.6ms
Average Inference Time: 12.2ms


In [12]:

def infer_csi_camera():
    """
    Real-time inference using the Jetson Nano CSI camera.

    Opens the camera through a GStreamer pipeline (640x480, 30 fps, BGR
    output), loads the classifier weights from "pt_mask_classifier.pth",
    detects faces in every horizontally flipped and darkened frame with a Haar
    cascade, classifies each detected face (cropped with a 30-pixel margin) as
    "With Mask" or "Without Mask", and shows the annotated frames until 'q' is
    pressed or a frame cannot be read. Timing statistics are printed
    afterwards.

    The capture device and the display window are released even if an error
    is raised inside the loop. Returns None; returns early with a message when
    the cascade file or the camera cannot be opened.
    """
    # Load the model
    model = MaskClassifier().to(device)
    model.load_state_dict(torch.load("pt_mask_classifier.pth", map_location=device))
    model.eval()

    # Load the cascade classifier used for face detection
    # face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
    cascade_path = "/usr/share/opencv4/haarcascades/haarcascade_frontalface_default.xml"  # common Linux install path
    face_cascade = cv2.CascadeClassifier(cascade_path)
    if face_cascade.empty():
        # The path is hard-coded, so a missing file must be reported instead of
        # letting detectMultiScale fail on the first frame.
        print("얼굴 검출용 cascade 분류기를 불러올 수 없습니다.")
        return

    # GStreamer pipeline definition
    gst_pipeline = (
        "nvarguscamerasrc ! "
        "video/x-raw(memory:NVMM), width=640, height=480, format=(string)NV12, framerate=30/1 ! "
        "nvvidconv flip-method=0 ! "
        "video/x-raw, width=640, height=480, format=(string)BGRx ! "
        "videoconvert ! "
        "video/x-raw, format=(string)BGR ! appsink"
    )

    cap = cv2.VideoCapture(gst_pipeline, cv2.CAP_GSTREAMER)
    if not cap.isOpened():
        print("CSI 카메라를 열 수 없습니다.")
        cap.release()
        return

    dataset_classes = ["With Mask", "Without Mask"]
    # Value subtracted from every pixel to darken the frame (0-255); 0 disables it.
    brightness_offset = 50

    print("Press 'q' to quit.")

    # Performance counters. perf_counter is monotonic and high resolution, so
    # consecutive readings are suitable for measuring short intervals.
    frame_count = 0
    inference_count = 0
    total_time = 0.0
    total_inference_time = 0.0
    prev_time = time.perf_counter()
    fps = 0.0

    try:
        while True:
            frame_start_time = time.perf_counter()
            ret, frame = cap.read()
            if not ret:
                print("카메라 프레임을 읽을 수 없습니다.")
                break

            # Mirror the frame horizontally
            frame = cv2.flip(frame, 1)

            # Brightness adjustment (darken the image); skipped when the offset is 0
            if brightness_offset:
                frame = cv2.convertScaleAbs(frame, alpha=1, beta=-brightness_offset)

            # Face detection
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            faces = face_cascade.detectMultiScale(
                gray,
                scaleFactor=1.1,
                minNeighbors=5,
                minSize=(60, 60)
            )

            # Instantaneous FPS; keep the previous value if the clock did not advance
            current_time = time.perf_counter()
            elapsed = current_time - prev_time
            if elapsed > 0:
                fps = 1 / elapsed
            prev_time = current_time

            # Classify every detected face
            for (x, y, w, h) in faces:
                # Crop the face with a 30-pixel margin, clamped to the frame borders
                face_roi = frame[max(0, y-30):min(frame.shape[0], y+h+30),
                                 max(0, x-30):min(frame.shape[1], x+w+30)]
                if face_roi.size == 0:
                    continue

                # Time preprocessing + forward pass for this face
                inference_start = time.perf_counter()
                input_tensor = preprocess_image(face_roi).to(device)
                with torch.no_grad():
                    output = model(input_tensor)
                    _, pred = torch.max(output, 1)
                inference_time = time.perf_counter() - inference_start
                total_inference_time += inference_time
                inference_count += 1

                # Green for "With Mask", red for "Without Mask" (BGR colors)
                label = dataset_classes[pred.item()]
                color = (0, 255, 0) if "With" in label else (0, 0, 255)

                # Draw the detection box, the label and the inference time
                cv2.rectangle(frame, (x, y), (x+w, y+h), color, 2)
                cv2.putText(frame, f"{label} ({inference_time*1000:.1f}ms)",
                            (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)

            # Total processing time of this frame
            frame_time = time.perf_counter() - frame_start_time
            total_time += frame_time
            frame_count += 1

            # Overlay the performance indicators
            cv2.putText(frame, f"FPS: {fps:.1f}", (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.putText(frame, f"Frame Time: {frame_time*1000:.1f}ms", (10, 70),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

            # Show the result
            cv2.imshow("CSI Camera Mask Detection", frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close the window, even after an error.
        cap.release()
        cv2.destroyAllWindows()

    # Final statistics; nothing to average when no frame was processed
    if frame_count == 0 or total_time <= 0:
        print("\nNo frames were processed.")
        return

    avg_fps = frame_count / total_time
    avg_frame_time = total_time / frame_count
    # Average over the inferences actually run, not over the frames: frames
    # without a face run no inference and frames with several faces run many.
    avg_inference_time = total_inference_time / inference_count if inference_count > 0 else 0

    print("\n=== Performance Statistics ===")
    print(f"Total Frames: {frame_count}")
    print(f"Average FPS: {avg_fps:.1f}")
    print(f"Average Frame Time: {avg_frame_time*1000:.1f}ms")
    print(f"Average Inference Time: {avg_inference_time*1000:.1f}ms")

In [13]:
# 5. Run real-time inference on the CSI camera
infer_csi_camera()

CSI 카메라를 열 수 없습니다.


In [14]:
def count_parameters(model):
    """Tally the parameters of ``model``.

    Returns a dict with the element counts 'total', 'trainable' (parameters
    with ``requires_grad`` set), 'zero' (elements equal to 0) and 'nonzero'
    (elements different from 0), summed over ``model.parameters()``.
    """
    tally = {'total': 0, 'trainable': 0, 'zero': 0, 'nonzero': 0}

    for tensor in model.parameters():
        element_count = tensor.numel()
        tally['total'] += element_count
        tally['trainable'] += element_count if tensor.requires_grad else 0

        # Element-wise comparison against 0, reduced to a Python int.
        tally['zero'] += (tensor == 0).sum().item()
        tally['nonzero'] += (tensor != 0).sum().item()

    return tally

def calculate_flops(model, input_size=(1, 3, 128, 128)):
    """Estimate the FLOPs of one forward pass, scaled by weight density.

    Args:
        model: Module to profile.
        input_size: Shape of the random dummy input passed to the profiler.
            Defaults to one 3x128x128 image.

    Returns:
        dict with
            'raw_flops':    2 x the MAC count reported by ``profile``,
            'actual_flops': raw_flops multiplied by the fraction of non-zero
                            weight elements,
            'sparsity':     percentage of zero elements among the parameters
                            whose name contains 'weight' (0.0 when the model
                            has no such parameters).
    """
    # Create the dummy input on the model's own device so profiling also works
    # for models that do not live on the CPU.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')
    input_tensor = torch.randn(*input_size, device=target_device)

    # NOTE(review): some thop releases are reported to leave bookkeeping
    # buffers on the profiled modules — confirm for the installed version if
    # the model's state_dict is saved or measured after this call.
    macs, _ = profile(model, inputs=(input_tensor,))

    flops = 2 * macs  # Convert MACs to FLOPs

    # Fraction of non-zero elements, weight parameters only
    total_weights = 0
    nonzero_weights = 0
    for name, param in model.named_parameters():
        if 'weight' in name:
            total_weights += param.numel()
            nonzero_weights += torch.sum(param != 0).item()

    # Estimate of the work actually performed; a model without weight
    # parameters is treated as dense instead of dividing by zero.
    sparsity = 1 - (nonzero_weights / total_weights) if total_weights else 0.0
    actual_flops = flops * (1 - sparsity)

    return {
        'raw_flops': flops,
        'actual_flops': actual_flops,
        'sparsity': sparsity * 100
    }
    
def get_model_size(model):
    """Compute the in-memory size of a model's parameters and buffers.

    Parameters and buffers are measured separately from
    ``model.parameters()`` and ``model.buffers()``. ``state_dict()`` is not
    used for the parameter figure because it also contains the persistent
    buffers, which would then be counted twice.

    Returns:
        dict with
            'bytes', 'kb', 'mb': total size (parameters + buffers),
            'params':            number of parameter elements,
            'param_size':        parameter bytes,
            'buffer_size':       buffer bytes (e.g. BatchNorm running stats).
    """
    # Bytes = element count x bytes per element, summed over each group.
    param_size = sum(p.numel() * p.element_size() for p in model.parameters())
    buffer_size = sum(b.numel() * b.element_size() for b in model.buffers())

    total_size = param_size + buffer_size

    # Unit conversions
    size_kb = total_size / 1024
    size_mb = size_kb / 1024

    return {
        'bytes': total_size,
        'kb': size_kb,
        'mb': size_mb,
        'params': sum(p.numel() for p in model.parameters()),
        'param_size': param_size,
        'buffer_size': buffer_size
    }

In [15]:
def print_model_analysis(target_model=None):
    """Print parameter, size and FLOPs statistics for a model.

    Args:
        target_model: Model to analyse. When omitted, the notebook-level
            ``model`` variable is used, so the no-argument call keeps working.

    The report is built from ``count_parameters``, ``calculate_flops`` and
    ``get_model_size`` and written to stdout; nothing is returned.
    """
    # Resolve the global only at call time and only when no model was passed.
    net = model if target_model is None else target_model

    params = count_parameters(net)
    flops_info = calculate_flops(net)
    size_info = get_model_size(net)

    total = params['total']

    def percent(part):
        # A model without parameters reports 0.0% instead of dividing by zero.
        return part / total * 100 if total else 0.0

    bytes_per_mb = 1024 * 1024

    print("\n=== Model Analysis ===")
    print(f"Total Parameters: {total:,}")
    print(f"├─ Non-zero Parameters: {params['nonzero']:,} ({percent(params['nonzero']):.1f}%)")
    print(f"└─ Zero Parameters: {params['zero']:,} ({percent(params['zero']):.1f}%)")
    print(f"Model Sparsity: {percent(params['zero']):.1f}%")

    print("\nModel Size:")
    print(f"├─ Total Size: {size_info['mb']:.2f} MB")
    print(f"├─ Parameter Memory: {size_info['param_size']/bytes_per_mb:.2f} MB")
    print(f"└─ Buffer Memory: {size_info['buffer_size']/bytes_per_mb:.2f} MB")

    print("\nFLOPs Analysis:")
    print(f"├─ Raw FLOPs: {flops_info['raw_flops']:,}")
    print(f"├─ Weight Sparsity: {flops_info['sparsity']:.1f}%")
    print(f"└─ Estimated Actual FLOPs: {flops_info['actual_flops']:,.0f}")

In [16]:
# Print the parameter / size / FLOPs report for the notebook-level model
print_model_analysis()

[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_normalization() for <class 'torch.nn.modules.batchnorm.BatchNorm2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.activation.ReLU'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.pooling.MaxPool2d'>.
[INFO] Register zero_ops() for <class 'torch.nn.modules.container.Sequential'>.
[INFO] Register count_adap_avgpool() for <class 'torch.nn.modules.pooling.AdaptiveAvgPool2d'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.

=== Model Analysis ===
Total Parameters: 23,938
├─ Non-zero Parameters: 23,938 (100.0%)
└─ Zero Parameters: 0 (0.0%)
Model Sparsity: 0.0%

Model Size:
├─ Total Size: 0.09 MB
├─ Parameter Memory: 0.09 MB
└─ Buffer Memory: 0.00 MB

FLOPs Analysis:
├─ Raw FLOPs: 93,356,416.0
├─ Weight Sparsity: 0.0%
└─ Estimated Actual FLOPs: 93,356,416


In [17]:
# Detailed model summary
# Layer-by-layer table for a single 3x128x128 input; the summary is the last
# expression of the cell, so the notebook renders it as the cell output.
print("\n=== Model Architecture Summary ===")
torchinfo.summary(model, input_size=(1, 3, 128, 128), 
                 col_names=["input_size", "output_size", "num_params", "kernel_size", "mult_adds"])


=== Model Architecture Summary ===


Layer (type:depth-idx)                   Input Shape               Output Shape              Param #                   Kernel Shape              Mult-Adds
MaskClassifier                           --                        --                        --                        --                        --
├─Sequential: 1-1                        [1, 3, 128, 128]          [1, 64, 16, 16]           --                        --                        --
│    └─Conv2d: 2-1                       [1, 3, 128, 128]          [1, 16, 128, 128]         448                       [3, 16, 3, 3]             7,340,032
│    └─BatchNorm2d: 2-2                  [1, 16, 128, 128]         [1, 16, 128, 128]         32                        [16]                      32
│    └─ReLU: 2-3                         [1, 16, 128, 128]         [1, 16, 128, 128]         --                        --                        --
│    └─MaxPool2d: 2-4                    [1, 16, 128, 128]         [1, 16, 64, 64]           --   