In [None]:
import cv2
import torch
import numpy as np
from segment_anything import sam_model_registry, SamPredictor
import matplotlib.pyplot as plt

In [None]:
# 1. Prepare the SAM model (ViT-B variant used as the example)
sam_checkpoint = "sam_vit_b_01ec64.pth"  # checkpoint file must be downloaded beforehand
model_type = "vit_b"
# Prefer the GPU when one is available, otherwise run on the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
sam.to(device)
predictor = SamPredictor(sam)

# 2. Open the input video for frame extraction and crack detection
video_path = "roboinspec.mp4"
cap = cv2.VideoCapture(video_path)
# VideoCapture does not raise on a missing/undecodable file; without this check
# the processing loop would silently do nothing.
if not cap.isOpened():
    raise RuntimeError(f"Could not open video: {video_path}")
frame_idx = 0

In [None]:
# Optional: save the overlay as a video. Uncomment this block together with the
# `out.write(...)` and `out.release()` lines below.
# fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # or 'XVID', 'DIVX'
# save_path = 'crack_detection_result.mp4'
# fps = cap.get(cv2.CAP_PROP_FPS) if cap.get(cv2.CAP_PROP_FPS) > 0 else 25  # fall back to 25 fps
# out = cv2.VideoWriter(save_path, fourcc, fps, (960, 540))  # (width, height)

# A previous run of this cell releases the capture; re-open it so the cell
# can be re-executed without re-running the setup cell.
if not cap.isOpened():
    cap.open(video_path)

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.resize(frame, (960, 540))
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        predictor.set_image(rgb_frame)

        # Automatic prompt: a regular grid of points spread over the frame.
        # Points are ordered row by row, each stored as (x, y).
        H, W = rgb_frame.shape[:2]
        grid_size = 5  # 5x5 prompt points
        grid_x, grid_y = np.meshgrid(
            np.linspace(0, W - 1, grid_size, dtype=int),
            np.linspace(0, H - 1, grid_size, dtype=int),
        )
        input_points = np.stack([grid_x.ravel(), grid_y.ravel()], axis=1)

        # predict() treats all points as ONE prompt, so with multimask_output=True
        # it returns 3 candidate masks (3, H, W) and 3 scores (3,).
        masks, scores, _ = predictor.predict(
            point_coords=input_points,
            point_labels=np.ones(len(input_points)),  # all points are foreground prompts
            multimask_output=True,
        )
        # Keep the highest-scoring candidate as a full (H, W) mask. Do not reduce
        # over axis 0 afterwards: that would collapse the mask rows into a 1-D vector.
        best_mask = masks[np.argmax(scores)]

        # Post-process the binary mask with two directional morphological openings
        crack_mask = (best_mask > 0).astype(np.uint8) * 255
        crack_mask = cv2.morphologyEx(crack_mask, cv2.MORPH_OPEN, np.ones((3, 1), np.uint8))
        crack_mask = cv2.morphologyEx(crack_mask, cv2.MORPH_OPEN, np.ones((1, 3), np.uint8))

        # Visualise the result. The mask already has the frame's resolution
        # (it was predicted on the resized frame), so no extra resize is needed.
        # 1-channel (gray) -> 3-channel (BGR) so it can be blended with the frame
        crack_mask_bgr = cv2.cvtColor(crack_mask, cv2.COLOR_GRAY2BGR)

        # Blend frame and mask
        overlay = cv2.addWeighted(frame, 0.7, crack_mask_bgr, 0.3, 0)
        cv2.imshow('Crack Detection', overlay)

        # out.write(overlay)

        # Press 'q' to stop early
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        # Save individual frames if needed
        # cv2.imwrite(f"results/frame_{frame_idx:04d}.png", overlay)
        frame_idx += 1
finally:
    # Always free the capture and close the window, even if a frame fails
    cap.release()
    # out.release()
    cv2.destroyAllWindows()


## YOLO11

In [9]:
import cv2
import numpy as np
import ultralytics
from ultralytics import YOLO

# Print the Ultralytics environment summary (library, Python, torch, hardware)
# to confirm the setup before running anything else.
ultralytics.checks()

Ultralytics 8.3.141  Python-3.10.16 torch-2.5.1+cu121 CUDA:0 (NVIDIA GeForce RTX 2080 Ti, 11264MiB)
Setup complete  (12 CPUs, 31.9 GB RAM, 409.1/475.5 GB disk)


In [None]:
# # Train
# # Load a model
# model = YOLO("yolo11n-seg.pt")  # load a pretrained model (recommended for training)

# # Train the model
# results = model.train(data="crack-seg.yaml", epochs=3, imgsz=640, batch=64, workers=64)

In [4]:
# Load the fine-tuned weights
model = YOLO("models/best.pt")

# Run inference on a single test image; save=True writes the annotated
# result to disk.
results = model.predict(source="test.png", save=True)


image 1/1 c:\Users\minja\GitHub\coding-test\\test.png: 448x640 (no detections), 34.5ms
Speed: 2.5ms preprocess, 34.5ms inference, 0.5ms postprocess per image at shape (1, 3, 448, 640)
Results saved to [1mruns\segment\predict[0m


In [None]:
import cv2
import ultralytics
from ultralytics import YOLO

ultralytics.checks()

# Load the fine-tuned model before opening the video so a failed load
# does not leave an open capture behind.
model = YOLO("models/best.pt")

video_path = "roboinspec.mp4"
cap = cv2.VideoCapture(video_path)
# VideoCapture does not raise on a missing/undecodable file; fail loudly instead
# of silently skipping the loop.
if not cap.isOpened():
    raise RuntimeError(f"Could not open video: {video_path}")
frame_idx = 0

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        frame = cv2.resize(frame, (960, 540))

        # Pass the BGR frame as-is: Ultralytics expects numpy inputs in OpenCV's
        # BGR channel order, so converting to RGB first would swap the channels
        # for both inference and the plotted result.
        results = model.predict(frame, save=False)
        # Annotated visualisation of the first (only) result as a BGR numpy image
        result_img = results[0].plot()

        cv2.imshow('Crack Detection', result_img)

        # Press 'q' to stop early
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        frame_idx += 1
finally:
    # Always free the capture and close the window, even if inference fails
    cap.release()
    cv2.destroyAllWindows()

Ultralytics 8.3.141  Python-3.10.16 torch-2.5.1+cu121 CUDA:0 (NVIDIA GeForce RTX 2080 Ti, 11264MiB)
Setup complete  (12 CPUs, 31.9 GB RAM, 409.0/475.5 GB disk)

0: 384x640 2 cracks, 28.8ms
Speed: 3.4ms preprocess, 28.8ms inference, 82.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cracks, 8.0ms
Speed: 1.5ms preprocess, 8.0ms inference, 2.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cracks, 7.3ms
Speed: 1.2ms preprocess, 7.3ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cracks, 5.9ms
Speed: 1.5ms preprocess, 5.9ms inference, 2.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 cracks, 6.8ms
Speed: 1.7ms preprocess, 6.8ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 crack, 8.2ms
Speed: 1.2ms preprocess, 8.2ms inference, 2.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 crack, 6.8ms
Speed: 1.9ms preprocess, 6.8ms inference, 2.0ms postprocess per ima