In [None]:
# 구글 드라이브 마운트
from google.colab import drive
drive.mount('/content/drive')

!nvidia-smi

import os

HOME = "/content/drive/MyDrive/GroundingDINO"  # 구글 드라이브 내 디렉토리 위치
print(HOME)

# HOME 디렉토리가 존재하지 않을 경우 생성
if not os.path.exists(HOME):
    os.makedirs(HOME)

%cd {HOME}

# GroundingDINO 디렉토리가 존재하지 않을 경우에만 git clone 실행
if not os.path.exists(os.path.join(HOME, "GroundingDINO")):
    !git clone https://github.com/IDEA-Research/GroundingDINO.git

%cd {HOME}/GroundingDINO
!pip install -q -e .
!pip install -q roboflow

import os

CONFIG_PATH = os.path.join(HOME, "GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py")
print(CONFIG_PATH, "; exist:", os.path.isfile(CONFIG_PATH))

%cd {HOME}

# weights 디렉토리가 존재하지 않을 경우에만 생성
if not os.path.exists(os.path.join(HOME, "weights")):
    os.makedirs(os.path.join(HOME, "weights"))

%cd {HOME}/weights
!wget -q https://github.com/IDEA-Research/GroundingDINO/releases/download/v0.1.0-alpha/groundingdino_swint_ogc.pth

import os

WEIGHTS_NAME = "groundingdino_swint_ogc.pth"
WEIGHTS_PATH = os.path.join(HOME, "weights", WEIGHTS_NAME)
print(WEIGHTS_PATH, "; exist:", os.path.isfile(WEIGHTS_PATH))

%cd {HOME}/GroundingDINO

from groundingdino.util.inference import load_model, load_image, predict, annotate

# Load the detector from the config and checkpoint paths defined during setup.
model = load_model(CONFIG_PATH, WEIGHTS_PATH)

IMAGE_DIR = "/content/drive/MyDrive/bird/sample"  # folder containing the input images
IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png")

# Compare extensions case-insensitively so files such as "IMG_1.JPG" are not
# skipped, and sort because os.listdir() returns entries in arbitrary order;
# sorting makes the subset limited by max_images reproducible.
image_files = sorted(
    os.path.join(IMAGE_DIR, file)
    for file in os.listdir(IMAGE_DIR)
    if file.lower().endswith(IMAGE_EXTENSIONS)
)

# Create the directory that receives annotated images, labels and metrics.
output_dir = "/content/drive/MyDrive/GroundingDINO/output"
os.makedirs(output_dir, exist_ok=True)

TEXT_PROMPT = "bird"  # name of the object to detect
max_images = 25  # maximum number of images to process and save
image_count = 0  # number of images processed so far

import cv2
import csv
import numpy as np

# Start a fresh CSV file containing only the header row; detections are
# appended to it later.
csv_path = os.path.join(output_dir, "bird_boxes.csv")
with open(csv_path, 'w', newline='', encoding='utf-8') as csvfile:
    csv_writer = csv.writer(csvfile)
    csv_writer.writerow(["image_path", "bird_count", "x1", "y1", "x2", "y2", "label"])

def calculate_iou(box1, box2):
    """Return the intersection-over-union of two axis-aligned boxes.

    Args:
        box1: Four values ``(x1, y1, x2, y2)`` giving the top-left and
            bottom-right corners of the first box.
        box2: Four values in the same layout for the second box.

    Returns:
        The ratio of the overlap area to the union area. ``0.0`` is returned
        when the boxes do not overlap or when the union has no area (for
        example two zero-size boxes), which would otherwise divide by zero.
    """
    x1, y1, x2, y2 = box1
    x3, y3, x4, y4 = box2

    # Corners of the overlap rectangle (if any).
    x_left = max(x1, x3)
    y_top = max(y1, y3)
    x_right = min(x2, x4)
    y_bottom = min(y2, y4)

    if x_right < x_left or y_bottom < y_top:
        return 0.0

    intersection_area = (x_right - x_left) * (y_bottom - y_top)
    box1_area = (x2 - x1) * (y2 - y1)
    box2_area = (x4 - x3) * (y4 - y3)
    union_area = box1_area + box2_area - intersection_area

    # Degenerate boxes can touch while having no area at all.
    if union_area <= 0:
        return 0.0

    return intersection_area / float(union_area)

def calculate_metrics(predicted_boxes, ground_truth_boxes, iou_threshold=0.5):
    """Compute precision and recall of predicted boxes against ground truth.

    Each ground-truth box is greedily matched, in order, to the still
    unmatched prediction with the highest IoU. A match counts as a true
    positive when that IoU reaches ``iou_threshold``; otherwise the
    ground-truth box is a false negative. Predictions left unmatched at the
    end are false positives.

    Args:
        predicted_boxes: Indexable sequence of ``(x1, y1, x2, y2)`` boxes.
            The sequence is not modified.
        ground_truth_boxes: Iterable of ``(x1, y1, x2, y2)`` boxes.
        iou_threshold: Minimum IoU for a prediction to count as a match.

    Returns:
        A ``(precision, recall)`` tuple. A small epsilon in the denominators
        keeps both values defined (as 0.0) when there is nothing to count.
    """
    # Track unmatched predictions by index instead of removing items from the
    # caller's sequence: this leaves the argument untouched and also works for
    # containers without a usable remove(), such as arrays or tensors.
    unmatched = list(range(len(predicted_boxes)))
    true_positives = 0
    false_negatives = 0

    for gt_box in ground_truth_boxes:
        best_iou = 0
        best_index = None

        for index in unmatched:
            iou = calculate_iou(predicted_boxes[index], gt_box)
            if iou > best_iou:
                best_iou = iou
                best_index = index

        # best_index stays None when no remaining prediction overlaps at all;
        # that must be a miss even if the threshold is zero or negative.
        if best_index is not None and best_iou >= iou_threshold:
            true_positives += 1
            unmatched.remove(best_index)
        else:
            false_negatives += 1

    false_positives = len(unmatched)

    precision = true_positives / (true_positives + false_positives + 1e-6)
    recall = true_positives / (true_positives + false_negatives + 1e-6)

    return precision, recall

# Run detection on each image, save the annotated image / label file / CSV
# rows, and accumulate per-image precision and recall.
iou_threshold = 0.5
precision_list = []
recall_list = []

# Maps each image path to the boxes used as its reference ("ground truth").
ground_truth_boxes = {}


def _cxcywh_to_xyxy(box):
    """Convert one (cx, cy, w, h) box into corner form [x1, y1, x2, y2]."""
    cx, cy, w, h = box
    return [cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2]


# Keep the CSV open in append mode for the whole run instead of reopening it
# (and re-reading it) for every image.
with open(csv_path, 'a', newline='', encoding='utf-8') as csvfile:
    csv_writer = csv.writer(csvfile)

    for image_path in image_files:
        if image_count >= max_images:
            break

        image_source, image = load_image(image_path)

        boxes, logits, phrases = predict(
            model=model,
            image=image,
            caption=TEXT_PROMPT,
            box_threshold=0.4,
            text_threshold=0.3,
            device="cuda"  # run on the GPU
        )

        annotated_frame = annotate(image_source=image_source, boxes=boxes, logits=logits, phrases=phrases)

        # Number of detected birds.
        bird_count = len(boxes)

        # Draw the bird count in the top-left corner.
        cv2.putText(annotated_frame, f"Bird count: {bird_count}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        # Save the annotated image.
        output_image_path = os.path.join(output_dir, f"{os.path.basename(image_path)}_output.jpg")
        cv2.imwrite(output_image_path, annotated_frame)

        # GroundingDINO's predict() yields boxes as normalized (cx, cy, w, h)
        # (annotate() converts them from "cxcywh" itself). Convert to plain
        # Python floats in corner form so the values match the x1/y1/x2/y2
        # columns and so the CSV holds numbers rather than "tensor(...)" text.
        xyxy_boxes = [_cxcywh_to_xyxy(box) for box in boxes.tolist()]
        scores = logits.tolist()

        # Save one "<phrase> <score> <x1> <y1> <x2> <y2>" line per detection.
        output_label_path = os.path.join(output_dir, f"{os.path.basename(image_path)}_output.txt")
        with open(output_label_path, 'w', encoding='utf-8') as f:
            for (x1, y1, x2, y2), logit, phrase in zip(xyxy_boxes, scores, phrases):
                f.write(f"{phrase} {logit:.4f} {x1:.4f} {y1:.4f} {x2:.4f} {y2:.4f}\n")

        # Append the bounding-box coordinates to the CSV file.
        for (x1, y1, x2, y2), phrase in zip(xyxy_boxes, phrases):
            csv_writer.writerow([image_path, bird_count, x1, y1, x2, y2, phrase])

        # NOTE(review): the reference boxes are the detections that were just
        # written, so precision/recall compare the model with itself and are
        # trivially ~1.0 whenever anything is detected. Load real annotations
        # here to obtain meaningful metrics.
        ground_truth_boxes[image_path] = [list(box) for box in xyxy_boxes]

        # Pass a separate copy of the predictions so the stored reference
        # boxes cannot be affected by the metric computation.
        precision, recall = calculate_metrics(
            [list(box) for box in xyxy_boxes],
            ground_truth_boxes[image_path],
            iou_threshold,
        )
        precision_list.append(precision)
        recall_list.append(recall)

        image_count += 1

# Average precision and recall over the processed images; NaN (without a
# NumPy warning) when no image was processed.
mean_precision = np.mean(precision_list) if precision_list else float("nan")
mean_recall = np.mean(recall_list) if recall_list else float("nan")

# Print the metrics and store them next to the other outputs.
print(f"Mean Precision: {mean_precision:.4f}")
print(f"Mean Recall: {mean_recall:.4f}")

with open(os.path.join(output_dir, "evaluation_metrics.txt"), 'w') as f:
    f.write(f"Mean Precision: {mean_precision:.4f}\n")
    f.write(f"Mean Recall: {mean_recall:.4f}\n")