In [None]:
# python native
import os
import json
import random
import datetime
from functools import partial

# external library
import cv2
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
from sklearn.model_selection import GroupKFold

# visualization
import matplotlib.pyplot as plt

# torch
import torch

## Image Path Setting

In [None]:
# Root directory that is walked for .png image files.
IMAGE_ROOT = "/data/ephemeral/data/train/DCM"
# Root directory that is walked for .json label files; label paths are joined onto it when opened.
LABEL_ROOT = "/data/ephemeral/data/train/outputs_json"

## Load Dataset

In [None]:
# Class names; the position of a name in this list is its channel index.
CLASSES = [
    'finger-1', 'finger-2', 'finger-3', 'finger-4', 'finger-5',
    'finger-6', 'finger-7', 'finger-8', 'finger-9', 'finger-10',
    'finger-11', 'finger-12', 'finger-13', 'finger-14', 'finger-15',
    'finger-16', 'finger-17', 'finger-18', 'finger-19',
    'Trapezium', 'Trapezoid', 'Capitate', 'Hamate',
    'Scaphoid', 'Lunate', 'Triquetrum', 'Pisiform',
    'Radius', 'Ulna',
]
# Lookup tables in both directions: name -> index and index -> name.
CLASS2IND = dict(zip(CLASSES, range(len(CLASSES))))
IND2CLASS = dict(enumerate(CLASSES))


def _collect_relative_paths(base_dir, extension):
    """Return the set of files below ``base_dir`` with the given extension.

    The extension comparison is case-insensitive and every returned path is
    relative to ``base_dir``.
    """
    found = set()
    for dirpath, _subdirs, filenames in os.walk(base_dir):
        for filename in filenames:
            if os.path.splitext(filename)[1].lower() != extension:
                continue
            found.add(os.path.relpath(os.path.join(dirpath, filename), start=base_dir))
    return found


pngs = _collect_relative_paths(IMAGE_ROOT, ".png")

jsons = _collect_relative_paths(LABEL_ROOT, ".json")

## ValidationSet에 대한 예측 결과 불러오기

In [None]:
# Load the prediction CSV; later cells read its 'image_name', 'class' and 'rle' columns.
# NOTE(review): the path is an empty string, so this call cannot succeed as written —
# fill in the location of the prediction file before running.
df = pd.read_csv('')

In [None]:
# Size of the first row's 'rle' entry (its character count if the value is a string — TODO confirm the column type).
len(df.iloc[0]['rle'])

In [None]:
# Preview the first rows of the loaded predictions.
df.head()

In [None]:
# Drop every prediction row that belongs to one of the excluded images.
# The 'image_name' values are later used as file-name keys such as
# 'image1661130828152_R.png', so an exact match against the bare id never removes
# anything. Compare on the id part instead: extension stripped, text before the first '_'.
excluded_image_ids = ['image1661130828152', 'image1661130891365']
image_ids = df['image_name'].apply(lambda name: os.path.splitext(str(name))[0].split('_')[0])
# .copy() makes the result an independent frame, so adding columns to it later
# does not operate on a slice of the original.
df = df[~image_ids.isin(excluded_image_ids)].copy()

In [None]:
# File name -> path relative to IMAGE_ROOT. basename() works for any directory depth
# and path separator, unlike taking the second '/'-separated component.
image_dict = {os.path.basename(name): name for name in pngs}
# Label files are keyed by the matching image file name (same stem, '.png' extension)
# so that both dictionaries can be queried with df['image_name'].
label_dict = {os.path.splitext(os.path.basename(name))[0] + '.png': name for name in jsons}

# assign() builds a new frame, so this is safe even when df is a filtered slice.
# A name without a matching image/label file still raises KeyError.
df = df.assign(
    image_path=df['image_name'].apply(lambda name: image_dict[name]),
    label_path=df['image_name'].apply(lambda name: label_dict[name]),
)

In [None]:
# Show the relative image path stored for this file name.
image_dict['image1661130828152_R.png']

In [None]:
# Print the relative image path stored for this file name as plain text.
print(image_dict['image1661130828152_R.png'])

## Decoding method (rle to mask)

In [None]:
def rle_to_mask(rle, height, width):
    """Decode a run-length-encoded string into a binary mask.

    :param rle: space-separated "start length start length ..." pairs, where each
        start is a 1-based index into the flattened (row-major) mask. A missing
        value (None, NaN or a blank string) decodes to an all-zero mask.
    :param height: number of rows of the mask
    :param width: number of columns of the mask
    :return: uint8 array of shape (height, width) holding 1 inside the runs and 0 elsewhere
    """
    mask = np.zeros(height * width, dtype=np.uint8)

    # An empty prediction is read back from CSV as NaN (a float), which has no
    # .split(); treat every non-string or blank value as "nothing predicted".
    if not isinstance(rle, str) or not rle.strip():
        return mask.reshape((height, width))

    array = np.asarray([int(x) for x in rle.split()])
    starts = array[0::2] - 1  # convert 1-based starts to 0-based offsets
    lengths = array[1::2]
    for start, length in zip(starts, lengths):
        mask[start:start + length] = 1
    return mask.reshape((height, width))

In [None]:
def mask_to_poly_seg(mask):
    """Return the coordinates of every pixel equal to 1 as a list of [x, y] pairs.

    Pixels are listed in row-major scan order; x is the column index and y the row index.
    """
    coords = np.argwhere(mask == 1)  # one (row, col) entry per matching pixel
    xs = coords[:, 1]
    ys = coords[:, 0]
    return np.column_stack((xs, ys)).tolist()

In [None]:
# Preview the frame again; at this point it also carries the image_path and label_path columns.
df.head()

## Validation Dataset에 대해 클래스 별 over & under pixels 계산

In [None]:
def calculate_average_over_under_pixels(df, target_classes=None, image_size=(2048, 2048)):
    """Average the over- and under-predicted pixel counts per class over the images in ``df``.

    For every image, one ground-truth mask per class is rasterised from the polygon
    annotations of its label JSON, one predicted mask per class is decoded from the
    RLE rows of ``df``, and the two are compared pixel by pixel:

    * over pixels  -- predicted 1 where the ground truth is 0
    * under pixels -- predicted 0 where the ground truth is 1

    :param df: frame with ``image_name``, ``label_path`` (relative to LABEL_ROOT),
        ``class`` and ``rle`` columns
    :param target_classes: class names to evaluate; ``None`` evaluates every entry of
        CLASSES. Classes that are not evaluated are still present in the result with
        both averages set to 0.
    :param image_size: (height, width) used for both the rasterised ground truth and
        the decoded predictions
    :return: dict mapping class name to
        ``{"avg_over_pixels": ..., "avg_under_pixels": ...}``, where each average is
        taken over the number of processed images
    """
    if target_classes is None:
        target_classes = CLASSES
    target_inds = [CLASS2IND[name] for name in target_classes]
    height, width = image_size
    num_classes = len(CLASSES)

    # Running totals per class over all images.
    total_class_metrics = {
        class_name: {"gt_pixels": 0, "over_pixels": 0, "under_pixels": 0, "image_count": 0}
        for class_name in CLASSES
    }

    # One group per image, in order of first appearance; avoids re-filtering the
    # whole frame for every image.
    grouped = df.groupby('image_name', sort=False)
    for _, df_ in tqdm(grouped, total=grouped.ngroups, desc="Processing images", unit="image"):
        label_path = df_['label_path'].iloc[0]

        # Ground truth: fill each annotated polygon into its class channel.
        with open(os.path.join(LABEL_ROOT, label_path), "r") as f:
            annotations = json.load(f)["annotations"]

        # Allocated up front so an image without annotations yields empty masks
        # instead of a None that breaks the comparison below.
        gt_label = np.zeros((height, width, num_classes), dtype=np.uint8)
        for ann in annotations:
            class_ind = CLASS2IND[ann["label"]]
            points = np.array(ann["points"])

            # fillPoly draws into a contiguous 2-D buffer that is then copied into the channel.
            class_label = np.zeros((height, width), dtype=np.uint8)
            cv2.fillPoly(class_label, [points], 1)
            gt_label[..., class_ind] = class_label

        # Prediction: decode each row's RLE into its class channel.
        pred_label = np.zeros_like(gt_label)
        for _, row in df_.iterrows():
            class_ind = CLASS2IND[row['class']]
            rle = row['rle']
            if not isinstance(rle, str):
                # Missing RLE (NaN/None) means nothing was predicted for this class.
                continue
            pred_label[..., class_ind] = rle_to_mask(rle, height, width)

        # Per-image over/under-prediction counts for every evaluated class.
        for class_ind in target_inds:
            class_name = CLASSES[class_ind]
            class_gt = gt_label[..., class_ind]
            class_pred = pred_label[..., class_ind]

            class_totals = total_class_metrics[class_name]
            class_totals["gt_pixels"] += int(np.sum(class_gt))
            class_totals["over_pixels"] += int(np.sum(class_pred > class_gt))
            class_totals["under_pixels"] += int(np.sum(class_pred < class_gt))
            # Counts every processed image, whether or not the class occurs in it.
            class_totals["image_count"] += 1

    # Turn the totals into per-image averages.
    avg_metrics = {}
    for class_name, metrics in total_class_metrics.items():
        image_count = metrics["image_count"]
        if image_count > 0:
            avg_metrics[class_name] = {
                "avg_over_pixels": metrics["over_pixels"] / image_count,
                "avg_under_pixels": metrics["under_pixels"] / image_count,
            }
        else:
            avg_metrics[class_name] = {"avg_over_pixels": 0, "avg_under_pixels": 0}

    return avg_metrics



In [None]:
# Per-class average over/under-predicted pixel counts for the predictions in df
# (despite the name, the dictionary is keyed by class, not by image).
image_metrics = calculate_average_over_under_pixels(df)

In [None]:
# Report both averages for every class, two decimals each.
for name in image_metrics:
    averages = image_metrics[name]
    print(f"Class: {name}")
    print(f"  Average Over Pixels: {averages['avg_over_pixels']:.2f}")
    print(f"  Average Under Pixels: {averages['avg_under_pixels']:.2f}")

In [None]:
import matplotlib.pyplot as plt

# Add up the per-class averages into one over-prediction and one under-prediction figure.
total_over_pixels = 0
total_under_pixels = 0
for class_averages in image_metrics.values():
    total_over_pixels += class_averages['avg_over_pixels']
    total_under_pixels += class_averages['avg_under_pixels']

# Data for the two bars.
labels = ['Over Pixels', 'Under Pixels']
values = [total_over_pixels, total_under_pixels]

print(f'total over pixels: {total_over_pixels}')
print(f'total under pixels: {total_under_pixels}')

# Bar chart comparing the two sums.
fig, ax = plt.subplots(figsize=(8, 6))
ax.bar(labels, values, color=['blue', 'red'])

# Title and axis label.
ax.set(title='Total Over and Under Pixels', ylabel='Pixel Count')

# Render the figure.
fig.tight_layout()
plt.show()


## 각 클래스 별 over & under predict 비율에 따라 threshold 조정

In [None]:
def adjust_thresholds_dynamic(avg_metrics, initial_thresholds, base_rate=(0.05, 0.10, 0.15, 0.20), max_adjust=0.3, min_ratio=0.5, max_ratio=2.0):
    """
    Adjust each class threshold according to its under/over pixel ratio.

    The ratio is ``avg_under_pixels / avg_over_pixels``. A ratio below 1 (more
    over-prediction) raises the threshold, a ratio of 1 or more (more
    under-prediction) lowers it:

    * ratio < min_ratio               -> threshold + rate
    * min_ratio <= ratio < 1          -> threshold + rate / ratio
    * 1 <= ratio <= max_ratio         -> threshold - rate * ratio
    * ratio > max_ratio (incl. inf)   -> threshold - rate
    * no over and no under pixels     -> threshold left unchanged

    Every adjustment is capped at ``max_adjust`` and every resulting threshold is
    clamped to [0.2, 0.8]. Progress is printed for each class and rate.

    NOTE(review): inside [min_ratio, max_ratio] the adjustment grows as the ratio
    moves away from 1, but outside that range it falls back to the plain rate —
    confirm this is the intended shape.

    :param avg_metrics: dict mapping class name to a dict with "avg_over_pixels"
        and "avg_under_pixels"
    :param initial_thresholds: dict mapping class name to its starting threshold
    :param base_rate: iterable of base adjustment steps; any number of entries is accepted
    :param max_adjust: upper bound on a single adjustment
    :param min_ratio: ratios below this value get the plain ``rate`` increase
    :param max_ratio: ratios above this value get the plain ``rate`` decrease
    :return: dict mapping each rate to the list of adjusted thresholds, one per
        class in the iteration order of ``avg_metrics``
    """
    # One result list per rate, however many rates were supplied.
    adjusted_thresholds = {rate: [] for rate in base_rate}

    for class_name, metrics in avg_metrics.items():
        over_pixels = metrics["avg_over_pixels"]
        under_pixels = metrics["avg_under_pixels"]

        if over_pixels > 0:
            ratio = under_pixels / over_pixels
        elif under_pixels > 0:
            ratio = float('inf')  # only under-prediction: treat the ratio as infinite
        else:
            ratio = None  # no errors in either direction: nothing to base an adjustment on

        ratio_text = "n/a" if ratio is None else f"{ratio:.2f}"
        print(f"Class: {class_name}")
        print(f"  Over Pixels: {over_pixels}, Under Pixels: {under_pixels}, Ratio: {ratio_text}")

        current_threshold = initial_thresholds[class_name]

        for rate in base_rate:
            if ratio is None:
                adjust_amount = 0.0
                adjusted_threshold = current_threshold
            elif ratio <= 0 or ratio < min_ratio:
                # Strong over-prediction. The ratio <= 0 test keeps a zero ratio out
                # of the division below when min_ratio is set to 0 or less.
                adjust_amount = min(rate, max_adjust)
                adjusted_threshold = current_threshold + adjust_amount
            elif ratio < 1:
                adjust_amount = min(rate / ratio, max_adjust)
                adjusted_threshold = current_threshold + adjust_amount
            elif ratio <= max_ratio:
                adjust_amount = min(rate * ratio, max_adjust)
                adjusted_threshold = current_threshold - adjust_amount
            else:
                # Strong under-prediction (including the infinite ratio).
                adjust_amount = min(rate, max_adjust)
                adjusted_threshold = current_threshold - adjust_amount

            # Keep the threshold inside the allowed range.
            adjusted_threshold = max(0.2, min(0.8, adjusted_threshold))

            adjusted_thresholds[rate].append(adjusted_threshold)

            print(f"  Adjusted Threshold (base_rate={rate}): {adjusted_threshold:.2f} (Adjust Amount: {adjust_amount:.2f})")

    return adjusted_thresholds


In [None]:
# Every class starts from the same threshold.
initial_thresholds = dict.fromkeys(CLASSES, 0.5)

# Derive the adjusted thresholds from the per-class average over/under pixel counts.
new_thresholds = adjust_thresholds_dynamic(image_metrics, initial_thresholds)

In [None]:
# One line per base rate, followed by its list of adjusted thresholds.
for rate, thresholds in new_thresholds.items():
    print(f'{rate}: {thresholds}')