In [1]:
"""
로컬에서 "직접" 아래의 파일을 생성합니다.
- FULL_DICT.json
- PARTIAL_DICT.json
- NO_DICT.json
- err_image_paths.txt

제가 데이터 공급해드리므로 돌리실 필요는 없습니다.
"""


import torch
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm
import gc
import os

# Base paths: the project root is the parent of the current working directory
ROOT_DIR = os.path.dirname(os.getcwd())
DATA_DIR = os.path.join(ROOT_DIR, "data")
IMAGE_DIR, ANNOT_DIR = (
    os.path.join(DATA_DIR, sub_dir)
    for sub_dir in ("train_images", "train_annotations")
)

# File paths located directly under DATA_DIR
full_dict_path, err_txt_path, fixed_dict_path, partial_dict_path, no_dict_path = (
    os.path.join(DATA_DIR, file_name)
    for file_name in (
        "FULL_DICT.json",
        "err_image_paths.txt",
        "FIXED_DICT.json",
        "PARTIAL_DICT.json",
        "NO_DICT.json",
    )
)

# Plot settings: pick a font family that can render Korean labels.
#
# Assigning an unknown family to rcParams does not raise, so a try/except
# chain can never fall through to the next candidate. Instead, each candidate
# is checked against the fonts matplotlib has actually registered.
_KOREAN_FONT_CANDIDATES = ("Apple SD Gothic Neo", "NanumGothic", "AppleGothic")


def _first_registered_font(candidates):
    """Return the first name in `candidates` known to matplotlib's font manager, else None."""
    registered = {entry.name for entry in fm.fontManager.ttflist}
    return next((name for name in candidates if name in registered), None)


_korean_font = _first_registered_font(_KOREAN_FONT_CANDIDATES)

if _korean_font is None:
    # None of the candidates is in the cached font list. Rescan the system fonts
    # once (slow; private matplotlib API, as in the original cell) and register
    # any matching font files with the live font manager so they are usable now.
    for _entry in fm._load_fontmanager(try_read_cache=False).ttflist:
        if _entry.name in _KOREAN_FONT_CANDIDATES:
            fm.fontManager.addfont(_entry.fname)
    _korean_font = _first_registered_font(_KOREAN_FONT_CANDIDATES)

if _korean_font is not None:
    plt.rcParams['font.family'] = _korean_font
# else: keep matplotlib's default family; Korean glyphs will not render.

# Render the minus sign with an ASCII hyphen instead of the Unicode minus glyph
plt.rcParams['axes.unicode_minus'] = False


# Device selection, in priority order: MPS (Mac GPU), first CUDA GPU, then CPU
DEVICE = torch.device(
    "mps" if (torch.backends.mps.is_available() and torch.backends.mps.is_built())
    else "cuda:0" if torch.cuda.is_available()
    else "cpu"
)


# Cache-clearing helper
def clean_cache():
    """Run the garbage collector, then empty the accelerator cache.

    The MPS cache is emptied when MPS is both available and built; otherwise
    the CUDA cache is emptied when CUDA is available. On a CPU-only setup only
    garbage collection runs. Returns None.
    """
    gc.collect()

    mps_backend = torch.backends.mps
    if mps_backend.is_available() and mps_backend.is_built():
        torch.mps.empty_cache()
        return

    if torch.cuda.is_available():
        torch.cuda.empty_cache()

# Prevent MallocStackLogging error output by dropping the related variables
# from this process's environment (a missing variable is ignored)
for _env_name in (
    "MallocStackLogging",
    "MallocStackLoggingNoCompact",
    "DYLD_INSERT_LIBRARIES",
):
    os.environ.pop(_env_name, None)


# # 로그
# import logging

# def init_logger() -> logging.Logger:
#     logging.basicConfig(
#         format="%(asctime)s [%(levelname)s] (%(filename)s:%(lineno)d) - %(message)s",
#         datefmt="%Y-%m-%d %H:%M:%S",
#         level=logging.INFO,
#         encoding="utf-8",
#     )
#     return logging.getLogger("")

# logger = init_logger()

Matplotlib is building the font cache; this may take a moment.


KeyboardInterrupt: 

In [None]:
import glob

# Collect every training image (sorted for a deterministic order)
image_paths = sorted(glob.glob(os.path.join(IMAGE_DIR, "*.png")))

print(f"· 이미지 파일: {len(image_paths)}개")


# Annotation layout walked here: ANNOT_DIR / K-* / K-* / *.json
# os.path.basename is used instead of splitting on "/" so the "K-" filter
# also works where the path separator is a backslash (Windows).

# Level 1: entries directly under ANNOT_DIR whose name starts with "K-"
annot_folders = [
    folder
    for folder in sorted(glob.glob(os.path.join(ANNOT_DIR, "*")))
    if os.path.basename(folder).startswith("K-")
]

# Level 2: every entry inside each level-1 folder (filtered in the next step)
annot_sub_folders = []

for annot_folder in annot_folders:
    annot_sub_folders.extend(sorted(glob.glob(os.path.join(annot_folder, "*"))))

# Level 3: JSON files inside the level-2 folders whose name starts with "K-"
annot_paths = []

for sub_folder in annot_sub_folders:
    if os.path.basename(sub_folder).startswith("K-"):
        annot_paths.extend(sorted(glob.glob(os.path.join(sub_folder, "*.json"))))

print(f"· 어노테이션 파일: {len(annot_paths)}개")

· 이미지 파일: 651개
· 어노테이션 파일: 1001개


In [None]:
# Split images into full / partial / missing annotation coverage.
#
# An image file name encodes one or more pill ids after "K-", separated by "-".
# For each id, an annotation JSON is looked up at
#   ANNOT_DIR/<simple name>_json/K-<id>/<image stem>.json
# Result dictionaries map image file name -> list of found annotation paths,
# stored relative to ANNOT_DIR with "/" separators:
#   FULL_DICT    - every pill id has an annotation
#   PARTIAL_DICT - some, but not all, pill ids have an annotation
#   NO_DICT      - no pill id has an annotation
# After the loop, only_annot_set holds the annotation paths that no image
# referenced.

FULL_DICT = dict()
PARTIAL_DICT = dict()
NO_DICT = dict()
only_annot_set = set(annot_paths)

# Immutable snapshot for O(1) membership tests; only_annot_set shrinks while
# matches are consumed, so it cannot double as the lookup table.
annot_path_lookup = frozenset(annot_paths)

for image_path in image_paths:

    # basename instead of splitting on a hard-coded "/data/train_images/" so the
    # code does not depend on the path separator or the directory layout
    image_name = os.path.basename(image_path)
    image_stem = image_name.split(".")[0]
    # Part of the stem before the "_0_2_0_2" marker (presumably the pill-combination
    # code - TODO confirm against the dataset naming convention)
    image_simple_name = image_stem.split("_0_2_0_2")[0]

    pill_nums = image_simple_name.split("K-")[1].split("-")
    pill_count = len(pill_nums)

    annot_folder_name = f"{image_simple_name}_json"
    annot_file_name = f"{image_stem}.json"
    tmp_list = []

    for pill_num in pill_nums:
        pill_folder_name = f"K-{pill_num}"
        # Built with os.path.join so it compares equal to the paths glob returned
        annot_path = os.path.join(ANNOT_DIR, annot_folder_name, pill_folder_name, annot_file_name)

        if annot_path in annot_path_lookup:
            # Stored path always uses "/" so the JSON output is the same on every OS
            tmp_list.append("/".join((annot_folder_name, pill_folder_name, annot_file_name)))
            # discard (not remove): a pill id repeated within one image name
            # would otherwise raise KeyError on its second occurrence
            only_annot_set.discard(annot_path)

    count = len(tmp_list)

    if count == pill_count:
        FULL_DICT[image_name] = tmp_list
    elif count == 0:
        NO_DICT[image_name] = tmp_list
    else:
        PARTIAL_DICT[image_name] = tmp_list


import json

# Write each coverage dictionary to its JSON file (UTF-8, 4-space indent)
for _out_path, _coverage in (
    (full_dict_path, FULL_DICT),
    (partial_dict_path, PARTIAL_DICT),
    (no_dict_path, NO_DICT),
):
    with open(_out_path, "w", encoding="utf-8") as f:
        json.dump(_coverage, f, indent=4)