In [2]:
# python native
import os
import json
import random
import datetime
from functools import partial

# external library
import cv2
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
from sklearn.model_selection import GroupKFold
import albumentations as A

# torch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import models

# visualization
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm
  check_for_updates()


In [3]:
# Input directories for fold_0: images live under images/, annotations under annos/.
IMAGE_ROOT = (
    "/data/ephemeral/home/level2-cv-semanticsegmentation-cv-06-lv3"
    "/data/fold_0/images"
)
LABEL_ROOT = (
    "/data/ephemeral/home/level2-cv-semanticsegmentation-cv-06-lv3"
    "/data/fold_0/annos"
)

In [4]:
# Class names in channel order: finger-1 .. finger-19 first, then the
# individually named bones. The position in this list is the class index.
CLASSES = [f"finger-{number}" for number in range(1, 20)] + [
    "Trapezium",
    "Trapezoid",
    "Capitate",
    "Hamate",
    "Scaphoid",
    "Lunate",
    "Triquetrum",
    "Pisiform",
    "Radius",
    "Ulna",
]

In [5]:
# Lookup tables between a class name and its index in CLASSES.
CLASS2IND = dict(zip(CLASSES, range(len(CLASSES))))
# Inverse mapping (index -> name), derived from CLASS2IND.
IND2CLASS = dict(zip(CLASS2IND.values(), CLASS2IND.keys()))


In [6]:
# Recursively collect every *.png (extension matched case-insensitively)
# below IMAGE_ROOT, stored as paths relative to IMAGE_ROOT.
pngs = set(
    os.path.relpath(os.path.join(dirpath, filename), start=IMAGE_ROOT)
    for dirpath, _subdirs, filenames in os.walk(IMAGE_ROOT)
    for filename in filenames
    if os.path.splitext(filename)[-1].lower() == ".png"
)

In [7]:
# Peek at a handful of the collected relative paths (set order is arbitrary).
print("Sample PNG paths:", [*pngs][:5])

Sample PNG paths: ['ID368_image1665450178483.png', 'ID013_image1661216788592.png', 'ID113_image1661908013121.png', 'ID097_image1661823399448.png', 'ID384_image1665536548947.png']


In [8]:
# Number of PNG files found under IMAGE_ROOT (displayed as the cell output).
len(pngs)

640

In [9]:
# Recursively collect every *.json (extension matched case-insensitively)
# below LABEL_ROOT, stored as paths relative to LABEL_ROOT.
jsons = set(
    os.path.relpath(os.path.join(dirpath, filename), start=LABEL_ROOT)
    for dirpath, _subdirs, filenames in os.walk(LABEL_ROOT)
    for filename in filenames
    if os.path.splitext(filename)[-1].lower() == ".json"
)

In [10]:
# Number of JSON files found under LABEL_ROOT (displayed as the cell output).
len(jsons)

640

In [11]:
# Reduce both listings to extension-less relative paths so they can be compared.
jsons_fn_prefix = set(os.path.splitext(relpath)[0] for relpath in jsons)
pngs_fn_prefix = set(os.path.splitext(relpath)[0] for relpath in pngs)

# Every annotation needs a matching image, and every image a matching annotation.
assert jsons_fn_prefix.issubset(pngs_fn_prefix)
assert pngs_fn_prefix.issubset(jsons_fn_prefix)

In [12]:
# Turn both sets into sorted lists so images and annotations can be paired by
# position (this relies on the stems matching, which is asserted beforehand).
pngs = list(pngs)
pngs.sort()
jsons = list(jsons)
jsons.sort()

In [None]:
class XRayDataset(Dataset):
    """Pairs each image with a dense multi-channel mask rasterised from its JSON polygons.

    Args:
        is_train: Stored on the instance; not consulted by this class.
        transforms: Stored on the instance; not applied by ``__getitem__``.
        filenames: Image paths relative to ``image_root``. Defaults to the
            notebook-level ``pngs`` list.
        labelnames: Annotation paths relative to ``label_root``, in the same
            order as ``filenames``. Defaults to the notebook-level ``jsons`` list.
        image_root: Directory holding the images. Defaults to ``IMAGE_ROOT``.
        label_root: Directory holding the annotations. Defaults to ``LABEL_ROOT``.

    Raises:
        ValueError: If the image and annotation lists differ in length.
    """

    def __init__(self, is_train=True, transforms=None, filenames=None,
                 labelnames=None, image_root=None, label_root=None):
        # Snapshot the notebook globals at construction time. The file lists
        # were already captured here; capturing the roots as well keeps an
        # instance self-consistent when IMAGE_ROOT / LABEL_ROOT are reassigned
        # later in the notebook.
        self.filenames = np.array(pngs if filenames is None else filenames)
        self.labelnames = np.array(jsons if labelnames is None else labelnames)
        self.image_root = IMAGE_ROOT if image_root is None else image_root
        self.label_root = LABEL_ROOT if label_root is None else label_root

        if len(self.filenames) != len(self.labelnames):
            raise ValueError(
                f"Got {len(self.filenames)} images but "
                f"{len(self.labelnames)} annotation files"
            )

        self.is_train = is_train
        self.transforms = transforms

    def __len__(self):
        """Return the number of image/annotation pairs."""
        return len(self.filenames)

    def __getitem__(self, item):
        """Load one sample.

        Returns:
            Tuple ``(image, label, image_path, label_path)`` where ``image`` is
            the array returned by ``cv2.imread`` and ``label`` is a uint8 array
            of shape ``(H, W, len(CLASSES))`` holding 0/1 per class channel.

        Raises:
            FileNotFoundError: If ``cv2.imread`` cannot read the image.
        """
        image_name = self.filenames[item]
        image_path = os.path.join(self.image_root, image_name)
        image = cv2.imread(image_path)
        # cv2.imread signals failure by returning None rather than raising;
        # fail here with the offending path instead of on `.shape` below.
        if image is None:
            raise FileNotFoundError(f"Could not read image: {image_path}")

        label_name = self.labelnames[item]
        label_path = os.path.join(self.label_root, label_name)

        # Allocate the (H, W, NC) label volume.
        label_shape = tuple(image.shape[:2]) + (len(CLASSES), )
        label = np.zeros(label_shape, dtype=np.uint8)

        # Read the annotation file.
        with open(label_path, "r") as f:
            annotations = json.load(f)
        annotations = annotations["annotations"]

        # Rasterise each polygon into the channel of its class.
        for ann in annotations:
            class_ind = CLASS2IND[ann["label"]]
            # fillPoly works on 32-bit integer vertices; make the dtype explicit.
            points = np.array(ann["points"], dtype=np.int32)

            # Convert the polygon into a dense 0/1 mask.
            class_label = np.zeros(image.shape[:2], dtype=np.uint8)
            cv2.fillPoly(class_label, [points], 1)
            # Union with what is already in the channel so a second polygon of
            # the same class does not erase the first one.
            label[..., class_ind] = np.maximum(label[..., class_ind], class_label)

        return image, label, image_path, label_path

In [14]:
# Dataset over the current image/annotation listings; no transforms are passed.
train_dataset = XRayDataset(
    transforms=None,
    is_train=True,
)


In [24]:
# Directory (relative to the working directory) that receives the exported crops.
SAVE_ROOT = "./crop_train"

In [64]:
# Export a padded bounding-box crop of the image and of the class mask for
# each selected class of every sample in `train_dataset`.
#
# Layout written below SAVE_ROOT:
#   images/<class_index>_<image file name>
#   annos/<class_index>_<annotation stem>.png   (single-channel mask, values 0/1)

crop_margin = 10  # pixels of context kept around the object's bounding box
target_classes = [CLASS2IND['Trapezoid'], CLASS2IND['Pisiform']]

fold_dir = SAVE_ROOT
image_dir = os.path.join(fold_dir, 'images')
annos_dir = os.path.join(fold_dir, 'annos')
# makedirs also creates fold_dir itself as an intermediate directory.
os.makedirs(image_dir, exist_ok=True)
os.makedirs(annos_dir, exist_ok=True)

for i in range(len(train_dataset)):
    image, label, image_path, label_path = train_dataset[i]

    image_filename = os.path.basename(image_path)
    label_stem = os.path.splitext(os.path.basename(label_path))[0]

    for k in target_classes:
        mask = label[:, :, k]

        # np.nonzero returns (row indices, column indices) of the mask pixels.
        rows, cols = np.nonzero(mask)
        if rows.size == 0:
            # Nothing annotated for this class: there is no box to crop.
            print(f"[skip] no '{CLASSES[k]}' mask in {label_path}")
            continue

        # Clamp the lower bounds at 0: a negative slice start would wrap around
        # to the far edge of the array and yield an empty or wrong crop.
        # The upper bounds need no clamping because slicing stops at the edge.
        row_start = max(int(rows.min()) - crop_margin, 0)
        row_stop = int(rows.max()) + crop_margin
        col_start = max(int(cols.min()) - crop_margin, 0)
        col_stop = int(cols.max()) + crop_margin

        cropped_image = image[row_start:row_stop, col_start:col_stop]
        cropped_mask = mask[row_start:row_stop, col_start:col_stop]

        output_image_path = os.path.join(image_dir, f"{k}_{image_filename}")
        output_label_path = os.path.join(annos_dir, f"{k}_{label_stem}.png")

        # cv2.imwrite reports failure through its return value; surface it so
        # the export cannot end up silently incomplete.
        if not cv2.imwrite(output_image_path, cropped_image):
            raise OSError(f"Failed to write {output_image_path}")
        if not cv2.imwrite(output_label_path, cropped_mask):
            raise OSError(f"Failed to write {output_label_path}")


In [66]:
# Re-point the input directories at fold_1. Everything below that reads
# IMAGE_ROOT / LABEL_ROOT now sees this fold.
IMAGE_ROOT = (
    "/data/ephemeral/home/level2-cv-semanticsegmentation-cv-06-lv3"
    "/data/fold_1/images"
)
LABEL_ROOT = (
    "/data/ephemeral/home/level2-cv-semanticsegmentation-cv-06-lv3"
    "/data/fold_1/annos"
)

In [67]:
# Recursively collect every *.png (extension matched case-insensitively)
# below IMAGE_ROOT, stored as paths relative to IMAGE_ROOT.
pngs = set(
    os.path.relpath(os.path.join(dirpath, filename), start=IMAGE_ROOT)
    for dirpath, _subdirs, filenames in os.walk(IMAGE_ROOT)
    for filename in filenames
    if os.path.splitext(filename)[-1].lower() == ".png"
)
# Show how many images were found.
len(pngs)

160

In [68]:
# Recursively collect every *.json (extension matched case-insensitively)
# below LABEL_ROOT, stored as paths relative to LABEL_ROOT.
jsons = set(
    os.path.relpath(os.path.join(dirpath, filename), start=LABEL_ROOT)
    for dirpath, _subdirs, filenames in os.walk(LABEL_ROOT)
    for filename in filenames
    if os.path.splitext(filename)[-1].lower() == ".json"
)

# Show how many annotation files were found.
len(jsons)

160

In [69]:
# Reduce both listings to extension-less relative paths so they can be compared.
jsons_fn_prefix = set(os.path.splitext(relpath)[0] for relpath in jsons)
pngs_fn_prefix = set(os.path.splitext(relpath)[0] for relpath in pngs)

# Every annotation needs a matching image, and every image a matching annotation.
assert jsons_fn_prefix.issubset(pngs_fn_prefix)
assert pngs_fn_prefix.issubset(jsons_fn_prefix)

In [70]:
# Turn both sets into sorted lists so images and annotations can be paired by
# position (this relies on the stems matching, which is asserted beforehand).
pngs = list(pngs)
pngs.sort()
jsons = list(jsons)
jsons.sort()

In [73]:
# NOTE: this cell re-declares XRayDataset; the later definition shadows any
# earlier one with the same name in the kernel.
class XRayDataset(Dataset):
    """Pairs each image with a dense multi-channel mask rasterised from its JSON polygons.

    Args:
        is_train: Stored on the instance; not consulted by this class.
        transforms: Stored on the instance; not applied by ``__getitem__``.
        filenames: Image paths relative to ``image_root``. Defaults to the
            notebook-level ``pngs`` list.
        labelnames: Annotation paths relative to ``label_root``, in the same
            order as ``filenames``. Defaults to the notebook-level ``jsons`` list.
        image_root: Directory holding the images. Defaults to ``IMAGE_ROOT``.
        label_root: Directory holding the annotations. Defaults to ``LABEL_ROOT``.

    Raises:
        ValueError: If the image and annotation lists differ in length.
    """

    def __init__(self, is_train=True, transforms=None, filenames=None,
                 labelnames=None, image_root=None, label_root=None):
        # Snapshot the notebook globals at construction time. The file lists
        # were already captured here; capturing the roots as well keeps an
        # instance self-consistent when IMAGE_ROOT / LABEL_ROOT are reassigned
        # elsewhere in the notebook.
        self.filenames = np.array(pngs if filenames is None else filenames)
        self.labelnames = np.array(jsons if labelnames is None else labelnames)
        self.image_root = IMAGE_ROOT if image_root is None else image_root
        self.label_root = LABEL_ROOT if label_root is None else label_root

        if len(self.filenames) != len(self.labelnames):
            raise ValueError(
                f"Got {len(self.filenames)} images but "
                f"{len(self.labelnames)} annotation files"
            )

        self.is_train = is_train
        self.transforms = transforms

    def __len__(self):
        """Return the number of image/annotation pairs."""
        return len(self.filenames)

    def __getitem__(self, item):
        """Load one sample.

        Returns:
            Tuple ``(image, label, image_path, label_path)`` where ``image`` is
            the array returned by ``cv2.imread`` and ``label`` is a uint8 array
            of shape ``(H, W, len(CLASSES))`` holding 0/1 per class channel.

        Raises:
            FileNotFoundError: If ``cv2.imread`` cannot read the image.
        """
        image_name = self.filenames[item]
        image_path = os.path.join(self.image_root, image_name)
        image = cv2.imread(image_path)
        # cv2.imread signals failure by returning None rather than raising;
        # fail here with the offending path instead of on `.shape` below.
        if image is None:
            raise FileNotFoundError(f"Could not read image: {image_path}")

        label_name = self.labelnames[item]
        label_path = os.path.join(self.label_root, label_name)

        # Allocate the (H, W, NC) label volume.
        label_shape = tuple(image.shape[:2]) + (len(CLASSES), )
        label = np.zeros(label_shape, dtype=np.uint8)

        # Read the annotation file.
        with open(label_path, "r") as f:
            annotations = json.load(f)
        annotations = annotations["annotations"]

        # Rasterise each polygon into the channel of its class.
        for ann in annotations:
            class_ind = CLASS2IND[ann["label"]]
            # fillPoly works on 32-bit integer vertices; make the dtype explicit.
            points = np.array(ann["points"], dtype=np.int32)

            # Convert the polygon into a dense 0/1 mask.
            class_label = np.zeros(image.shape[:2], dtype=np.uint8)
            cv2.fillPoly(class_label, [points], 1)
            # Union with what is already in the channel so a second polygon of
            # the same class does not erase the first one.
            label[..., class_ind] = np.maximum(label[..., class_ind], class_label)

        return image, label, image_path, label_path

In [74]:
# Dataset over the current image/annotation listings; no transforms are passed.
# The variable keeps the name `train_dataset` because the export loop reads it.
train_dataset = XRayDataset(
    transforms=None,
    is_train=True,
)


In [75]:
# Directory (relative to the working directory) that receives the exported crops.
SAVE_ROOT = "./crop_val"

In [76]:
# Export a padded bounding-box crop of the image and of the class mask for
# each selected class of every sample in `train_dataset`.
#
# Layout written below SAVE_ROOT:
#   images/<class_index>_<image file name>
#   annos/<class_index>_<annotation stem>.png   (single-channel mask, values 0/1)

crop_margin = 10  # pixels of context kept around the object's bounding box
target_classes = [CLASS2IND['Trapezoid'], CLASS2IND['Pisiform']]

fold_dir = SAVE_ROOT
image_dir = os.path.join(fold_dir, 'images')
annos_dir = os.path.join(fold_dir, 'annos')
# makedirs also creates fold_dir itself as an intermediate directory.
os.makedirs(image_dir, exist_ok=True)
os.makedirs(annos_dir, exist_ok=True)

for i in range(len(train_dataset)):
    image, label, image_path, label_path = train_dataset[i]

    image_filename = os.path.basename(image_path)
    label_stem = os.path.splitext(os.path.basename(label_path))[0]

    for k in target_classes:
        mask = label[:, :, k]

        # np.nonzero returns (row indices, column indices) of the mask pixels.
        rows, cols = np.nonzero(mask)
        if rows.size == 0:
            # Nothing annotated for this class: there is no box to crop.
            print(f"[skip] no '{CLASSES[k]}' mask in {label_path}")
            continue

        # Clamp the lower bounds at 0: a negative slice start would wrap around
        # to the far edge of the array and yield an empty or wrong crop.
        # The upper bounds need no clamping because slicing stops at the edge.
        row_start = max(int(rows.min()) - crop_margin, 0)
        row_stop = int(rows.max()) + crop_margin
        col_start = max(int(cols.min()) - crop_margin, 0)
        col_stop = int(cols.max()) + crop_margin

        cropped_image = image[row_start:row_stop, col_start:col_stop]
        cropped_mask = mask[row_start:row_stop, col_start:col_stop]

        output_image_path = os.path.join(image_dir, f"{k}_{image_filename}")
        output_label_path = os.path.join(annos_dir, f"{k}_{label_stem}.png")

        # cv2.imwrite reports failure through its return value; surface it so
        # the export cannot end up silently incomplete.
        if not cv2.imwrite(output_image_path, cropped_image):
            raise OSError(f"Failed to write {output_image_path}")
        if not cv2.imwrite(output_label_path, cropped_mask):
            raise OSError(f"Failed to write {output_label_path}")
