In [1]:
# python native
import os
import json
import random
import datetime
from functools import partial

# external library
import cv2
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
from sklearn.model_selection import GroupKFold
import albumentations as A

# torch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import models

# visualization
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm
  check_for_updates()


In [2]:
# Root directories of the training data: PNG images are searched for under
# IMAGE_ROOT and JSON annotation files under LABEL_ROOT.
IMAGE_ROOT = "./train/DCM"
LABEL_ROOT = "./train/outputs_json"



In [3]:
# Ordered class names. The position of a name in this list is its class index,
# so the order must not change: 'finger-1' .. 'finger-19' first, then the ten
# individually named classes.
CLASSES = [f"finger-{number}" for number in range(1, 20)] + [
    "Trapezium",
    "Trapezoid",
    "Capitate",
    "Hamate",
    "Scaphoid",
    "Lunate",
    "Triquetrum",
    "Pisiform",
    "Radius",
    "Ulna",
]

In [4]:
# Forward lookup: class name -> position of that name in CLASSES.
CLASS2IND = dict(zip(CLASSES, range(len(CLASSES))))
# Reverse lookup: position -> class name, derived from the forward mapping.
IND2CLASS = dict(zip(CLASS2IND.values(), CLASS2IND.keys()))

In [5]:
# Set of paths, relative to IMAGE_ROOT, of every file found anywhere below
# IMAGE_ROOT whose extension equals ".png" (compared case-insensitively).
pngs = {
    os.path.relpath(full_path, start=IMAGE_ROOT)
    for full_path in (
        os.path.join(dirpath, basename)
        for dirpath, _subdirs, basenames in os.walk(IMAGE_ROOT)
        for basename in basenames
    )
    if os.path.splitext(full_path)[1].lower() == ".png"
}

In [6]:
# Set of paths, relative to LABEL_ROOT, of every file found anywhere below
# LABEL_ROOT whose extension equals ".json" (compared case-insensitively).
jsons = {
    os.path.relpath(full_path, start=LABEL_ROOT)
    for full_path in (
        os.path.join(dirpath, basename)
        for dirpath, _subdirs, basenames in os.walk(LABEL_ROOT)
        for basename in basenames
    )
    if os.path.splitext(full_path)[1].lower() == ".json"
}

In [7]:
# Relative paths with the extension stripped, one set per file kind.
jsons_fn_prefix = set(os.path.splitext(rel_path)[0] for rel_path in jsons)
pngs_fn_prefix = set(os.path.splitext(rel_path)[0] for rel_path in pngs)

# Every annotation needs a matching image, and every image a matching annotation.
assert jsons_fn_prefix.issubset(pngs_fn_prefix)
assert pngs_fn_prefix.issubset(jsons_fn_prefix)

In [8]:
# Replace both collections with ascending-sorted lists; later cells index the
# two lists in parallel.
pngs, jsons = sorted(pngs), sorted(jsons)

In [9]:
# NumPy copies of the two sorted path lists, so they can be indexed with the
# index arrays returned by the splitter.
_filenames, _labelnames = (np.array(paths) for paths in (pngs, jsons))

In [10]:
# Grouping key for the splitter: the directory part of each relative image path.
groups = list(map(os.path.dirname, _filenames))
# Constant dummy targets, one per image, passed to the splitter as `y`.
ys = [0] * len(_filenames)

## Note: no random seed is set — the GroupKFold below is created without a `random_state`

In [11]:
# Number of folds produced by the group-wise splitter, kept as a named
# constant so it can be changed in one place.
N_SPLITS = 5

# No shuffle/seed argument is passed: the splitter runs with its defaults.
gkf = GroupKFold(n_splits=N_SPLITS)

## Fold setup

In [12]:
# Directory under which the per-fold output folders are created.
SAVE_ROOT = "./"

In [13]:
# Scratch check: split the first relative image path into (root, extension).
os.path.splitext(_filenames[0])

('ID001/image1661130828152_R', '.png')

In [14]:
# Scratch check: flatten the first relative image path by replacing '/' with '_'.
'_'.join(_filenames[0].split('/'))

'ID001_image1661130828152_R.png'

In [15]:
# NOTE(review): this rebinds `tqdm`, replacing the `tqdm.auto` variant imported
# in the first cell; from here on the plain tqdm progress bar is used.
from tqdm import tqdm

In [16]:
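# NOTE: disabled earlier variant of the export cell that follows. For each fold
# it rasterized every polygon annotation into a per-class mask with
# cv2.fillPoly and saved the scaled image and the mask stack as .pt tensors
# under fold_{i}/images and fold_{i}/annos.
# # filenames = []
# # labelnames = []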
# for i, (x, y) in enumerate(gkf.split(_filenames, ys, groups)):
#     fold_dir = os.path.join(SAVE_ROOT, f"fold_{i}")
#     os.makedirs(fold_dir, exist_ok=True)

#     image_dir = os.path.join(fold_dir,'images')
#     annos_dir = os.path.join(fold_dir,'annos')
#     os.makedirs(image_dir, exist_ok=True)
#     os.makedirs(annos_dir, exist_ok=True)



#     filenames = list(_filenames[y])
#     labelnames = list(_labelnames[y])

#     for image_name, label_name in tqdm(zip(filenames, labelnames)):
#         image_path = os.path.join(IMAGE_ROOT, image_name)
#         image = cv2.imread(image_path) / 255.0  # Normalize to [0, 1]
  
#         label_path = os.path.join(LABEL_ROOT, label_name)
        
#         # label_shape = tuple(image.shape[:2]) + (len(CLASSES),)
#         # label = np.zeros(label_shape, dtype=np.uint8)


#         with open(label_path, "r") as f:
#             annotations = json.load(f)["annotations"]
        
        
#         label_shape = tuple(image.shape[:2]) + (len(CLASSES),)
#         label = np.zeros(label_shape, dtype=np.uint8)


#         for ann in annotations:
#             c = ann["label"]
#             class_ind = CLASS2IND[c]
#             points = np.array(ann["points"])
            
#             class_label = np.zeros(image.shape[:2], dtype=np.uint8)
#             cv2.fillPoly(class_label, [points], 1)
#             label[..., class_ind] = class_label

#         image = torch.from_numpy(image.transpose(2, 0, 1)).float()
#         label = torch.from_numpy(label.transpose(2, 0, 1)).float()

#         torch.save(image, os.path.join(image_dir, f"{os.path.splitext('_'.join(image_name.split('/')))[0]}.pt"))
#         torch.save(label, os.path.join(annos_dir, f"{os.path.splitext('_'.join(label_name.split('/')))[0]}.pt"))


In [None]:
def _flat_stem(rel_path):
    """Flatten a relative path into a single file-name stem.

    The extension is dropped first and every path separator is then replaced
    by ``_``, e.g. ``'ID001/image1661130828152_R.png'`` becomes
    ``'ID001_image1661130828152_R'``. Stripping the extension before
    flattening keeps names intact when a directory name contains a dot.

    Args:
        rel_path: Path of an image or annotation file relative to its root
            directory.

    Returns:
        The flattened stem as a plain ``str``.
    """
    stem = os.path.splitext(str(rel_path))[0]
    # Replace the native separator as well as "/" so that the result never
    # contains a directory component.
    return stem.replace(os.sep, "_").replace("/", "_")


for fold_idx, (_train_idx, held_out_idx) in enumerate(gkf.split(_filenames, ys, groups)):
    # Only two output folders are produced: the held-out part of the first
    # split is written to "fold_1", and the held-out parts of all remaining
    # splits are pooled in "fold_0".
    fold_name = "fold_1" if fold_idx == 0 else "fold_0"
    fold_dir = os.path.join(SAVE_ROOT, fold_name)
    image_dir = os.path.join(fold_dir, "images")
    annos_dir = os.path.join(fold_dir, "annos")
    # makedirs also creates fold_dir itself, so no separate call is needed.
    os.makedirs(image_dir, exist_ok=True)
    os.makedirs(annos_dir, exist_ok=True)

    # Materialize the pairs so that tqdm can read the total from len().
    pairs = list(zip(_filenames[held_out_idx], _labelnames[held_out_idx]))

    for image_name, label_name in tqdm(pairs, desc=fold_name):
        # Images and labels are paired purely by position in two separately
        # sorted lists; fail loudly if the two entries do not belong together.
        stem = _flat_stem(image_name)
        if _flat_stem(label_name) != stem:
            raise ValueError(
                f"image/label mismatch: {image_name!r} is paired with {label_name!r}"
            )

        # The image is decoded and re-encoded as PNG; pixel values are not rescaled.
        # NOTE(review): with the default imread flags this is a re-encode, not a
        # byte-for-byte copy of the source file - confirm that is intended.
        image_path = os.path.join(IMAGE_ROOT, image_name)
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"cv2.imread could not read image: {image_path}")

        output_image_path = os.path.join(image_dir, f"{stem}.png")
        if not cv2.imwrite(output_image_path, image):
            # cv2.imwrite signals failure through its boolean return value.
            raise OSError(f"cv2.imwrite failed for: {output_image_path}")

        # The annotation is parsed and written back as JSON without modification
        # of its content.
        label_path = os.path.join(LABEL_ROOT, label_name)
        with open(label_path, "r", encoding="utf-8") as f:
            label = json.load(f)

        output_label_path = os.path.join(annos_dir, f"{stem}.json")
        with open(output_label_path, "w", encoding="utf-8") as f:
            json.dump(label, f)


0it [00:00, ?it/s]

160it [00:42,  3.75it/s]
160it [00:43,  3.71it/s]
160it [00:42,  3.80it/s]
160it [00:43,  3.70it/s]
160it [00:43,  3.69it/s]


In [25]:
# Scratch check: flattened name of the first image with the extension removed,
# i.e. the stem used for the exported file names.
os.path.splitext('_'.join(_filenames[0].split('/')))[0]

'ID001_image1661130828152_R'

In [26]:
# Scratch check: first annotation path (relative to LABEL_ROOT) after sorting.
_labelnames[0]

'ID001/image1661130828152_R.json'