In [2]:
import os
from pprint import pprint
import json
from tqdm import tqdm
import shutil
from random import shuffle

In [3]:
# Raw dataset: source images and their 2D JSON annotation files.
image_root = '/home/work/road_mark/data_roadmark/IMAGE/'
label_root = '/home/work/road_mark/data_roadmark/2D_JSON/'

# Staging area (one sub-folder per class group) and the final dataset root.
preprocessed_image_root = '/home/work/road_mark/preprocessed_data_roadmark/images/'
preprocessed_label_root = '/home/work/road_mark/preprocessed_data_roadmark/labels/'
dataset_root = '/home/work/road_mark/data/full_class_roadmark_data/'

# images/ and labels/ folders of the train, valid and test splits.
train_image_root = f'{dataset_root}train/images/'
train_label_root = f'{dataset_root}train/labels/'
valid_image_root = f'{dataset_root}valid/images/'
valid_label_root = f'{dataset_root}valid/labels/'
test_image_root = f'{dataset_root}test/images/'
test_label_root = f'{dataset_root}test/labels/'

# Staging folders: "<flag>_images" under the image root and "<flag>_labels"
# under the label root, for every class-group flag.
for _class_flag in ('n_crosswalk', 'd_crosswalk', 'n_bump', 'd_bump'):
    os.makedirs(f'{preprocessed_image_root}{_class_flag}_images', exist_ok=True)
    os.makedirs(f'{preprocessed_label_root}{_class_flag}_labels', exist_ok=True)

# Final dataset folders; exist_ok=True makes re-running the cell harmless.
for _split_dir in (
    train_image_root, train_label_root,
    valid_image_root, valid_label_root,
    test_image_root, test_label_root,
):
    os.makedirs(_split_dir, exist_ok=True)

#### json 라벨 데이터 Load 및 정상/훼손 분류

In [49]:
# `road` codes that send a record to the normal / damaged group.
normal_roads = ['0101', '0102']
damaged_roads = ['0201', '0202']

total_list = []
normal_group_list = []
damaged_group_list = []

# Load every JSON label file and bucket it by the `road` value of its first image.
for file_name in tqdm(os.listdir(label_root)):
    if not file_name.endswith('.json'):
        continue
    file_path = os.path.join(label_root, file_name)
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Guard BEFORE indexing: a record whose 'images' entry is missing or empty is
    # skipped instead of raising TypeError / IndexError on `[0]`.
    images = data.get('images') or []
    image = images[0] if images else None

    # Keep only records with time == 'light' (original author's note: 'light' = morning).
    if not image or image.get('time') != 'light':
        continue

    total_list.append(data)
    road = image.get('road')
    if road in normal_roads:
        normal_group_list.append(data)
    elif road in damaged_roads:
        damaged_group_list.append(data)

# Print each group's size and first record; an empty group prints None for the
# sample instead of raising IndexError.
print("Total:", len(total_list), total_list[0] if total_list else None)
print("Normal Group List:", len(normal_group_list), normal_group_list[0] if normal_group_list else None)
print("Damaged Group List:", len(damaged_group_list), damaged_group_list[0] if damaged_group_list else None)

  0%|          | 0/91829 [00:00<?, ?it/s]

100%|██████████| 91829/91829 [00:37<00:00, 2459.96it/s]

Total: 88147 {'images': [{'id': '0000315672', 'filename': '0202_light_clear_smooth_09002271.png', 'width': 1920, 'height': 1080, 'sensor': 'C', 'road': '0202', 'time': 'light', 'weather': 'clear', 'flow': 'smooth'}], 'annotations': [{'polyline': [[733.4556473266174, 1025.439720312428, 813.3243021769648, 803.036785921674, 868.0812909852375, 643.7682395676113]], 'image_id': '0000315672', 'category_id': 381, 'id': '0002637972'}, {'polyline': [[615.1364871678499, 1033.0532651361884, 762.4634997430664, 791.4863326404289, 845.3417484809795, 644.5337064277724]], 'image_id': '0000315672', 'category_id': 381, 'id': '0002637973'}, {'polyline': [[1908.4949893425653, 994.4951684397997, 1812.1195420866256, 944.3696427344944, 1334.6704739613506, 748.143238846812, 1233.0794142656775, 706.6689835767469, 1187.4671017492528, 684.8949995599627, 1180.7289192184173, 665.1947283066818, 1189.5403886818176, 648.0865980077798, 1217.5297622714418, 637.1996059993877]], 'image_id': '0000315672', 'category_id': 39




#### 각 라벨(정상, 훼손/횡단보도, 과속방지턱) 집계

In [54]:
# category_id values to tally.
category_ids_to_count = [
    412,  # crosswalk
    413,  # raised crosswalk
    414,  # bicycle crosswalk
    433,  # speed bump
]

# Count how many annotations carry each of the requested category_id values.
def count_category_ids(data_list, category_ids):
    """Tally annotations per requested category id.

    Args:
        data_list: Iterable of records; each record's 'annotations' entry
            (treated as empty when the key is absent) is a list of dicts.
        category_ids: The category ids to count.

    Returns:
        A dict with one key per requested id (zero when never seen), in the
        order the ids were given.
    """
    tally = dict.fromkeys(category_ids, 0)
    seen_ids = (
        entry.get('category_id')
        for record in data_list
        for entry in record.get('annotations', [])
    )
    for seen in seen_ids:
        if seen in category_ids:
            tally[seen] += 1
    return tally

# Tally the target category ids separately for the normal and the damaged group.
normal_category_count, damaged_category_count = (
    count_category_ids(group, category_ids_to_count)
    for group in (normal_group_list, damaged_group_list)
)

# Crosswalk total = 412 + 413 + 414 (the three crosswalk category ids).
total_crosswalk_normal = sum(normal_category_count[cid] for cid in (412, 413, 414))
total_crosswalk_damaged = sum(damaged_category_count[cid] for cid in (412, 413, 414))

print(f"Normal data : 횡단보도({normal_category_count[412]}), 고원식 횡단보도({normal_category_count[413]}), 자전거 횡단보도({normal_category_count[414]}), 과속방지턱({normal_category_count[433]})")
print(f"Total 횡단보도(Normal): {total_crosswalk_normal}")
print(f"Damaged data : 횡단보도({damaged_category_count[412]}), 고원식 횡단보도({damaged_category_count[413]}), 자전거 횡단보도({damaged_category_count[414]}), 과속방지턱({damaged_category_count[433]})")
print(f"Total 횡단보도(Damaged): {total_crosswalk_damaged}")


Normal data : 횡단보도(20204), 고원식 횡단보도(796), 자전거 횡단보도(1190), 과속방지턱(3388)
Total 횡단보도(Normal): 22190
Damaged data : 횡단보도(16390), 고원식 횡단보도(922), 자전거 횡단보도(2369), 과속방지턱(3129)
Total 횡단보도(Damaged): 19681


#### json 형식을 YOLO 라벨 형식으로 변환 및 저장

In [None]:
# NOTE: the YOLO conversion / file-writing stage of this cell is commented out below.
# As written, the loop only records which normal-group images contain a 412/433
# annotation without a usable 'segmentation' entry.
target_category_id = [412, 433]
target_class_id = {412: 0, 433: 2}
crosswalk_count = 0
bump_count = 0
normal_except_list = []

for data in tqdm(normal_group_list):
    width = data['images'][0]['width']
    height = data['images'][0]['height']
    file_name = data['images'][0]['filename']

    temp_label_list = []
    for annotation in data['annotations']:
        if annotation['category_id'] in target_category_id:
            try:
                segmentation = annotation['segmentation'][0]
            except (KeyError, IndexError, TypeError):
                # Catch only "no usable polygon" failures (missing key, empty list,
                # None value). A bare `except:` would also swallow KeyboardInterrupt
                # and hide unrelated bugs.
                normal_except_list.append(file_name)
                # Discard the whole image once one target annotation is unusable.
                temp_label_list = []
                break

#             # YOLO normalization
#             normalized_segmentation = []
#             for i in range(0, len(segmentation), 2):
#                 x = min(max(segmentation[i], 0), width)
#                 y = min(max(segmentation[i + 1], 0), height)
#                 x /= width
#                 y /= height
#                 normalized_segmentation.extend([x, y])

#             label = target_class_id[annotation['category_id']]
#             normalized_segmentation.insert(0, label)
#             temp_label_list.append(normalized_segmentation)

#     if temp_label_list:
#         file_name = file_name.split('.')[0]
#         bump_flag = False
#         for val in temp_label_list:
#             if val[0] == 2:
#                 bump_flag = True
#                 break
#         if bump_flag:
#             bump_count += 1
#             with open(f'{preprocessed_label_root}n_bump_labels/{file_name}.txt', 'w') as fp:
#                 for val in temp_label_list:
#                     input_data = list(map(str, val))
#                     fp.write(' '.join(input_data) + '\n')
#             shutil.copyfile(f'{image_root}{file_name}.png', f'{preprocessed_image_root}n_bump_images/{file_name}.png')
#         else:
#             crosswalk_count += 1
#             with open(f'{preprocessed_label_root}n_crosswalk_labels/{file_name}.txt', 'w') as fp:
#                 for val in temp_label_list:
#                     input_data = list(map(str, val))
#                     fp.write(' '.join(input_data) + '\n')
#             shutil.copyfile(f'{image_root}{file_name}.png', f'{preprocessed_image_root}n_crosswalk_images/{file_name}.png')

# print(f"Normal image count : 횡단보도({crosswalk_count}), 과속방지턱({bump_count})")

100%|██████████| 48417/48417 [00:00<00:00, 168391.37it/s]


In [None]:
# How many normal-group images were recorded in normal_except_list, followed by
# the first five file names (shown as the cell's last-expression output).
print(len(normal_except_list))
normal_except_list[:5]

46


['0102_light_clear_smooth_04004122.png',
 '0102_light_clear_smooth_01004884.png',
 '0102_light_clear_smooth_04007445.png',
 '0102_light_clear_smooth_06001045.png',
 '0102_light_clear_smooth_04007618.png']

In [None]:
# NOTE: the YOLO conversion / file-writing stage of this cell is commented out below.
# As written, the loop only records which damaged-group images contain a 412/433
# annotation without a usable 'segmentation' entry.
target_category_id = [412, 433]
target_class_id = {412: 1, 433: 3}
crosswalk_count = 0
bump_count = 0
damaged_except_list = []

for data in tqdm(damaged_group_list):
    width = data['images'][0]['width']
    height = data['images'][0]['height']
    file_name = data['images'][0]['filename']

    temp_label_list = []
    for annotation in data['annotations']:
        if annotation['category_id'] in target_category_id:
            try:
                segmentation = annotation['segmentation'][0]
            except (KeyError, IndexError, TypeError):
                # Catch only "no usable polygon" failures (missing key, empty list,
                # None value). A bare `except:` would also swallow KeyboardInterrupt
                # and hide unrelated bugs.
                damaged_except_list.append(file_name)
                # Discard the whole image once one target annotation is unusable.
                temp_label_list = []
                break

#             # YOLO normalization
#             normalized_segmentation = []
#             for i in range(0, len(segmentation), 2):
#                 x = min(max(segmentation[i], 0), width)
#                 y = min(max(segmentation[i + 1], 0), height)
#                 x /= width
#                 y /= height
#                 normalized_segmentation.extend([x, y])

#             label = target_class_id[annotation['category_id']]
#             normalized_segmentation.insert(0, label)
#             temp_label_list.append(normalized_segmentation)

#     if temp_label_list:
#         file_name = file_name.split('.')[0]
#         bump_flag = False
#         for val in temp_label_list:
#             if val[0] == 3:
#                 bump_flag = True
#                 break
#         if bump_flag:
#             bump_count += 1
#             with open(f'{preprocessed_label_root}d_bump_labels/{file_name}.txt', 'w') as fp:
#                 for val in temp_label_list:
#                     input_data = list(map(str, val))
#                     fp.write(' '.join(input_data) + '\n')
#             shutil.copyfile(f'{image_root}{file_name}.png', f'{preprocessed_image_root}d_bump_images/{file_name}.png')
#         else:
#             crosswalk_count += 1
#             with open(f'{preprocessed_label_root}d_crosswalk_labels/{file_name}.txt', 'w') as fp:
#                 for val in temp_label_list:
#                     input_data = list(map(str, val))
#                     fp.write(' '.join(input_data) + '\n')
#             shutil.copyfile(f'{image_root}{file_name}.png', f'{preprocessed_image_root}d_crosswalk_images/{file_name}.png')

# print(f"Damaged image count : 횡단보도({crosswalk_count}), 과속방지턱({bump_count})")

100%|██████████| 39730/39730 [00:00<00:00, 151955.20it/s]


In [None]:
# How many damaged-group images were recorded in damaged_except_list, followed by
# the first five file names (shown as the cell's last-expression output).
print(len(damaged_except_list))
damaged_except_list[:5]

35


['0202_light_clear_smooth_09000363.png',
 '0202_light_clear_smooth_03000555.png',
 '0202_light_clear_smooth_09001754.png',
 '0202_light_clear_smooth_01000008.png',
 '0202_light_clear_smooth_08005229.png']

In [None]:
# Pretty-print the raw JSON of every file in damaged_except_list, to inspect what
# the 412 / 433 annotations of those files contain.
for val in damaged_except_list:
    # Read as UTF-8 explicitly, the same way the label files are loaded when the
    # groups are built, instead of relying on the platform's default encoding.
    with open(label_root + val.split('.')[0] + '.json', 'r', encoding='utf-8') as fp:
        data = json.load(fp)
    pprint(data)

{'annotations': [{'area': 1022.2747708864358,
                  'bbox': [862.2309234798879,
                           580.7175101177776,
                           70.38689693975755,
                           14.52365163591991],
                  'category_id': 412,
                  'id': '0002748135',
                  'image_id': '0000313764',
                  'size': 'S'},
                 {'area': 1021.9396639542284,
                  'bbox': [686.2431884544043,
                           580.5658686674387,
                           67.76257333662022,
                           15.08118144919907],
                  'category_id': 412,
                  'id': '0002748136',
                  'image_id': '0000313764',
                  'size': 'S'},
                 {'area': 1059.8390969623833,
                  'bbox': [519.9750286904296,
                           575.2593757229548,
                           70.01482186974745,
                           15.137353329757264],
  

#### 데이터셋 생성

In [5]:
def data_spliter(data_list, dir_flag, ratio=(8, 2)):
    """Copy the image/label pairs of one class folder into the train and valid sets.

    The leading ``ratio[0] / sum(ratio)`` share of ``data_list`` (in the order
    given; no shuffling is done here) goes to train, the rest to valid.

    Args:
        data_list: File names; only the text before the first '.' is used as the stem.
        dir_flag: Class-folder prefix; sources are read from ``{dir_flag}_images/``
            and ``{dir_flag}_labels/`` under the preprocessed roots.
        ratio: (train, valid) weights.
    """
    train_share = ratio[0] / sum(ratio)
    cut = int(len(data_list) * train_share)
    train_list, valid_list = data_list[:cut], data_list[cut:]
    print(f'data len: {len(data_list)}')
    print(f'Train len: {len(train_list)}')
    print(f'Val len: {len(valid_list)}')

    image_src = f'{preprocessed_image_root}{dir_flag}_images/'
    label_src = f'{preprocessed_label_root}{dir_flag}_labels/'
    # (subset, image destination, label destination): train first, then valid.
    plan = (
        (train_list, train_image_root, train_label_root),
        (valid_list, valid_image_root, valid_label_root),
    )
    for subset, image_dst, label_dst in plan:
        for entry in tqdm(subset):
            stem = entry.partition('.')[0]
            shutil.copyfile(f'{image_src}{stem}.png', f'{image_dst}{stem}.png')
            shutil.copyfile(f'{label_src}{stem}.txt', f'{label_dst}{stem}.txt')

In [6]:
# Split the d_bump class folder: list its label files and hand them to
# data_spliter, which copies the matching image/label pairs into train and valid.
dir_flag = 'd_bump'
data_list = os.listdir(f'{preprocessed_label_root}{dir_flag}_labels/')
print(f'Start: {preprocessed_label_root}{dir_flag}')
data_spliter(data_list, dir_flag)

Start: /home/work/road_mark/preprocessed_data_roadmark/labels/d_bump
data len: 2885
Train len: 2308
Val len: 577


  0%|          | 0/2308 [00:00<?, ?it/s]

100%|██████████| 2308/2308 [01:02<00:00, 36.71it/s]
100%|██████████| 577/577 [00:15<00:00, 36.82it/s]


In [7]:
# Split the d_crosswalk class folder; every listed label file is used.
dir_flag = 'd_crosswalk'
data_list = os.listdir(f'{preprocessed_label_root}{dir_flag}_labels/')
print(f'Original data len: {len(data_list)}')
print(f'Start: {preprocessed_label_root}{dir_flag}')
data_spliter(data_list, dir_flag)

Original data len: 12981
Start: /home/work/road_mark/preprocessed_data_roadmark/labels/d_crosswalk
data len: 12981
Train len: 10384
Val len: 2597


100%|██████████| 10384/10384 [04:48<00:00, 35.99it/s]
100%|██████████| 2597/2597 [01:12<00:00, 35.66it/s]


In [8]:
# Split the n_bump class folder; every listed label file is used.
dir_flag = 'n_bump'
data_list = os.listdir(f'{preprocessed_label_root}{dir_flag}_labels/')
print(f'Original data len: {len(data_list)}')
print(f'Start: {preprocessed_label_root}{dir_flag}')
data_spliter(data_list, dir_flag)

Original data len: 3219
Start: /home/work/road_mark/preprocessed_data_roadmark/labels/n_bump
data len: 3219
Train len: 2575
Val len: 644


100%|██████████| 2575/2575 [01:13<00:00, 35.01it/s]
100%|██████████| 644/644 [00:18<00:00, 35.08it/s]


In [9]:
# Split the n_crosswalk class folder, using only a prefix of its label files.
dir_flag = 'n_crosswalk'
data_list = os.listdir(f'{preprocessed_label_root}{dir_flag}_labels/')
print(f'Original data len: {len(data_list)}')
# Keep only the first 12981 entries.
# NOTE(review): 12981 equals the d_crosswalk file count printed by the d_crosswalk
# cell, so this presumably caps the normal class to match it; confirm.
# os.listdir returns names in arbitrary order, so which files are kept depends on
# the filesystem listing order.
data_list = data_list[:12981]
print(f'Start: {preprocessed_label_root}{dir_flag}')
data_spliter(data_list, dir_flag)

Original data len: 16784
Start: /home/work/road_mark/preprocessed_data_roadmark/labels/n_crosswalk
data len: 12981
Train len: 10384
Val len: 2597


100%|██████████| 10384/10384 [04:53<00:00, 35.39it/s]
100%|██████████| 2597/2597 [01:13<00:00, 35.39it/s]


#### 데이터셋 데이터 수 확인

- 전체 데이터

In [68]:
from collections import defaultdict

dir_flag_list = ['n_crosswalk', 'n_bump', 'd_crosswalk', 'd_bump']
count_dict = defaultdict(int)

# Tally label lines per class token across the four preprocessed class folders.
for dir_flag in dir_flag_list:
    for file_path in tqdm(os.listdir(f'{preprocessed_label_root}{dir_flag}_labels/')):
        with open(f'{preprocessed_label_root}{dir_flag}_labels/' + file_path, 'r') as file:
            for line in file:
                fields = line.split()
                # Skip blank lines (e.g. a stray trailing newline) instead of
                # raising IndexError on `[0]`.
                if not fields:
                    continue
                # The first whitespace-separated token of each line is the key counted.
                count_dict[fields[0]] += 1

counts = count_dict
for number, count in counts.items():
    print(f"{number}: {count}")

100%|██████████| 16784/16784 [00:04<00:00, 3495.13it/s]
100%|██████████| 3219/3219 [00:01<00:00, 1821.88it/s]
100%|██████████| 12981/12981 [00:09<00:00, 1400.21it/s]
100%|██████████| 2885/2885 [00:02<00:00, 1028.40it/s]

0: 20130
2: 3380
1: 16336
3: 3122





- 균등 클래스 데이터

In [12]:
from collections import defaultdict

# Not read by this cell; kept so the notebook-level name stays defined.
dir_flag_list = ['n_crosswalk', 'n_bump', 'd_crosswalk', 'd_bump']
count_dict = defaultdict(int)

# Tally label lines per class token over the train and valid label folders
# (one progress bar per folder).
for label_dir in (
    '/home/work/road_mark/data/eqaulity_class_roadmark_data/train/labels/',
    '/home/work/road_mark/data/eqaulity_class_roadmark_data/valid/labels/',
):
    for file_path in tqdm(os.listdir(label_dir)):
        with open(label_dir + file_path, 'r') as file:
            for line in file:
                fields = line.split()
                # Skip blank lines (e.g. a stray trailing newline) instead of
                # raising IndexError on `[0]`.
                if not fields:
                    continue
                # The first whitespace-separated token of each line is the key counted.
                count_dict[fields[0]] += 1

counts = count_dict
for number, count in counts.items():
    print(f"{number}: {count}")

100%|██████████| 9232/9232 [00:06<00:00, 1392.22it/s]
100%|██████████| 2308/2308 [00:01<00:00, 1305.48it/s]

0: 4452
1: 4596
2: 3025
3: 3122





- 전체 클래스 데이터

In [3]:
from collections import defaultdict

# Not read by this cell; kept so the notebook-level name stays defined.
dir_flag_list = ['n_crosswalk', 'n_bump', 'd_crosswalk', 'd_bump']
count_dict = defaultdict(int)

# Tally label lines per class token over the train and valid label folders
# (one progress bar per folder).
for label_dir in (
    '/home/work/road_mark/data/full_class_roadmark_data/train/labels/',
    '/home/work/road_mark/data/full_class_roadmark_data/valid/labels/',
):
    for file_path in tqdm(os.listdir(label_dir)):
        with open(label_dir + file_path, 'r') as file:
            for line in file:
                fields = line.split()
                # Skip blank lines (e.g. a stray trailing newline) instead of
                # raising IndexError on `[0]`.
                if not fields:
                    continue
                # The first whitespace-separated token of each line is the key counted.
                count_dict[fields[0]] += 1

counts = count_dict
for number, count in counts.items():
    print(f"{number}: {count}")

100%|██████████| 25651/25651 [00:17<00:00, 1463.06it/s]
100%|██████████| 6415/6415 [00:04<00:00, 1416.12it/s]

0: 15874
1: 16336
2: 3380
3: 3122





In [5]:
# Number of image files in the train split, then in the valid split.
for images_dir in (
    '/home/work/road_mark/data/full_class_roadmark_data/train/images/',
    '/home/work/road_mark/data/full_class_roadmark_data/valid/images/',
):
    print(len(os.listdir(images_dir)))

25651
6415
