In [2]:
import os 
import numpy as np
import shutil
import cv2
import json
import glob
import matplotlib.pyplot as plt
import pandas as pd
import warnings
warnings.filterwarnings(action= 'ignore')

In [36]:
# Load the image paths and the label paths of the original dataset.
# NOTE: glob is called without recursive=True, so '**' matches exactly one
# directory level here (it behaves like '*').
img_path_list, label_path_list = (
    glob.glob(pattern)
    for pattern in (
        '../original_dataset/**/CAM1/*',
        '../original_dataset/**/label/*',
    )
)

In [37]:
# Number of label files whose text after the last '.' is not 'json'.
sum(path.rsplit('.', 1)[-1] != 'json' for path in label_path_list)

0

In [8]:
def js_to_img(data_path):
    """Map a label JSON path to the path of its CAM1 image.

    Only the directory that directly contains the file (``label`` -> ``CAM1``)
    and the file extension (``.json`` -> ``.jpg``) are rewritten. A plain
    ``str.replace`` over the whole path would also rewrite any other
    ``label`` / ``json`` substring, e.g. in a parent directory name or in the
    file name itself.

    Parameters
    ----------
    data_path : str
        Path of a label file, e.g. ``'<root>/<group>/label/<name>.json'``.

    Returns
    -------
    str
        The matching image path, e.g. ``'<root>/<group>/CAM1/<name>.jpg'``.
        Components that are not ``label`` / ``.json`` are left unchanged.
    """
    label_dir, label_name = os.path.split(data_path)
    parent_dir, dir_name = os.path.split(label_dir)
    stem, ext = os.path.splitext(label_name)

    if dir_name == 'label':
        dir_name = 'CAM1'
    if ext == '.json':
        ext = '.jpg'

    return os.path.join(parent_dir, dir_name, stem + ext)

In [38]:
# Report how many images and label files were found, and the share of
# images that have a label file (labels / images, in percent).
n_images = len(img_path_list)
n_labels = len(label_path_list)
defect_pct = round((n_labels / n_images) * 100, 2)

print('img 개수 :', n_images)
print('label 개수 :', n_labels)
print('defect ratio :', str(defect_pct) + '%')

img 개수 : 31193
label 개수 : 438
defect ratio : 1.4%


In [41]:
# # make folder
# os.makedirs('../dataset/' , exist_ok= True)
# os.makedirs('../dataset/normal' , exist_ok= True)
# os.makedirs('../dataset/defect' , exist_ok= True)
# os.makedirs('../dataset/label' , exist_ok= True)

In [47]:
# # move defect json file & defect img file
# for label_path in label_path_list:
#     defect_img_path = js_to_img(label_path)
#     shutil.move(defect_img_path , '../dataset/defect/' + os.path.basename(defect_img_path)  )
#     shutil.move(label_path , '../dataset/label/' + os.path.basename(label_path)  )

# # move remaining normal img files
# remain_img_path_list = glob.glob('../original_dataset/**/CAM1/*')
# for remain_img_path in remain_img_path_list:
#     shutil.move(remain_img_path , '../dataset/normal/' + os.path.basename(remain_img_path)  )

In [3]:
label_path_list = glob.glob('../dataset/label/*')

# Rows are collected as plain dicts and each DataFrame is built once after the
# loop: DataFrame.append was removed in pandas 2.0, and growing a frame row by
# row copies the whole frame on every iteration.
img_records = []   # one record per labelled (defect) image
obj_records = []   # one record per annotated box

obj_idx = 1
for idx, label_path in enumerate(label_path_list, start=1):
    # Image path = same file name as the label, '.jpg' extension, in the
    # 'defect' folder. Built from the base name only, so a 'label' or 'json'
    # substring inside the file name is not rewritten.
    label_stem = os.path.splitext(os.path.basename(label_path))[0]
    img_path = os.path.join('../dataset/defect', label_stem + '.jpg')

    # load json file
    with open(label_path, 'r') as file:
        label_json = json.load(file)

    # cv2.imread returns None (it does not raise) when the file is missing or
    # cannot be decoded; fail here with a message naming the offending files.
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise FileNotFoundError(
            'could not read image %r for label %r' % (img_path, label_path)
        )

    height, width = img.shape
    img_records.append({
        'id': idx,
        'file_name': img_path,
        'RESOLUTION': int(height * width),
        'width': width,
        'height': height,
    })

    # One record per annotation stored under the first element of the label.
    for obj in label_json[0]['annotations']:
        obj_records.append({
            'id': obj_idx,
            'category_id': obj['label'],
            # Coordinate values in the order they appear in the JSON object
            # (downstream code treats them as x, y, w, h - TODO confirm).
            'bbox': [int(v) for v in obj['coordinates'].values()],
            'image_id': idx,
        })
        obj_idx += 1

# make img dataframe
df_img = pd.DataFrame(
    img_records,
    columns=['id', 'file_name', 'RESOLUTION', 'width', 'height'])

# make box object dataframe
df_obj = pd.DataFrame(
    obj_records,
    columns=['id', 'category_id', 'bbox', 'image_id'])


In [4]:

# Pixel size written for every normal image. The files are not opened here,
# so this assumes all normal images share one size - TODO confirm.
NORMAL_IMG_WIDTH = 784
NORMAL_IMG_HEIGHT = 596

normal_data_list = glob.glob('../dataset/normal/*')

# Defect images are numbered 1..len(label_path_list) by enumerate(..., start=1),
# so normal images continue right after them. Deriving the offset (instead of
# hard-coding it) keeps image ids unique if the number of label files changes.
first_normal_id = len(label_path_list) + 1
normal_img_id = np.arange(first_normal_id, first_normal_id + len(normal_data_list))

df_normal_img = pd.DataFrame({'id': normal_img_id, 'file_name': normal_data_list})
df_normal_img['RESOLUTION'] = NORMAL_IMG_WIDTH * NORMAL_IMG_HEIGHT
df_normal_img['width'] = NORMAL_IMG_WIDTH
df_normal_img['height'] = NORMAL_IMG_HEIGHT

In [5]:
# Keep at most N_NORMAL_SAMPLES distinct normal images.
# Sampling is done WITHOUT replacement: drawing with replacement can pick the
# same row several times, which puts duplicate images / duplicate image ids
# into the exported dataset. A fixed seed makes the subset reproducible.
N_NORMAL_SAMPLES = 2000
df_normal_img = df_normal_img.sample(
    n=min(N_NORMAL_SAMPLES, len(df_normal_img)),
    replace=False,
    random_state=42,
)

In [6]:
# Stack the normal-image rows below the defect-image rows; row index labels
# of both frames are kept as they are.
# NOTE: this rebinds df_img, so running the cell twice appends the rows twice.
df_img = pd.concat(objs=[df_img, df_normal_img], axis='index')

In [7]:
# Remove the literal '../dataset/' text from every file name
# (plain substring replacement, not a regular expression).
df_img['file_name'] = df_img['file_name'].str.replace('../dataset/', '', regex=False)

In [8]:
# Show the last five rows of the image table.
df_img.tail(n=5)

Unnamed: 0,id,file_name,RESOLUTION,width,height
26555,26927,normal/221222_155410_0000029327_CAM1_NORMAL_OK...,467264,784,596
11458,11830,normal/221222_094123_0000027968_CAM1_NORMAL_OK...,467264,784,596
18223,18595,normal/221205_145941_0000004137_CAM1_NORMAL_OK...,467264,784,596
10425,10797,normal/221206_143911_0000005726_CAM1_NORMAL_OK...,467264,784,596
12272,12644,normal/221202_142259_0000001981_CAM1_NORMAL_OK...,467264,784,596


In [9]:
# Show the last five rows of the annotation table.
df_obj.tail(n=5)

Unnamed: 0,id,category_id,bbox,image_id
396,397,over,"[323, 287, 42, 42]",368
397,398,over,"[366, 303, 42, 28]",368
398,399,under,"[393, 259, 187, 111]",369
399,400,under,"[390, 256, 175, 129]",370
400,401,over,"[311, 268, 43, 65]",371


In [13]:
def yolo_to_coco(anno):
    """Shift the anchor point of an ``[x, y, w, h]`` box by half its size.

    ``x`` is reduced by ``w // 2`` and ``y`` by ``h // 2`` (floor division);
    the width and height are passed through unchanged. Presumably the input
    ``x, y`` is the box centre and the result is the top-left corner, as the
    function name suggests - confirm against the label format.

    Parameters
    ----------
    anno : sequence of four numbers
        ``(x, y, w, h)``.

    Returns
    -------
    list
        ``[x - w // 2, y - h // 2, w, h]``.
    """
    cx, cy, box_w, box_h = anno
    left = cx - (box_w // 2)
    top = cy - (box_h // 2)
    return [left, top, box_w, box_h]


In [15]:
# Run every stored box through yolo_to_coco and write the result back.
# NOTE: the column is overwritten in place, so re-running this cell shifts
# the boxes a second time.
df_obj['bbox'] = df_obj['bbox'].map(yolo_to_coco)

In [16]:
# Box area = third element times fourth element of each bbox (w * h).
df_obj['area'] = df_obj['bbox'].map(lambda box: box[2] * box[3])

In [17]:
# Number of annotations per category, most frequent first.
df_obj['category_id'].value_counts(sort=True)

over           233
under          153
non-welding     15
Name: category_id, dtype: int64

In [18]:
# Integer id assigned to each category name.
category_to_id = {
    'over': 1,
    'under': 2,
    'non-welding': 3,
}

# Replace category names by their ids. The dict is indexed directly so that a
# name missing from the table raises KeyError.
# NOTE: the column is overwritten in place; re-running the cell looks up the
# already-converted integer ids and fails with KeyError.
df_obj['category_id'] = df_obj['category_id'].map(lambda name: category_to_id[name])

# Every annotation gets iscrowd = 0.
df_obj['iscrowd'] = 0

In [19]:
# Save both tables as CSV (without the row index).
# The output folder is created first: nothing else in this notebook creates
# '../info', and to_csv fails if the directory does not exist.
os.makedirs('../info', exist_ok=True)
df_img.to_csv('../info/df_img.csv', index=False)
df_obj.to_csv('../info/df_obj.csv', index=False)

In [20]:
# Convert to a COCO-style dictionary: image records, annotation records and
# one category entry per name in category_to_id (all under the 'defect'
# supercategory, in the dict's insertion order).
df_coco = {
    'images': df_img.to_dict('records'),
    'annotations': df_obj.to_dict('records'),
    'categories': [
        {'id': cat_id, 'name': cat_name, 'supercategory': 'defect'}
        for cat_name, cat_id in category_to_id.items()
    ],
}


In [21]:
# Write the COCO dictionary as indented JSON.
# The output folder is created first so the write does not fail when
# '../info' is missing. The handle gets its own name instead of reusing
# `json_file`, which an earlier cell binds to parsed label content.
os.makedirs('../info', exist_ok=True)
with open("../info/coco.json", "w") as coco_file:
    json.dump(df_coco, coco_file, indent=4)