In [16]:
import torch
import cv2
import glob, os
import json
import pandas as pd

In [8]:
base_dataset_path = "dataset"

car_appearance_path = os.path.join(base_dataset_path, "car_appearance_origin")

dir2cls = {directory : i for i, directory in enumerate(glob.glob(car_appearance_path+"\*\*\*"))}
cls2dir = {i : directory for i, directory in enumerate(glob.glob(car_appearance_path+"\*\*\*"))}

In [50]:
dest_pataset_path = os.path.join(base_dataset_path, "car_appearance")
os.makedirs(dest_pataset_path, exist_ok=True)

In [38]:
columns = ["LargeCategoryId",
           'MediumCategoryId',
           'SmallCategoryId',
           'yearId',
           'colorId',
           'file_name',
           'directory',
           'classId']
annotation_df = pd.DataFrame(columns=columns)

In [39]:
def make_empty_row():
    
    return pd.Series(index=columns, dtype=str)

make_empty_row()

LargeCategoryId     NaN
MediumCategoryId    NaN
SmallCategoryId     NaN
yearId              NaN
colorId             NaN
file_name           NaN
directory           NaN
classId             NaN
dtype: object

In [58]:
import numpy as np

rows = []

for dir, cls in dir2cls.items():
    for file in list(set(map(lambda x:x.split('.')[0], os.listdir(dir)))):
        with open(dir+"\\"+file+".json", encoding="utf-8") as f:
            json_object = json.load(f)
        
        row = make_empty_row()
        
        row['LargeCategoryId'] = json_object['rawDataInfo']['LargeCategoryId']
        row['MediumCategoryId'] = json_object['rawDataInfo']['MediumCategoryId']
        row['SmallCategoryId'] = json_object['rawDataInfo']['SmallCategoryId']
        row['yearId'] = json_object['rawDataInfo']['yearId']
        row['colorId'] = json_object['rawDataInfo']['colorId']
        row['file_name'] = file+".jpg"
        row['directory'] = dir
        
        try:
            anno = list(filter(lambda x: x['classId']=='P00.차량전체',
                            json_object['learningDataInfo']['objects']))[0]
             
            file_path = os.path.join(dir, file+".jpg")
            img = cv2.imdecode(np.fromfile(file_path, dtype=np.uint8),
                            cv2.IMREAD_UNCHANGED)
            cropped_image = img[int(anno['coords']['tl']['y']):int(anno['coords']['br']['y']),
                                int(anno['coords']['tl']['x']):int(anno['coords']['br']['x'])]
            
            cv2.imwrite(dest_pataset_path+"/"+file+".jpg", cropped_image)
            
            rows.append(row)
        except IndexError:
            pass
        


annotation_df = pd.DataFrame(rows)

In [61]:
annotation_df.to_csv(dest_pataset_path+'/'+"annotation.csv", index=False)

In [None]:
anno_df= annotation_df.copy()

In [79]:
cat2cls = {}

anno_df['classId'] = -1

i = 0

cid = []
for idx, row in anno_df.iterrows():
    
    cat = row.iloc[0]+row.iloc[1]+row.iloc[2]+str(row.iloc[3])+row.iloc[4]
    
    if cat2cls.get(cat, -1) == -1:
        cat2cls[cat] = i
        i += 1
    
    cid.append(cat2cls[cat])
    
anno_df['classId'] = pd.Series(cid)

In [80]:
anno_df

Unnamed: 0,LargeCategoryId,MediumCategoryId,SmallCategoryId,yearId,colorId,file_name,directory,classId
0,중형차,아우디,A4,2017,검정,C_211222_AU_006_17_BK_A_T_03_019.jpg,dataset\car_appearance_origin\AU_아우디\006_A4\20...,0
1,중형차,아우디,A4,2017,검정,C_211223_AU_006_17_BK_B_T_03_009.jpg,dataset\car_appearance_origin\AU_아우디\006_A4\20...,0
2,중형차,아우디,A4,2017,검정,C_211223_AU_006_17_BK_B_T_03_024.jpg,dataset\car_appearance_origin\AU_아우디\006_A4\20...,0
3,중형차,아우디,A4,2017,검정,C_211223_AU_006_17_BK_B_T_03_012.jpg,dataset\car_appearance_origin\AU_아우디\006_A4\20...,0
4,중형차,아우디,A4,2017,검정,C_211222_AU_006_17_BK_A_T_03_013.jpg,dataset\car_appearance_origin\AU_아우디\006_A4\20...,0
...,...,...,...,...,...,...,...,...
11110,중형차,볼보,XC60,2018,검정,C_211213_VV_095_18_BK_A_T_03_022.jpg,dataset\car_appearance_origin\VV_볼보\095_XC60\2...,1035
11111,중형차,볼보,XC60,2018,검정,C_211213_VV_095_18_BK_A_T_03_023.jpg,dataset\car_appearance_origin\VV_볼보\095_XC60\2...,1035
11112,중형차,볼보,XC60,2018,검정,C_211213_VV_095_18_BK_A_T_03_018.jpg,dataset\car_appearance_origin\VV_볼보\095_XC60\2...,1035
11113,중형차,볼보,XC60,2019,회색,C_211104_VV_095_19_GR_A_T_03_005.jpg,dataset\car_appearance_origin\VV_볼보\095_XC60\2...,1036


In [81]:
anno_df.to_csv(dest_pataset_path+'/'+"annotation.csv", index=False)
