In [1]:
import os
import os.path as osp
import json
import re
import shutil
import glob
import random
import pandas as pd
from tqdm.auto import tqdm
from typing import List

# Registry of failures: maps an error label to the list of file paths that
# triggered it. load_json_file() appends to it instead of raising.
error_cases = {}

# Return every JSON file path under a directory
def get_json_file_paths(src_path:str,exclude_directories: List[str] = None) -> List[str]:
    '''
    Recursively collect the path of every ``.json`` file under ``src_path``.

    Sub-directories whose name appears in ``exclude_directories`` are pruned
    from the walk, so nothing below them is visited. Matching is done on the
    bare directory name, at any depth.

    Args:
        src_path (str): Directory to search.
        exclude_directories (List[str], optional): Names of sub-directories
            to skip. When None or empty, nothing is skipped.

    Returns:
        List[str]: Paths of all JSON files found, in ``os.walk`` order.
    '''
    found = []
    for current_dir, sub_dirs, file_names in os.walk(src_path):
        if exclude_directories:
            # Slice assignment mutates the list os.walk holds, which is what
            # stops it from descending into the excluded directories.
            sub_dirs[:] = [name for name in sub_dirs if name not in exclude_directories]
        found.extend(
            os.path.join(current_dir, name)
            for name in file_names
            if name.endswith('.json')
        )
    return found
    
# Generator that hands out JSON file paths one at a time
# NOTE(review): the return annotation says List[str], but this function is a
# generator; Iterator[str] would describe it accurately.
def yield_json_file_paths(src_path:str) -> List[str]:
    '''
    Walk ``src_path`` recursively and lazily yield each ``.json`` file path.

    Args:
        src_path (str): Directory to search.

    Yields:
        str: Path of one JSON file.
    '''
    for current_dir, _sub_dirs, file_names in os.walk(src_path):
        yield from (
            os.path.join(current_dir, name)
            for name in file_names
            if name.endswith('.json')
        )
                
# Read one JSON file and return its content
def load_json_file(src_file_path:str) -> dict:
    '''
    Read a JSON file and return the decoded data.

    Failures are not raised. Instead the path is appended to the module-level
    ``error_cases`` dict under one of two labels and ``None`` is returned:

    * ``'read_json_error : check path'`` - the path is not an existing file
      or does not end with ``.json``.
    * ``'read_json_error : wrong json'`` - opening, decoding or parsing the
      file failed.

    Args:
        src_file_path (str): Path of the JSON file to read.

    Returns:
        dict: Whatever ``json.load`` produced for the file, or ``None`` when
        the file could not be read. Callers must check for ``None``.
    '''
    if not (os.path.isfile(src_file_path) and src_file_path.endswith('.json')):
        error_cases.setdefault('read_json_error : check path', []).append(src_file_path)
        return None
    try:
        # 'utf-8-sig' drops a leading BOM when there is one.
        with open(src_file_path, 'r', encoding='utf-8-sig') as f:
            return json.load(f)
    except Exception:
        # Exception rather than a bare except: Ctrl-C and SystemExit must
        # still be able to stop a long batch run.
        error_cases.setdefault('read_json_error : wrong json', []).append(src_file_path)
        return None
            
# Load the content of every JSON file under a directory
def load_json_files(src_path:str,mode:str = 'yield') -> List[dict]:
    '''
    Load every JSON file found under ``src_path``.

    ``mode`` only selects how the file paths are discovered; both modes
    return the loaded content. (Previously any mode other than ``'yield'``
    returned the bare file paths, contradicting the ``List[dict]`` contract.)

    Args:
        src_path (str): Directory to search for JSON files.
        mode (str): ``'yield'`` streams paths from ``yield_json_file_paths``;
            any other value collects all paths up front with
            ``get_json_file_paths``.

    Returns:
        List[dict]: One entry per JSON file, as returned by
        ``load_json_file`` (``None`` for a file that failed to load).
    '''
    if mode == 'yield':
        json_paths = yield_json_file_paths(src_path)
    else:
        json_paths = get_json_file_paths(src_path)
    return [load_json_file(one_path) for one_path in json_paths]


    
# Save data to a JSON file
def save_json_file(json_file,path):
    '''
    Serialize ``json_file`` as JSON and write it to ``path``.

    The target is opened in ``'w'`` mode, so an existing file is overwritten.
    Output is UTF-8 with non-ASCII characters written as-is and 4-space
    indentation.

    Args:
        json_file: JSON-serializable object to write.
        path: Destination file path.
    '''
    dump_options = {'ensure_ascii': False, 'indent': 4}
    with open(path, mode='w', encoding='utf-8') as out_file:
        json.dump(json_file, out_file, **dump_options)

In [4]:
# NOTE: one_json_path is only bound by the `for one_json_path in tqdm(json_paths)`
# loop; evaluating this expression before that loop has run raises NameError.
osp.dirname(one_json_path)

NameError: name 'one_json_path' is not defined

In [2]:
# Root directory scanned for JSON files. Hard-coded; judging by the path it is
# an sftp share mounted through gvfs - TODO confirm it is mounted before running.
src_dir = '/run/user/1000/gvfs/sftp:host=192.168.0.114/ADIN22023_AIMMO_AD/2_PJT/6_final_output/tan/최종 검수'

In [3]:
# Collect every JSON path under src_dir (no directories excluded), then echo
# the root directory and the full list of paths.
json_paths = get_json_file_paths(src_dir)
print(src_dir)
print(json_paths)

/run/user/1000/gvfs/sftp:host=192.168.0.114/ADIN22023_AIMMO_AD/2_PJT/6_final_output/tan/최종 검수
['/run/user/1000/gvfs/sftp:host=192.168.0.114/ADIN22023_AIMMO_AD/2_PJT/6_final_output/tan/최종 검수/batch_1/20220511/2022-05-11_14-47-16_ADCV1-ADS-LC1/FR-View-CMR-Wide/1652248039218_FR-View-CMR-Wide.png.json', '/run/user/1000/gvfs/sftp:host=192.168.0.114/ADIN22023_AIMMO_AD/2_PJT/6_final_output/tan/최종 검수/batch_1/20220511/2022-05-11_14-47-16_ADCV1-ADS-LC1/FR-View-CMR-Wide/1652248036218_FR-View-CMR-Wide.png.json', '/run/user/1000/gvfs/sftp:host=192.168.0.114/ADIN22023_AIMMO_AD/2_PJT/6_final_output/tan/최종 검수/batch_1/20220511/2022-05-11_14-47-16_ADCV1-ADS-LC1/FR-View-CMR-Wide/1652248043218_FR-View-CMR-Wide.png.json', '/run/user/1000/gvfs/sftp:host=192.168.0.114/ADIN22023_AIMMO_AD/2_PJT/6_final_output/tan/최종 검수/batch_1/20220511/2022-05-11_14-47-16_ADCV1-ADS-LC1/FR-View-CMR-Wide/1652248042218_FR-View-CMR-Wide.png.json', '/run/user/1000/gvfs/sftp:host=192.168.0.114/ADIN22023_AIMMO_AD/2_PJT/6_final_output/

In [4]:
# Number of JSON paths collected.
len(json_paths)

9175

In [5]:
# Strip the '%' sign from the 'occlusion' and 'truncation' attributes of every
# annotation, then write each file back in place.
for one_json_path in tqdm(json_paths):
    one_json = load_json_file(one_json_path)
    if one_json is None:
        # load_json_file already recorded the failure in error_cases; skipping
        # keeps one unreadable file from aborting the whole batch.
        continue
    # Annotations are edited in place, so no separate result list is needed.
    for one_annotation in one_json['annotations']:
        attributes = one_annotation['attributes']
        for key in ('occlusion', 'truncation'):
            value = attributes.get(key)
            # Only string values can carry a '%' sign; leave anything else
            # (missing, None, numeric) untouched instead of crashing.
            if isinstance(value, str):
                attributes[key] = value.replace('%', '')
    save_json_file(one_json, one_json_path)

  0%|          | 0/9175 [00:00<?, ?it/s]