In [6]:
# 'GPT/PETCT_2017_THANG 12_images_chest_day_5_patient_421.json' --> 'PETCT_2017_THANG 12_5_patient_421_REPORT_patient_421'
# 'GPT/PETCT_2017_THANG 12_images_chest_day_27_patient_399.json' --> "PETCT_2017_THANG 12_27_patient_399_REPORT_patient_399"


PETCT_2017_THANG 12_images__patient_421_REPORT_patient_421
PETCT_2017_THANG 12_images__patient_399_REPORT_patient_399


In [19]:
import re
import os

def convert_path(input_path: str) -> str:
    """
    Derive the report name that corresponds to a GPT result file path.

    Only the last component of `input_path` is inspected. A filename of the form
    'PETCT_<year>_THANG <month>_images_chest_day_<day>_patient_<n>.json'
    is rewritten as
    'PETCT_<year>_THANG <month>_<day>_patient_<n>_REPORT_patient_<n>'.

    Examples:
    'GPT/PETCT_2017_THANG 12_images_chest_day_5_patient_421.json'
    --> 'PETCT_2017_THANG 12_5_patient_421_REPORT_patient_421'
    'GPT/PETCT_2018_THANG 11_images_chest_day_06_patient_643.json'
    --> 'PETCT_2018_THANG 11_06_patient_643_REPORT_patient_643'

    Args:
        input_path: Path (or bare filename) of the JSON file.

    Returns:
        The rewritten name when the filename fits the pattern. Otherwise a
        warning is printed and `input_path` is returned unchanged.
    """
    base_name = os.path.basename(input_path)

    # Capture groups, in order:
    #   1. "PETCT_<4-digit year>_THANG <1-2 digit month>_"  (note the space after THANG)
    #   2. the digits following "images_chest_day_" (kept verbatim, so "06" stays "06")
    #   3. "patient_<digits>"
    # The literal "images_chest_day_" is matched but not captured, and the
    # name must end in ".json".
    pattern = r"(PETCT_\d{4}_THANG \d{1,2}_)images_chest_day_(\d+)_(patient_\d+)\.json$"
    found = re.match(pattern, base_name)

    # Guard clause: anything that does not fit is reported and passed through.
    if found is None:
        print(f"Warning: Path '{input_path}' (filename: '{filename_for_warning(base_name)}') did not match the expected pattern.") if False else print(f"Warning: Path '{input_path}' (filename: '{base_name}') did not match the expected pattern.")
        return input_path

    study_prefix, day, patient = found.groups()

    # The patient token appears twice: once after the day and once after "_REPORT_".
    return study_prefix + day + "_" + patient + "_REPORT_" + patient

# Check table for convert_path: every entry pairs an input path with the
# string the function is expected to return for it.
test_cases = [
    # The two docstring examples.
    dict(
        input='GPT/PETCT_2017_THANG 12_images_chest_day_5_patient_421.json',
        expected='PETCT_2017_THANG 12_5_patient_421_REPORT_patient_421',
    ),
    dict(
        input='GPT/PETCT_2018_THANG 11_images_chest_day_06_patient_643.json',
        expected='PETCT_2018_THANG 11_06_patient_643_REPORT_patient_643',
    ),
    # Taken from the initial examples.
    dict(
        input='GPT/PETCT_2017_THANG 12_images_chest_day_27_patient_399.json',
        expected="PETCT_2017_THANG 12_27_patient_399_REPORT_patient_399",
    ),
    # No "GPT/" directory, single-digit month and day.
    dict(
        input='PETCT_2023_THANG 1_images_chest_day_1_patient_101.json',
        expected="PETCT_2023_THANG 1_1_patient_101_REPORT_patient_101",
    ),
    # Non-matching name: the original path comes back (after a warning).
    dict(
        input='GPT/INVALID_FORMAT.json',
        expected='GPT/INVALID_FORMAT.json',
    ),
]


# Run each case and print a PASS/FAIL report; cases are numbered from 1.
for case_no, case in enumerate(test_cases, start=1):
    print(f"--- Test Case {case_no} ---")
    source_path, wanted = case["input"], case["expected"]
    got = convert_path(source_path)

    print(f"Input:    '{source_path}'")
    print(f"Expected: '{wanted}'")
    print(f"Actual:   '{got}'")
    print("Result:   PASS" if got == wanted else "Result:   FAIL")
    print("-" * 20)

# Demonstrate convert_path on the two docstring examples.
path1 = 'GPT/PETCT_2017_THANG 12_images_chest_day_5_patient_421.json'
path2 = 'GPT/PETCT_2018_THANG 11_images_chest_day_06_patient_643.json'

# Convert both up front, then print the before/after pairs.
converted1, converted2 = (convert_path(example) for example in (path1, path2))

print(f"\nExample 1:\n'{path1}' --> '{converted1}'")
print(f"Example 2:\n'{path2}' --> '{converted2}'")

--- Test Case 1 ---
Input:    'GPT/PETCT_2017_THANG 12_images_chest_day_5_patient_421.json'
Expected: 'PETCT_2017_THANG 12_5_patient_421_REPORT_patient_421'
Actual:   'PETCT_2017_THANG 12_5_patient_421_REPORT_patient_421'
Result:   PASS
--------------------
--- Test Case 2 ---
Input:    'GPT/PETCT_2018_THANG 11_images_chest_day_06_patient_643.json'
Expected: 'PETCT_2018_THANG 11_06_patient_643_REPORT_patient_643'
Actual:   'PETCT_2018_THANG 11_06_patient_643_REPORT_patient_643'
Result:   PASS
--------------------
--- Test Case 3 ---
Input:    'GPT/PETCT_2017_THANG 12_images_chest_day_27_patient_399.json'
Expected: 'PETCT_2017_THANG 12_27_patient_399_REPORT_patient_399'
Actual:   'PETCT_2017_THANG 12_27_patient_399_REPORT_patient_399'
Result:   PASS
--------------------
--- Test Case 4 ---
Input:    'PETCT_2023_THANG 1_images_chest_day_1_patient_101.json'
Expected: 'PETCT_2023_THANG 1_1_patient_101_REPORT_patient_101'
Actual:   'PETCT_2023_THANG 1_1_patient_101_REPORT_patient_101'
Resul

In [20]:
import json

# Location of the mapping file loaded below.
json_path = '/home/jovyan/shared/tienhuu060102/data-petct/clinical_eval/w_refactor/mapping_dict.json'

# Read the mapping into memory. The encoding is given explicitly so that
# decoding does not depend on the platform's locale default, and so this read
# agrees with the explicit UTF-8 used for the other JSON files in this notebook.
with open(json_path, 'r', encoding='utf-8') as f:
    mapping_dict = json.load(f)

# print(data)



In [21]:
# Spot-check convert_path on one of its docstring examples; as the last
# expression in the cell, the returned string is displayed as the cell output.
convert_path('GPT/PETCT_2018_THANG 11_images_chest_day_06_patient_643.json')

'PETCT_2018_THANG 11_06_patient_643_REPORT_patient_643'

In [22]:
# Reverse lookup of mapping_dict: each of its values becomes a key that points
# back at the original key. If a value occurs more than once, the entry seen
# last during iteration is the one that is kept.
name2id = {name: identifier for identifier, name in mapping_dict.items()}

# Look up the entry for one converted example path (displayed as cell output).
name2id[convert_path('GPT/PETCT_2018_THANG 11_images_chest_day_06_patient_643.json')]







'37c21d01da39b77166461a72a652380bee260402d0fea828f800ad2cc535b0ef'

In [25]:
import os

rootdir = 'GPT'
save_dir = 'GPT_new'
os.makedirs(save_dir, exist_ok=True)

# Re-save every .json file found directly in rootdir into save_dir, named after
# the value that name2id holds for the file's converted name.
# sorted() gives a reproducible processing/log order; os.listdir alone returns
# entries in arbitrary order.
for file in sorted(os.listdir(rootdir)):
    if not file.endswith('.json'):
        continue

    path = os.path.join(rootdir, file)

    # Resolve the target name before touching the file, so that a file without
    # a mapping entry is reported with a clear message instead of surfacing as
    # a bare KeyError text after the file has already been read and parsed.
    report_name = convert_path(path)
    if report_name not in name2id:
        print(f"Error processing {path}: no entry in name2id for '{report_name}'")
        continue
    new_path = os.path.join(save_dir, name2id[report_name]) + '.json'

    try:
        with open(path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        # ensure_ascii=False writes non-ASCII characters as-is rather than as
        # \u escapes; indent=4 pretty-prints the output.
        with open(new_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=4)
    except Exception as e:
        # Best-effort batch: report the failing file and move on to the next.
        print(f"Error processing {path}: {e}")


