In [1]:
import os
import json
import shutil
from tqdm import tqdm

In [2]:

def load_json(json_path):
    """
    Load the JSON file containing image paths.
    
    Parameters:
    - json_path (str): Path to the JSON file.
    
    Returns:
    - list: List of image paths from the JSON file.
    """
    try:
        with open(json_path, 'r') as file:
            data = json.load(file)
            return [i.get("image") for i in data]
    except (FileNotFoundError, json.JSONDecodeError) as e:
        print(f"Error loading JSON file: {e}")
        return []


In [3]:

def get_current_root_path():
    """
    Get the current root path.
    
    Returns:
    - str: The current root directory path.
    """
    return os.getcwd()


In [4]:

def copy_images_to_new_root(image_paths, current_root, new_root):
    """
    Copy all images from the current root path to the new root path, maintaining folder structure,
    with progress tracking using tqdm.
    
    Parameters:
    - image_paths (list): List of image paths relative to the current root.
    - current_root (str): The current root path.
    - new_root (str): The new root path where images will be copied.
    """
    for image_path in tqdm(image_paths, desc="Copying images", unit="file"):
        current_image_path = os.path.join(current_root, image_path)
        new_image_path = os.path.join(new_root, image_path)
        
        try:
            if os.path.isfile(current_image_path):
                # Create necessary directories in the new root
                os.makedirs(os.path.dirname(new_image_path), exist_ok=True)
                shutil.copy2(current_image_path, new_image_path)
            else:
                print(f"File not found: {current_image_path}")
        except Exception as e:
            print(f"Error copying {current_image_path}: {e}")


In [5]:


json_path = "../data/data/physionet.org/files/mimic-cxr-jpg/2.0.0/mimic_factuality.json"  # Replace with your actual JSON file path
current_root = "../data/data/physionet.org/files/mimic-cxr-jpg/2.0.0/files"  # Replace with your actual current root path
new_root = "../data/data/physionet.org/files/mimic-cxr-jpg/2.0.0/images"  # Replace with your actual new root path


In [6]:
image_paths = load_json(json_path)
image_paths[:2]

['p10/p10046166/s50051329/abea5eb9-b7c32823-3a14c5ca-77868030-69c83139.jpg',
 'p10/p10046166/s50051329/abea5eb9-b7c32823-3a14c5ca-77868030-69c83139.jpg']

In [7]:

if image_paths:
    print(f"Loaded {len(image_paths)} image paths from JSON.")
    # Copy images to the new root
    copy_images_to_new_root(image_paths, current_root, new_root)
else:
    print("No image paths found in the JSON file.")


Loaded 10361 image paths from JSON.


Copying images:   0%|          | 0/10361 [00:00<?, ?file/s]

Copying images: 100%|██████████| 10361/10361 [00:57<00:00, 179.54file/s]

File not found: ../data/data/physionet.org/files/mimic-cxr-jpg/2.0.0/files/p18/p18224196/s54882267/1a5a59f7-d389a59a-1d55691a-0a77b80a-96ea4108.jpg
File not found: ../data/data/physionet.org/files/mimic-cxr-jpg/2.0.0/files/p18/p18224196/s54882267/1a5a59f7-d389a59a-1d55691a-0a77b80a-96ea4108.jpg
File not found: ../data/data/physionet.org/files/mimic-cxr-jpg/2.0.0/files/p18/p18224196/s54882267/1a5a59f7-d389a59a-1d55691a-0a77b80a-96ea4108.jpg
File not found: ../data/data/physionet.org/files/mimic-cxr-jpg/2.0.0/files/p18/p18224196/s54882267/1a5a59f7-d389a59a-1d55691a-0a77b80a-96ea4108.jpg
File not found: ../data/data/physionet.org/files/mimic-cxr-jpg/2.0.0/files/p18/p18224196/s54882267/1a5a59f7-d389a59a-1d55691a-0a77b80a-96ea4108.jpg
File not found: ../data/data/physionet.org/files/mimic-cxr-jpg/2.0.0/files/p18/p18224196/s54882267/1a5a59f7-d389a59a-1d55691a-0a77b80a-96ea4108.jpg
File not found: ../data/data/physionet.org/files/mimic-cxr-jpg/2.0.0/files/p18/p18224196/s54882267/59a459f5-0bd5


