In [29]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [30]:
import os,sys
sys.path.insert(0,"..")
from glob import glob
import matplotlib.pyplot as plt
import shutil
import numpy as np
import pandas as pd
import pathlib
import torch
import torchvision
import torchxrayvision as xrv
import ast
import re
import skimage
from PIL import Image

Helper functions for creating the image subsets:

move_matching:
    df - first collumn is unique id labelled ['id']
    dest - Destination directory to move files
    source - source directory

rename_matching:
    df - first collumn is ['id'] and second is ['file_dir'] which points to the location of the files such that path/file_dir points to the file
    path - base folder for all data

    
    dataset - label for dataset
    subset - label for subset 
    view - label for view
    Note: they are just labels for the renaming and are not computed in any other way

In [31]:
import os
import pathlib
import shutil
from tqdm import tqdm

def rename_and_copy_files(df, source, dataset, subset, view, dest, default_file_type=".png", file_types=(".jpg", ".png")):
    pl_path = pathlib.Path(source)
    pl_dest = pathlib.Path(dest)
    pl_dest.mkdir(parents=True, exist_ok=True)
    renamed_files = []
    overlaps = []  # To store any overlaps

    for index, row in tqdm(df.iterrows(), total=df.shape[0], desc="Processing files"):
        uid = row['patientid']
        sub_path = row['file_dir']
        img_sub_path = pl_path.joinpath(sub_path)

        for file in img_sub_path.iterdir():
            if file.suffix in file_types:
                newName = f"{index}_{dataset}_{subset}_{view}_{uid}{file.suffix}"
                newPath = pl_dest / newName
                if not newPath.exists():
                    shutil.copy(file, newPath)
                    renamed_files.append(newPath)
                else:
                    overlaps.append(newPath)
    return renamed_files, overlaps  # Return both the list of renamed/copied files and the overlaps


def copy_files(file_paths, dest):
    pl_dest = pathlib.Path(dest)
    pl_dest.mkdir(parents=True, exist_ok=True)

    error_files = []

    for file_path in tqdm(file_paths, desc="Copying files"):
        try:
            destination_path = pl_dest / file_path.name
            if not destination_path.exists():
                shutil.copy(file_path, destination_path)
            else:
                error_files.append((file_path, "File already exists in destination."))
        except Exception as e:
            error_files.append((file_path, str(e)))

    return error_files

def create_subset(df, dest, source, dataset, subset, view, file_types=(".jpg", ".png")):
    renamed_files, overlaps = rename_and_copy_files(df, source, dataset, subset, view, dest, default_file_type=file_types[0], file_types=file_types)
    # If you want to print the overlaps:
    for overlap_msg in overlaps:
        print(overlap_msg)

    error_files = copy_files(renamed_files, dest)

    if error_files:
        print("Files that encountered errors during the copy operation:")
        for file, error in error_files:
            print(f"File: {file}, Error: {error}")


In [32]:
d_padchest = xrv.datasets.PC_Dataset(imgpath="/ssd2/averijordan/PADCHEST_SJ/image_zips", csvpath="/ssd2/averijordan/PADCHEST_SJ/labels_csv/PADCHEST_chest_x_ray_images_labels_160K_01.02.19.csv", views = ["PA", "AP"], unique_patients=False)

In [33]:
#Load mimic data
#CSV Paths
metadata_path = "/ssd2/jpmokc/datasets/physionet.org/files/mimic-cxr-jpg/2.0.0/mimic-cxr-2.0.0-metadata.csv.gz"
cheXpert_path = "/ssd2/jpmokc/datasets/physionet.org/files/mimic-cxr-jpg/2.0.0/mimic-cxr-2.0.0-chexpert.csv.gz"

d_mimic = xrv.datasets.MIMIC_Dataset(imgpath="/", csvpath=cheXpert_path, metacsvpath=metadata_path, views=['AP', 'PA'], unique_patients=False)



In [34]:
print(d_padchest.csv.columns)
print(d_mimic.csv.columns)

Index(['Unnamed: 0', 'ImageID', 'ImageDir', 'StudyDate_DICOM', 'StudyID',
       'PatientID', 'PatientBirth', 'PatientSex_DICOM', 'ViewPosition_DICOM',
       'Projection', 'MethodProjection', 'Pediatric', 'Modality_DICOM',
       'Manufacturer_DICOM', 'PhotometricInterpretation_DICOM',
       'PixelRepresentation_DICOM', 'PixelAspectRatio_DICOM',
       'SpatialResolution_DICOM', 'BitsStored_DICOM', 'WindowCenter_DICOM',
       'WindowWidth_DICOM', 'Rows_DICOM', 'Columns_DICOM',
       'XRayTubeCurrent_DICOM', 'Exposure_DICOM', 'ExposureInuAs_DICOM',
       'ExposureTime', 'RelativeXRayExposure_DICOM', 'ReportID', 'Report',
       'MethodLabel', 'Labels', 'Localizations',
       'LabelsLocalizationsBySentence', 'labelCUIS', 'LocalizationsCUIS',
       'view', 'offset_day_int', 'patientid', 'age_years', 'sex_male',
       'sex_female'],
      dtype='object')
Index(['subject_id', 'study_id', 'Atelectasis', 'Cardiomegaly',
       'Consolidation', 'Edema', 'Enlarged Cardiomediastinum', 'F

In [35]:
#Create csv subset
cols = ['Cardiomegaly','No Finding', 'Pleural Effusion',"Pneumonia",'ProcedureCodeSequence_CodeMeaning', 'subject_id', 'study_id', 'dicom_id', 'patientid']
d_mimic_sub = d_mimic.csv[cols].copy()

#Add file directories collumn
d_mimic_sub['file_dir'] = "p" + d_mimic_sub['subject_id'].astype(str).str[:2] + "/p" + d_mimic_sub['subject_id'].astype(str) + "/s" + d_mimic_sub['study_id'].astype(str) + "/"


In [36]:
d_mimic_sub

Unnamed: 0,Cardiomegaly,No Finding,Pleural Effusion,Pneumonia,ProcedureCodeSequence_CodeMeaning,subject_id,study_id,dicom_id,patientid,file_dir
0,0.0,1.0,0.0,0.0,CHEST (PA AND LAT),10000032,50414267,02aa804e-bde0afdd-112c0b34-7bc16630-4e384014,10000032,p10/p10000032/s50414267/
2,0.0,1.0,0.0,0.0,CHEST (PA AND LAT),10000032,53189527,2a2277a9-b0ded155-c0de8eb9-c124d10e-82c5caab,10000032,p10/p10000032/s53189527/
4,0.0,1.0,0.0,0.0,CHEST (PORTABLE AP),10000032,53911762,68b5c4b1-227d0485-9cc38c3f-7b84ab51-4b472714,10000032,p10/p10000032/s53911762/
5,0.0,1.0,0.0,0.0,CHEST (PORTABLE AP),10000032,53911762,fffabebf-74fd3a1f-673b6b41-96ec0ac9-2ab69818,10000032,p10/p10000032/s53911762/
6,0.0,1.0,0.0,0.0,CHEST (PORTABLE AP),10000032,56699142,ea030e7a-2e3b1346-bc518786-7a8fd698-f673b44c,10000032,p10/p10000032/s56699142/
...,...,...,...,...,...,...,...,...,...,...
377089,0.0,1.0,0.0,0.0,CHEST (PA AND LAT),19999733,57132437,3fcd0406-9b111603-feae7033-96632b3a-111333e5,19999733,p19/p19999733/s57132437/
377090,0.0,1.0,0.0,0.0,CHEST (PA AND LAT),19999733,57132437,428e2c18-5721d8f3-35a05001-36f3d080-9053b83c,19999733,p19/p19999733/s57132437/
377092,-1.0,,0.0,,CHEST (PORTABLE AP),19999987,55368167,58766883-376a15ce-3b323a28-6af950a0-16b793bd,19999987,p19/p19999987/s55368167/
377093,,,,,CHEST (PORTABLE AP),19999987,58621812,7ba273af-3d290f8d-e28d0ab4-484b7a86-7fc12b08,19999987,p19/p19999987/s58621812/


In [37]:
allowed_ap_views = {'CHEST (PORTABLE AP)',  'CHEST (SINGLE VIEW)'}
allowed_pa_views = {'CHEST (PA AND LAT)'}


d_mimic_ap = d_mimic_sub[d_mimic_sub['ProcedureCodeSequence_CodeMeaning'].isin(allowed_ap_views)]
d_mimic_pa = d_mimic_sub[d_mimic_sub['ProcedureCodeSequence_CodeMeaning'].isin(allowed_pa_views)]

#Create filter for count of labels assignmed
exclusive_filter = d_mimic_sub[['Cardiomegaly','No Finding', 'Pleural Effusion', 'Pneumonia']].sum(axis=1)
binary_filter = d_mimic_sub[['Cardiomegaly','No Finding']].sum(axis=1)

#Apply filter so that only entries with one label are left
d_mimic_ap = d_mimic_ap[binary_filter == 1]
d_mimic_pa = d_mimic_pa[binary_filter == 1]


In [38]:
d_mimic_cardiomegaly_ap = d_mimic_ap[d_mimic_ap['Cardiomegaly'] == 1.0]
d_mimic_no_finding_ap  = d_mimic_ap[d_mimic_ap['No Finding'] == 1.0]

d_mimic_cardiomegaly_pa  = d_mimic_pa[d_mimic_pa['Cardiomegaly'] == 1.0]
d_mimic_no_finding_pa       = d_mimic_pa[d_mimic_pa['No Finding'] == 1.0]


In [39]:
d_mimic_cardiomegaly_ap_file_info  = d_mimic_cardiomegaly_ap[["patientid", "file_dir"]].copy()
d_mimic_cardiomegaly_pa_file_info  = d_mimic_cardiomegaly_pa[["patientid", "file_dir"]].copy()

d_mimic_no_finding_ap_file_info = d_mimic_no_finding_ap[["patientid", "file_dir"]].copy()
d_mimic_no_finding_pa_file_info = d_mimic_no_finding_pa[["patientid", "file_dir"]].copy()



In [40]:
totals = d_padchest.totals()
pathologies = []
for p in totals:
    pathologies.append(p)

pathologies.append('Normal')
pathologies.append('pleural effusion')

In [41]:
pathologies

['Air Trapping',
 'Aortic Atheromatosis',
 'Aortic Elongation',
 'Atelectasis',
 'Bronchiectasis',
 'Cardiomegaly',
 'Consolidation',
 'Costophrenic Angle Blunting',
 'Edema',
 'Effusion',
 'Emphysema',
 'Fibrosis',
 'Flattened Diaphragm',
 'Fracture',
 'Granuloma',
 'Hemidiaphragm Elevation',
 'Hernia',
 'Hilar Enlargement',
 'Infiltration',
 'Mass',
 'Nodule',
 'Pleural_Thickening',
 'Pneumonia',
 'Pneumothorax',
 'Scoliosis',
 'Support Devices',
 'Tube',
 'Tuberculosis',
 'normal',
 'pleural effusion']

In [28]:
#Seperate Collumns we want
d_labelData = pd.concat([d_padchest.csv["Labels"],d_padchest.csv["LabelsLocalizationsBySentence"], d_padchest.csv["patientid"], d_padchest.csv["StudyID"], d_padchest.csv["Projection"], d_padchest.csv["ImageID"], d_padchest.csv["ImageDir"]], axis=1)

#Seperate out the bundled inner arrays 
d_labelData["Labels"] = d_labelData["Labels"].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)

d_labelData = d_labelData[d_labelData["Labels"].apply(lambda x: any(label.lower() in [s.lower() for s in x] for label in pathologies))]

#Create Collum for file directories (used in above function)
d_labelData["file_dir"] = "images-" + d_labelData["ImageDir"].astype(str) + "/"

#Chop off the .jpg
d_labelData['ImageID'] = d_labelData['ImageID'].apply(lambda x: x[:-4])



{'Atelectasis': {0.0: 82269, 1.0: 48790},
 'Cardiomegaly': {0.0: 92752, 1.0: 47673},
 'Consolidation': {0.0: 88179, 1.0: 11525},
 'Edema': {0.0: 101747, 1.0: 29331},
 'Effusion': {0.0: 102492, 1.0: 57721},
 'Enlarged Cardiomediastinum': {0.0: 84746, 1.0: 7657},
 'Fracture': {0.0: 81584, 1.0: 4781},
 'Lung Lesion': {0.0: 81685, 1.0: 6632},
 'Lung Opacity': {0.0: 83336, 1.0: 54769},
 'Pleural Other': {0.0: 81202, 1.0: 2083},
 'Pneumonia': {0.0: 96223, 1.0: 17222},
 'Pneumothorax': {0.0: 120294, 1.0: 11235},
 'Support Devices': {0.0: 84023, 1.0: 61717}}
{'Air Trapping': {0.0: 91429, 1.0: 3396},
 'Aortic Atheromatosis': {0.0: 93193, 1.0: 1632},
 'Aortic Elongation': {0.0: 87071, 1.0: 7754},
 'Atelectasis': {0.0: 90480, 1.0: 4345},
 'Bronchiectasis': {0.0: 93314, 1.0: 1511},
 'Cardiomegaly': {0.0: 86081, 1.0: 8744},
 'Consolidation': {0.0: 93729, 1.0: 1096},
 'Costophrenic Angle Blunting': {0.0: 91234, 1.0: 3591},
 'Edema': {0.0: 94366, 1.0: 459},
 'Effusion': {0.0: 90887, 1.0: 3938},
 'Emp

KeyboardInterrupt: 

In [None]:
#Seperate Collumns we want
d_labelData = pd.concat([d_padchest.csv["Labels"],d_padchest.csv["LabelsLocalizationsBySentence"], d_padchest.csv["patientid"], d_padchest.csv["StudyID"], d_padchest.csv["Projection"], d_padchest.csv["ImageID"], d_padchest.csv["ImageDir"]], axis=1)

#Seperate out the bundled inner arrays 
d_labelData["Labels"] = d_labelData["Labels"].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)

d_labelData = d_labelData[d_labelData["Labels"].apply(lambda x: any(label.lower() in [s.lower() for s in x] for label in pathologies))]

#Create Collum for file directories (used in above function)
d_labelData["file_dir"] = "images-" + d_labelData["ImageDir"].astype(str) + "/"

#Chop off the .jpg
d_labelData['ImageID'] = d_labelData['ImageID'].apply(lambda x: x[:-4])



In [None]:
# Create label subset for pad_chest

# For cardiomegaly, ensure that 'normal' is not present
d_padchest_cardiomegaly= d_labelData[d_labelData["Labels"].apply(lambda x: ('cardiomegaly' in x) and ('normal' not in x))]

# For normal, ensure that 'cardiomegaly' is not present
d_padchest_no_finding = d_labelData[d_labelData["Labels"].apply(lambda x: ('normal' in x) and ('pleural Cardiomegaly_' not in x))]


In [None]:
d_padchest_no_finding["Labels"] = d_padchest_no_finding["Labels"].apply(lambda labels_list: ["No-Finding" if label == "normal" else label for label in labels_list])


In [None]:
d_padchest_cardiomegaly["Labels"] = d_padchest_cardiomegaly["Labels"].apply(lambda labels_list: [label.title() for label in labels_list])
d_padchest_cardiomegaly

In [None]:
#Seperate out views

d_padchest_cardiomegaly_ap = d_padchest_cardiomegaly[d_padchest_cardiomegaly['Projection'] == "AP"]
d_padchest_cardiomegaly_pa = d_padchest_cardiomegaly[d_padchest_cardiomegaly['Projection'] == "PA"]

d_padchest_no_finding_ap = d_padchest_no_finding[d_padchest_no_finding['Projection'] == "AP"]
d_padchest_no_finding_pa = d_padchest_no_finding[d_padchest_no_finding['Projection'] == "PA"]

In [None]:

d_padchest_cardiomegaly_ap_file_info = d_padchest_cardiomegaly_ap[["patientid", "ImageID", "file_dir"]].copy()
d_padchest_cardiomegaly_pa_file_info = d_padchest_cardiomegaly_pa[["patientid","ImageID", "file_dir"]].copy()

d_padchest_no_finding_ap_file_info = d_padchest_no_finding_ap[["patientid", "ImageID", "file_dir"]].copy()
d_padchest_no_finding_pa_file_info = d_padchest_no_finding_pa[["patientid", "ImageID", "file_dir"]].copy()


In [None]:

d_padchest_cardiomegaly_ap_file_info.reset_index(drop=True, inplace = True)
d_padchest_cardiomegaly_pa_file_info.reset_index(drop=True, inplace = True)

d_padchest_no_finding_ap_file_info.reset_index(drop=True, inplace = True)
d_padchest_no_finding_pa_file_info.reset_index(drop=True, inplace = True)

d_mimic_cardiomegaly_ap_file_info.reset_index(drop=True, inplace = True)
d_mimic_cardiomegaly_pa_file_info.reset_index(drop=True, inplace = True)

d_mimic_no_finding_ap_file_info.reset_index(drop=True, inplace = True)
d_mimic_no_finding_pa_file_info.reset_index(drop=True, inplace = True)

In [None]:
#Move every to csvs for later use

d_padchest_cardiomegaly_ap_file_info.to_csv("d_padchest_cardiomegaly_ap.csv")
d_padchest_cardiomegaly_pa_file_info.to_csv("d_padchest_cardiomegaly_pa.csv")

d_padchest_no_finding_ap_file_info.to_csv("d_padchest_no_finding_ap.csv")
d_padchest_no_finding_pa_file_info.to_csv("d_padchest_no_finding_pa.csv")

d_mimic_cardiomegaly_ap_file_info.to_csv("d_mimic_cardiomegaly_ap.csv")    
d_mimic_cardiomegaly_pa_file_info.to_csv("d_mimic_cardiomegaly_pa.csv")    

d_mimic_no_finding_ap_file_info.to_csv("d_mimic_no_finding_ap.csv")
d_mimic_no_finding_pa_file_info.to_csv("d_mimic_no_finding_pa.csv")

In [None]:
src_path_mimic = "/ssd2/jpmokc/datasets/physionet.org/files/mimic-cxr-jpg/2.0.0/files/"
src_path_pad = "/ssd2/averijordan/PADCHEST_SJ/image_zips/"
dest_path = "/ssd2/averijordan/datasets/"


In [None]:
datasets = {

    # PadChest datasets
    "d_padchest_cardiomegaly_ap": {
        "data": d_padchest_cardiomegaly_ap,
        "file_info": d_padchest_cardiomegaly_ap_file_info,
        "dataset_name": "PadChest",
    },
    "d_padchest_cardiomegaly_pa": {
        "data": d_padchest_cardiomegaly_pa,
        "file_info": d_padchest_cardiomegaly_pa_file_info,
        "dataset_name": "PadChest",
    },
    "d_padchest_no_finding_ap": {
        "data": d_padchest_no_finding_ap,
        "file_info": d_padchest_no_finding_ap_file_info,
        "dataset_name": "PadChest",
    },
    "d_padchest_no_finding_pa": {
        "data": d_padchest_no_finding_pa,
        "file_info": d_padchest_no_finding_pa_file_info,
        "dataset_name": "PadChest",
    },

    # MIMIC datasets
    "d_mimic_cardiomegaly_ap": {
        "data": d_mimic_cardiomegaly_ap,
        "file_info": d_mimic_cardiomegaly_ap_file_info,
        "dataset_name": "MIMIC",
    },
    "d_mimic_cardiomegaly_pa": {
        "data": d_mimic_cardiomegaly_pa,
        "file_info": d_mimic_cardiomegaly_pa_file_info,
        "dataset_name": "MIMIC",
    },
    "d_mimic_no_finding_ap": {
        "data": d_mimic_no_finding_ap,
        "file_info": d_mimic_no_finding_ap_file_info,
        "dataset_name": "MIMIC",
    },
    "d_mimic_no_finding_pa": {
        "data": d_mimic_no_finding_pa,
        "file_info": d_mimic_no_finding_pa_file_info,
        "dataset_name": "MIMIC",
    },
}


In [None]:
def sample_dataframes(data, file_info, sample_size=1000, random_state=100):
    # Reset the indices for both dataframes
    data.reset_index(drop=True, inplace=True)
    file_info.reset_index(drop=True, inplace=True)
    
    n_samples = min(sample_size, len(data))
    
    # Get the indices of the sampled data
    sampled_indices = data.sample(n=n_samples, random_state=random_state).index
    
    # Update the data and file_info using the sampled indices in place
    for df in [data, file_info]:
        drop_indices = set(df.index) - set(sampled_indices)
        df.drop(drop_indices, inplace=True)
for key, dataset_info in datasets.items():
    sample_dataframes(dataset_info['data'], dataset_info['file_info'], sample_size=430,random_state=50)
    print(f"Sampled dataset {key}. New length: {len(dataset_info['data'])} , {len(dataset_info['file_info'])}")


In [None]:
d_mimic_cardiomegaly_ap

In [None]:
create_subset(d_padchest_cardiomegaly_ap_file_info, dest_path+"/PadChest/Cardiomegaly/AP",src_path_pad,"PadChest", "Cardiomegaly", "AP", ".png")
create_subset(d_padchest_no_finding_ap_file_info, dest_path+"/PadChest/No-Finding/AP",src_path_pad,"PadChest", "No-Finding", "AP", ".png")


In [27]:
create_subset(d_mimic_cardiomegaly_ap_file_info, dest_path+"/MIMIC/Cardiomegaly/AP",src_path_mimic,"MIMIC", "Cardiomegaly", "AP", ".jpg")
create_subset(d_mimic_no_finding_ap_file_info, dest_path+"/MIMIC/No-Finding/AP",src_path_mimic,"MIMIC", "No-Finding", "AP", ".jpg")


Processing files: 100%|██████████| 430/430 [00:01<00:00, 290.24it/s]


/ssd2/averijordan/datasets/MIMIC/Cardiomegaly/AP/260_MIMIC_Cardiomegaly_AP_10073847.jpg
/ssd2/averijordan/datasets/MIMIC/Cardiomegaly/AP/422_MIMIC_Cardiomegaly_AP_10147087.jpg
/ssd2/averijordan/datasets/MIMIC/Cardiomegaly/AP/422_MIMIC_Cardiomegaly_AP_10147087.jpg
/ssd2/averijordan/datasets/MIMIC/Cardiomegaly/AP/422_MIMIC_Cardiomegaly_AP_10147087.jpg
/ssd2/averijordan/datasets/MIMIC/Cardiomegaly/AP/422_MIMIC_Cardiomegaly_AP_10147087.jpg
/ssd2/averijordan/datasets/MIMIC/Cardiomegaly/AP/422_MIMIC_Cardiomegaly_AP_10147087.jpg
/ssd2/averijordan/datasets/MIMIC/Cardiomegaly/AP/704_MIMIC_Cardiomegaly_AP_10216556.jpg
/ssd2/averijordan/datasets/MIMIC/Cardiomegaly/AP/900_MIMIC_Cardiomegaly_AP_10268877.jpg
/ssd2/averijordan/datasets/MIMIC/Cardiomegaly/AP/1176_MIMIC_Cardiomegaly_AP_10337761.jpg
/ssd2/averijordan/datasets/MIMIC/Cardiomegaly/AP/2458_MIMIC_Cardiomegaly_AP_10712217.jpg
/ssd2/averijordan/datasets/MIMIC/Cardiomegaly/AP/2458_MIMIC_Cardiomegaly_AP_10712217.jpg
/ssd2/averijordan/datasets/MI

Copying files: 100%|██████████| 406/406 [00:00<00:00, 107309.06it/s]


Files that encountered errors during the copy operation:
File: /ssd2/averijordan/datasets/MIMIC/Cardiomegaly/AP/2_MIMIC_Cardiomegaly_AP_10001884.jpg, Error: File already exists in destination.
File: /ssd2/averijordan/datasets/MIMIC/Cardiomegaly/AP/133_MIMIC_Cardiomegaly_AP_10024982.jpg, Error: File already exists in destination.
File: /ssd2/averijordan/datasets/MIMIC/Cardiomegaly/AP/167_MIMIC_Cardiomegaly_AP_10039272.jpg, Error: File already exists in destination.
File: /ssd2/averijordan/datasets/MIMIC/Cardiomegaly/AP/213_MIMIC_Cardiomegaly_AP_10062020.jpg, Error: File already exists in destination.
File: /ssd2/averijordan/datasets/MIMIC/Cardiomegaly/AP/234_MIMIC_Cardiomegaly_AP_10066209.jpg, Error: File already exists in destination.
File: /ssd2/averijordan/datasets/MIMIC/Cardiomegaly/AP/247_MIMIC_Cardiomegaly_AP_10071403.jpg, Error: File already exists in destination.
File: /ssd2/averijordan/datasets/MIMIC/Cardiomegaly/AP/260_MIMIC_Cardiomegaly_AP_10073847.jpg, Error: File already ex

Processing files: 100%|██████████| 430/430 [00:00<00:00, 543.60it/s]


/ssd2/averijordan/datasets/MIMIC/No-Finding/AP/340_MIMIC_No-Finding_AP_10148145.jpg
/ssd2/averijordan/datasets/MIMIC/No-Finding/AP/370_MIMIC_No-Finding_AP_10161682.jpg
/ssd2/averijordan/datasets/MIMIC/No-Finding/AP/465_MIMIC_No-Finding_AP_10214395.jpg
/ssd2/averijordan/datasets/MIMIC/No-Finding/AP/573_MIMIC_No-Finding_AP_10259412.jpg
/ssd2/averijordan/datasets/MIMIC/No-Finding/AP/575_MIMIC_No-Finding_AP_10259847.jpg
/ssd2/averijordan/datasets/MIMIC/No-Finding/AP/579_MIMIC_No-Finding_AP_10261569.jpg
/ssd2/averijordan/datasets/MIMIC/No-Finding/AP/659_MIMIC_No-Finding_AP_10293407.jpg
/ssd2/averijordan/datasets/MIMIC/No-Finding/AP/789_MIMIC_No-Finding_AP_10347477.jpg
/ssd2/averijordan/datasets/MIMIC/No-Finding/AP/928_MIMIC_No-Finding_AP_10407582.jpg
/ssd2/averijordan/datasets/MIMIC/No-Finding/AP/1671_MIMIC_No-Finding_AP_10767116.jpg
/ssd2/averijordan/datasets/MIMIC/No-Finding/AP/1671_MIMIC_No-Finding_AP_10767116.jpg
/ssd2/averijordan/datasets/MIMIC/No-Finding/AP/1676_MIMIC_No-Finding_AP_10

Copying files: 100%|██████████| 339/339 [00:00<00:00, 176761.44it/s]

Files that encountered errors during the copy operation:
File: /ssd2/averijordan/datasets/MIMIC/No-Finding/AP/109_MIMIC_No-Finding_AP_10042350.jpg, Error: File already exists in destination.
File: /ssd2/averijordan/datasets/MIMIC/No-Finding/AP/246_MIMIC_No-Finding_AP_10113898.jpg, Error: File already exists in destination.
File: /ssd2/averijordan/datasets/MIMIC/No-Finding/AP/340_MIMIC_No-Finding_AP_10148145.jpg, Error: File already exists in destination.
File: /ssd2/averijordan/datasets/MIMIC/No-Finding/AP/370_MIMIC_No-Finding_AP_10161682.jpg, Error: File already exists in destination.
File: /ssd2/averijordan/datasets/MIMIC/No-Finding/AP/408_MIMIC_No-Finding_AP_10183638.jpg, Error: File already exists in destination.
File: /ssd2/averijordan/datasets/MIMIC/No-Finding/AP/441_MIMIC_No-Finding_AP_10197435.jpg, Error: File already exists in destination.
File: /ssd2/averijordan/datasets/MIMIC/No-Finding/AP/465_MIMIC_No-Finding_AP_10214395.jpg, Error: File already exists in destination.
File:




Make sure the file names are correct here if you reuse this code