In [1]:
import os
import pprint
import numpy as np
import pandas as pd
from pathlib import Path

In [2]:
def updateDcmPath(og_df, dcm_folder):

    try:

        # Creat new columns in og_df.
        og_df["full_path"] = np.nan
        og_df["crop_path"] = np.nan
        og_df["mask_path"] = np.nan

        # Get list of .dcm paths.
        dcm_paths_list = []
        for _, _, files in os.walk(dcm_folder):
            for f in files:
                if f.endswith(".dcm"):
                    dcm_paths_list.append(os.path.join(dcm_folder, f))

        for row in og_df.itertuples():

            row_id = row.Index

            # Get identification details.
            patient_id = row.patient_id
            img_view = row.image_view
            lr = row.left_or_right_breast
            abnormality_id = row.abnormality_id

            # Use this list to match DF row with .dcm path.
            info_list = [patient_id, img_view, lr]

            crop_suffix = "CROP_" + str(abnormality_id)
            mask_suffix = "MASK_" + str(abnormality_id)

            # Get list of relevant paths to this patient.
            full_paths = [
                path
                for path in dcm_paths_list
                if all(info in path for info in info_list + ["FULL"])
            ]

            crop_paths = [
                path
                for path in dcm_paths_list
                if all(info in path for info in info_list + [crop_suffix])
            ]

            mask_paths = [
                path
                for path in dcm_paths_list
                if all(info in path for info in info_list + [mask_suffix])
            ]

            # full_paths_str = ",".join(full_paths)
            # crop_paths_str = ",".join(crop_paths)
            # mask_paths_str = ",".join(mask_paths)

            # Update paths.
            if len(full_paths) > 0:
                og_df.loc[row_id, "full_path"] = full_paths
            if len(crop_paths) > 0:
                og_df.loc[row_id, "crop_path"] = crop_paths
            if len(mask_paths) > 0:
                og_df.loc[row_id, "mask_path"] = mask_paths

                    
    except Exception as e:
        print((f"Unable to get updateDcmPath!\n{e}"))
    
    

    return og_df

In [15]:
train = pd.read_csv("data\Calc\Calc-Train-Description.csv")
test = pd.read_csv("data\Calc\Calc-Test-Description.csv")

In [16]:
train_path = "data\Calc\Train\ALL DCM"
test_path = "data\Calc\Test\ALL DCM"

In [17]:
new_cols = [col.replace(" ", "_") for col in train.columns]
train.columns = new_cols   
new_cols = [col.replace(" ", "_") for col in test.columns]
test.columns = new_cols   

In [18]:
updated_train = updateDcmPath(train,train_path)
updated_test = updateDcmPath(test,test_path)

In [22]:
for i in range(1,1546) :
        if type(updated_train['full_path'][i]) == list :
            updated_train['full_path'][i] = updated_train['full_path'][i][0]
        if type(updated_train['crop_path'][i]) == list :
            updated_train['crop_path'][i] = updated_train['crop_path'][i][0]
        if type(updated_train['mask_path'][i]) == list :
            updated_train['mask_path'][i] = updated_train['mask_path'][i][0]   

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  updated_train['full_path'][i] = updated_train['full_path'][i][0]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  updated_train['crop_path'][i] = updated_train['crop_path'][i][0]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  updated_train['mask_path'][i] = updated_train['mask_path'][i][0]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

In [23]:
updated_train['full_path'] = updated_train['full_path'].str.replace('\\', '/')
updated_train['crop_path'] = updated_train['crop_path'].str.replace('\\', '/')
updated_train['mask_path'] = updated_train['mask_path'].str.replace('\\', '/')

In [25]:
updated_train

Unnamed: 0,patient_id,breast_density,left_or_right_breast,image_view,abnormality_id,abnormality_type,calc_type,calc_distribution,assessment,pathology,subtlety,image_file_path,cropped_image_file_path,ROI_mask_file_path,full_path,crop_path,mask_path
0,P_00005,3,RIGHT,CC,1,calcification,AMORPHOUS,CLUSTERED,3,MALIGNANT,3,Calc-Training_P_00005_RIGHT_CC/1.3.6.1.4.1.959...,Calc-Training_P_00005_RIGHT_CC_1/1.3.6.1.4.1.9...,Calc-Training_P_00005_RIGHT_CC_1/1.3.6.1.4.1.9...,data/Calc/Train/ALL DCM/Calc-Training_P_00005_...,data/Calc/Train/ALL DCM/Calc-Training_P_00005_...,data/Calc/Train/ALL DCM/Calc-Training_P_00005_...
1,P_00005,3,RIGHT,MLO,1,calcification,AMORPHOUS,CLUSTERED,3,MALIGNANT,3,Calc-Training_P_00005_RIGHT_MLO/1.3.6.1.4.1.95...,Calc-Training_P_00005_RIGHT_MLO_1/1.3.6.1.4.1....,Calc-Training_P_00005_RIGHT_MLO_1/1.3.6.1.4.1....,data/Calc/Train/ALL DCM/Calc-Training_P_00005_...,data/Calc/Train/ALL DCM/Calc-Training_P_00005_...,data/Calc/Train/ALL DCM/Calc-Training_P_00005_...
2,P_00007,4,LEFT,CC,1,calcification,PLEOMORPHIC,LINEAR,4,BENIGN,4,Calc-Training_P_00007_LEFT_CC/1.3.6.1.4.1.9590...,Calc-Training_P_00007_LEFT_CC_1/1.3.6.1.4.1.95...,Calc-Training_P_00007_LEFT_CC_1/1.3.6.1.4.1.95...,data/Calc/Train/ALL DCM/Calc-Training_P_00007_...,data/Calc/Train/ALL DCM/Calc-Training_P_00007_...,data/Calc/Train/ALL DCM/Calc-Training_P_00007_...
3,P_00007,4,LEFT,MLO,1,calcification,PLEOMORPHIC,LINEAR,4,BENIGN,4,Calc-Training_P_00007_LEFT_MLO/1.3.6.1.4.1.959...,Calc-Training_P_00007_LEFT_MLO_1/1.3.6.1.4.1.9...,Calc-Training_P_00007_LEFT_MLO_1/1.3.6.1.4.1.9...,data/Calc/Train/ALL DCM/Calc-Training_P_00007_...,data/Calc/Train/ALL DCM/Calc-Training_P_00007_...,data/Calc/Train/ALL DCM/Calc-Training_P_00007_...
4,P_00008,1,LEFT,CC,1,calcification,,REGIONAL,2,BENIGN_WITHOUT_CALLBACK,3,Calc-Training_P_00008_LEFT_CC/1.3.6.1.4.1.9590...,Calc-Training_P_00008_LEFT_CC_1/1.3.6.1.4.1.95...,Calc-Training_P_00008_LEFT_CC_1/1.3.6.1.4.1.95...,data/Calc/Train/ALL DCM/Calc-Training_P_00008_...,data/Calc/Train/ALL DCM/Calc-Training_P_00008_...,data/Calc/Train/ALL DCM/Calc-Training_P_00008_...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1541,P_02566,2,RIGHT,MLO,1,calcification,AMORPHOUS-PLEOMORPHIC,SEGMENTAL,5,MALIGNANT,5,Calc-Training_P_02566_RIGHT_MLO/1.3.6.1.4.1.95...,Calc-Training_P_02566_RIGHT_MLO_1/1.3.6.1.4.1....,Calc-Training_P_02566_RIGHT_MLO_1/1.3.6.1.4.1....,data/Calc/Train/ALL DCM/Calc-Training_P_02566_...,data/Calc/Train/ALL DCM/Calc-Training_P_02566_...,data/Calc/Train/ALL DCM/Calc-Training_P_02566_...
1542,P_02572,2,LEFT,CC,1,calcification,AMORPHOUS,CLUSTERED,0,MALIGNANT,2,Calc-Training_P_02572_LEFT_CC/1.3.6.1.4.1.9590...,Calc-Training_P_02572_LEFT_CC_1/1.3.6.1.4.1.95...,Calc-Training_P_02572_LEFT_CC_1/1.3.6.1.4.1.95...,data/Calc/Train/ALL DCM/Calc-Training_P_02572_...,data/Calc/Train/ALL DCM/Calc-Training_P_02572_...,data/Calc/Train/ALL DCM/Calc-Training_P_02572_...
1543,P_02572,2,LEFT,MLO,1,calcification,AMORPHOUS,CLUSTERED,0,MALIGNANT,2,Calc-Training_P_02572_LEFT_MLO/1.3.6.1.4.1.959...,Calc-Training_P_02572_LEFT_MLO_1/1.3.6.1.4.1.9...,Calc-Training_P_02572_LEFT_MLO_1/1.3.6.1.4.1.9...,data/Calc/Train/ALL DCM/Calc-Training_P_02572_...,data/Calc/Train/ALL DCM/Calc-Training_P_02572_...,data/Calc/Train/ALL DCM/Calc-Training_P_02572_...
1544,P_02584,1,LEFT,CC,1,calcification,PLEOMORPHIC,SEGMENTAL,0,BENIGN,4,Calc-Training_P_02584_LEFT_CC/1.3.6.1.4.1.9590...,Calc-Training_P_02584_LEFT_CC_1/1.3.6.1.4.1.95...,Calc-Training_P_02584_LEFT_CC_1/1.3.6.1.4.1.95...,data/Calc/Train/ALL DCM/Calc-Training_P_02584_...,data/Calc/Train/ALL DCM/Calc-Training_P_02584_...,data/Calc/Train/ALL DCM/Calc-Training_P_02584_...


In [28]:
for i in range(1,326) :
        if type(updated_test['full_path'][i]) == list :
            updated_test['full_path'][i] = updated_test['full_path'][i][0]
        if type(updated_test['crop_path'][i]) == list :
            updated_test['crop_path'][i] = updated_test['crop_path'][i][0]
        if type(updated_test['mask_path'][i]) == list :
            updated_test['mask_path'][i] = updated_test['mask_path'][i][0] 

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  updated_test['full_path'][i] = updated_test['full_path'][i][0]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  updated_test['crop_path'][i] = updated_test['crop_path'][i][0]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  updated_test['mask_path'][i] = updated_test['mask_path'][i][0]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-

In [29]:
updated_test['full_path'] = updated_test['full_path'].str.replace('\\', '/')
updated_test['crop_path'] = updated_test['crop_path'].str.replace('\\', '/')
updated_test['mask_path'] = updated_test['mask_path'].str.replace('\\', '/')

In [31]:
updated_test

Unnamed: 0,patient_id,breast_density,left_or_right_breast,image_view,abnormality_id,abnormality_type,calc_type,calc_distribution,assessment,pathology,subtlety,image_file_path,cropped_image_file_path,ROI_mask_file_path,full_path,crop_path,mask_path
0,P_00038,2,LEFT,CC,1,calcification,PUNCTATE-PLEOMORPHIC,CLUSTERED,4,BENIGN,2,Calc-Test_P_00038_LEFT_CC/1.3.6.1.4.1.9590.100...,Calc-Test_P_00038_LEFT_CC_1/1.3.6.1.4.1.9590.1...,Calc-Test_P_00038_LEFT_CC_1/1.3.6.1.4.1.9590.1...,data/Calc/Test/ALL DCM/Calc-Test_P_00038_LEFT_...,data/Calc/Test/ALL DCM/Calc-Test_P_00038_LEFT_...,data/Calc/Test/ALL DCM/Calc-Test_P_00038_LEFT_...
1,P_00038,2,LEFT,MLO,1,calcification,PUNCTATE-PLEOMORPHIC,CLUSTERED,4,BENIGN,2,Calc-Test_P_00038_LEFT_MLO/1.3.6.1.4.1.9590.10...,Calc-Test_P_00038_LEFT_MLO_1/1.3.6.1.4.1.9590....,Calc-Test_P_00038_LEFT_MLO_1/1.3.6.1.4.1.9590....,data/Calc/Test/ALL DCM/Calc-Test_P_00038_LEFT_...,data/Calc/Test/ALL DCM/Calc-Test_P_00038_LEFT_...,data/Calc/Test/ALL DCM/Calc-Test_P_00038_LEFT_...
2,P_00038,2,RIGHT,CC,1,calcification,VASCULAR,,2,BENIGN_WITHOUT_CALLBACK,5,Calc-Test_P_00038_RIGHT_CC/1.3.6.1.4.1.9590.10...,Calc-Test_P_00038_RIGHT_CC_1/1.3.6.1.4.1.9590....,Calc-Test_P_00038_RIGHT_CC_1/1.3.6.1.4.1.9590....,data/Calc/Test/ALL DCM/Calc-Test_P_00038_RIGHT...,data/Calc/Test/ALL DCM/Calc-Test_P_00038_RIGHT...,data/Calc/Test/ALL DCM/Calc-Test_P_00038_RIGHT...
3,P_00038,2,RIGHT,CC,2,calcification,VASCULAR,,2,BENIGN_WITHOUT_CALLBACK,5,Calc-Test_P_00038_RIGHT_CC/1.3.6.1.4.1.9590.10...,Calc-Test_P_00038_RIGHT_CC_2/1.3.6.1.4.1.9590....,Calc-Test_P_00038_RIGHT_CC_2/1.3.6.1.4.1.9590....,data/Calc/Test/ALL DCM/Calc-Test_P_00038_RIGHT...,data/Calc/Test/ALL DCM/Calc-Test_P_00038_RIGHT...,data/Calc/Test/ALL DCM/Calc-Test_P_00038_RIGHT...
4,P_00038,2,RIGHT,MLO,1,calcification,VASCULAR,,2,BENIGN_WITHOUT_CALLBACK,5,Calc-Test_P_00038_RIGHT_MLO/1.3.6.1.4.1.9590.1...,Calc-Test_P_00038_RIGHT_MLO_1/1.3.6.1.4.1.9590...,Calc-Test_P_00038_RIGHT_MLO_1/1.3.6.1.4.1.9590...,data/Calc/Test/ALL DCM/Calc-Test_P_00038_RIGHT...,data/Calc/Test/ALL DCM/Calc-Test_P_00038_RIGHT...,data/Calc/Test/ALL DCM/Calc-Test_P_00038_RIGHT...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
321,P_02464,2,RIGHT,MLO,1,calcification,FINE_LINEAR_BRANCHING,CLUSTERED,0,MALIGNANT,4,Calc-Test_P_02464_RIGHT_MLO/1.3.6.1.4.1.9590.1...,Calc-Test_P_02464_RIGHT_MLO_1/1.3.6.1.4.1.9590...,Calc-Test_P_02464_RIGHT_MLO_1/1.3.6.1.4.1.9590...,data/Calc/Test/ALL DCM/Calc-Test_P_02464_RIGHT...,data/Calc/Test/ALL DCM/Calc-Test_P_02464_RIGHT...,data/Calc/Test/ALL DCM/Calc-Test_P_02464_RIGHT...
322,P_02498,4,RIGHT,CC,1,calcification,PUNCTATE,CLUSTERED,0,BENIGN,3,Calc-Test_P_02498_RIGHT_CC/1.3.6.1.4.1.9590.10...,Calc-Test_P_02498_RIGHT_CC_1/1.3.6.1.4.1.9590....,Calc-Test_P_02498_RIGHT_CC_1/1.3.6.1.4.1.9590....,data/Calc/Test/ALL DCM/Calc-Test_P_02498_RIGHT...,data/Calc/Test/ALL DCM/Calc-Test_P_02498_RIGHT...,data/Calc/Test/ALL DCM/Calc-Test_P_02498_RIGHT...
323,P_02498,4,RIGHT,MLO,1,calcification,PUNCTATE,CLUSTERED,0,BENIGN,3,Calc-Test_P_02498_RIGHT_MLO/1.3.6.1.4.1.9590.1...,Calc-Test_P_02498_RIGHT_MLO_1/1.3.6.1.4.1.9590...,Calc-Test_P_02498_RIGHT_MLO_1/1.3.6.1.4.1.9590...,data/Calc/Test/ALL DCM/Calc-Test_P_02498_RIGHT...,data/Calc/Test/ALL DCM/Calc-Test_P_02498_RIGHT...,data/Calc/Test/ALL DCM/Calc-Test_P_02498_RIGHT...
324,P_02501,3,RIGHT,CC,1,calcification,PLEOMORPHIC,CLUSTERED,0,MALIGNANT,3,Calc-Test_P_02501_RIGHT_CC/1.3.6.1.4.1.9590.10...,Calc-Test_P_02501_RIGHT_CC_1/1.3.6.1.4.1.9590....,Calc-Test_P_02501_RIGHT_CC_1/1.3.6.1.4.1.9590....,data/Calc/Test/ALL DCM/Calc-Test_P_02501_RIGHT...,data/Calc/Test/ALL DCM/Calc-Test_P_02501_RIGHT...,data/Calc/Test/ALL DCM/Calc-Test_P_02501_RIGHT...


In [32]:
updated_train.to_csv('data/Calc/Updated_Calc_Train.csv', index=False)
updated_test.to_csv('data/Calc/Updated_Calc_Test.csv', index=False)