# Import Necessary Libraries

In [1]:
import os
import shutil
import pandas as pd
import pyreadstat
import numpy as np
from PIL import Image
import pydicom
import matplotlib.pyplot as plt
import cv2
from skimage import exposure
from skimage import filters
from sklearn.model_selection import train_test_split
import SimpleITK as sitk

# Define Necessary Functions

In [5]:
def count_files_in_folder(folder_path):
    """Count the files that sit directly inside ``folder_path``.

    Only the first level reported by ``os.walk`` is inspected, so files in
    sub-folders are not counted. When ``os.walk`` yields nothing (for example
    because the folder does not exist) the result is 0.
    """
    top_level = next(os.walk(folder_path), None)
    if top_level is None:
        return 0

    _, _, file_names = top_level
    return len(file_names)

In [6]:
def dicom_to_nifti(origin_path, destination_path):
    """Write one ``.nii.gz`` volume per patient folder found in ``origin_path``.

    For every sub-folder ``<name>`` of ``origin_path`` the DICOM series in
    ``<origin_path><name>/FGT`` is read with SimpleITK and written to
    ``<destination_path><name>.nii.gz``. Both paths are used as plain string
    prefixes, so they are expected to end with a path separator.

    Note that the progress line for a patient is printed before that
    patient's series is actually read and written.
    """
    patient_folders = get_folder_names(origin_path, [])

    for position, patient in enumerate(patient_folders):
        print(f"Patient {position}'s data was processed.")

        series_dir = "".join((origin_path, patient, '/FGT'))
        output_file = "".join((destination_path, patient, '.nii.gz'))

        series_reader = sitk.ImageSeriesReader()
        series_files = series_reader.GetGDCMSeriesFileNames(series_dir)
        series_reader.SetFileNames(series_files)
        sitk.WriteImage(series_reader.Execute(), output_file)

In [7]:
def move_folders(folder_list, source_directory, destination_directory):
    """Move each named folder from ``source_directory`` to ``destination_directory``.

    Entries of ``folder_list`` are converted with ``str`` before use, so
    integer patient ids are accepted. One confirmation line is printed per
    moved folder.
    """
    for entry in folder_list:
        name = str(entry)
        shutil.move(
            os.path.join(source_directory, name),
            os.path.join(destination_directory, name),
        )
        print(f"Moved folder '{name}' from '{source_directory}' to '{destination_directory}'")

In [8]:
def create_pid_folder(parent_directory, pid, data_info):
    """Create one folder per usable patient id inside ``parent_directory``.

    A patient is skipped when its integer id is contained in the module-level
    ``wrong_data`` collection, or when the ``Side`` value of the first
    ``data_info`` row whose ``code`` equals that id is ``'N'``. The side
    lookup runs for every id, so an id without a matching row raises
    ``IndexError``.

    Args:
        parent_directory: Directory that receives the patient folders.
        pid: Iterable of patient ids (strings or values convertible with ``int``).
        data_info: Table exposing ``code`` and ``Side`` columns.
    """
    for patient in pid:
        patient_rows = data_info[data_info.code == int(patient)]
        side = np.array(patient_rows.Side)[0]

        if (int(patient) in wrong_data) or (side == 'N'):
            continue

        sub_folder = patient if type(patient) == str else str(patient)
        os.makedirs(os.path.join(parent_directory, sub_folder), exist_ok=True)

In [9]:
def create_pid_folder_without_info(parent_directory, pid):
    """Create ``<pid>/FGT`` and ``<pid>/BPE`` folders for every usable patient.

    Patients whose integer id is contained in the module-level ``wrong_data``
    collection are skipped. Existing folders are left untouched.

    Args:
        parent_directory: Directory that receives the patient folders.
        pid: Iterable of patient ids (strings or values convertible with ``int``).
    """
    for patient in pid:
        if int(patient) in wrong_data:
            continue

        sub_folder = patient if type(patient) == str else str(patient)
        patient_dir = os.path.join(parent_directory, sub_folder)
        os.makedirs(patient_dir, exist_ok=True)

        for series in ('FGT', 'BPE'):
            os.makedirs(os.path.join(patient_dir, series), exist_ok=True)

In [10]:
def automated_copy(origin_directory, destination_directory, pid, fgt_idx, bpe_idx):
    """Copy the FGT and BPE slice files of not-yet-copied patients.

    The function works in two passes over ``pid``:

    1. Collect, for every patient that is neither already present in
       ``destination_directory`` nor listed in the module-level ``wrong_data``,
       its position in ``pid`` and the first sub-folder of
       ``<origin_directory><pid>/IMAGE/DCM/``.
    2. For each collected patient, copy the files numbered by the inclusive
       ranges ``fgt_idx[i]`` and ``bpe_idx[i]`` into ``<pid>/FGT`` and
       ``<pid>/BPE`` below ``destination_directory``.

    Args:
        origin_directory: Source root, used as a string prefix.
        destination_directory: Target root, used as a string prefix.
        pid: Sequence of patient ids.
        fgt_idx: Per-patient ``(start, end)`` file numbers of the FGT series.
        bpe_idx: Per-patient ``(start, end)`` file numbers of the BPE series.
    """
    # Snapshot of the folders that exist BEFORE the new patient folders are created;
    # it decides which patients count as "already copied".
    present_data = []
    present_data = get_folder_names(destination_directory, present_data)
    create_pid_folder_without_info(destination_directory, pid)
    unique_idx = []
    img_folder_names = []
    for idx3, patient_id in enumerate(pid):
        if (str(patient_id) in present_data) or (int(patient_id) in wrong_data):
            continue
        else:
            unique_idx.append(idx3)
            dcm_directory = origin_directory + str(patient_id) + '/IMAGE/DCM/'
            # Only the first os.walk() result (the DCM folder itself) is used, and
            # from it only the first sub-directory name.
            # NOTE(review): if os.walk() yields nothing for a patient, no entry is
            # appended although the index was recorded above, which would shift the
            # pairing done through `c` below - confirm this cannot happen.
            for k, (root, dirs, files) in enumerate(os.walk(dcm_directory)):
                if k == 0:
                    img_folder_names.append(dcm_directory + str(dirs[k]))

    # `c` walks img_folder_names in step with the patients that are actually copied.
    c = 0           
    for idx1, patient_id in enumerate(pid):
        if int(patient_id) in wrong_data:
            continue
        else:
            print(f"i={idx1} | pid={patient_id}")
            if idx1 not in unique_idx:
                print(f"{idx1} is not in the unique idx")
                continue
            else:
                start_fgt, end_fgt = fgt_idx[idx1]
                start_bpe, end_bpe = bpe_idx[idx1]
                
                difference_fgt = end_fgt - start_fgt
                difference_bpe = end_bpe - start_bpe
                # A mismatch aborts the whole loop: later patients are not copied.
                if difference_fgt != difference_bpe:
                    print("Slice Number is Different for FGT and BPE")
                    break
                destination_fgt_folder = destination_directory + str(patient_id) + '/FGT'
                destination_bpe_folder = destination_directory + str(patient_id) + '/BPE'
                # File names are 'I000' + a 4-character number, otherwise 'I0000' + number.
                for j1 in range(start_fgt, end_fgt+1):
                    if len(str(j1)) == 4:
                        fgt_img_directory = img_folder_names[c] + '/I000' + str(j1)
                    else:
                        fgt_img_directory = img_folder_names[c] + '/I0000' + str(j1)
                    shutil.copy(fgt_img_directory, destination_fgt_folder)
                    
                for j2 in range(start_bpe, end_bpe+1):
                    if len(str(j2)) == 4:
                        bpe_img_directory = img_folder_names[c] + '/I000' + str(j2)
                    else:
                        bpe_img_directory = img_folder_names[c] + '/I0000' + str(j2)
                    shutil.copy(bpe_img_directory, destination_bpe_folder)
                c += 1

In [11]:
# Each folder name is a string.
def get_folder_names(path, folders=None):
    """Collect the names of the immediate sub-directories of ``path``.

    Args:
        path: Directory to scan (anything ``os.scandir`` accepts).
        folders: Optional list that is extended in place. When omitted a new
            list is created, so callers do not have to pre-build one.

    Returns:
        The list that received the names (``folders`` itself when it was
        given). Names come in the order ``os.scandir`` yields them.
    """
    if folders is None:
        folders = []

    # The context manager releases the directory handle even if iteration aborts.
    with os.scandir(path) as entries:
        folders.extend(entry.name for entry in entries if entry.is_dir())

    return folders

In [12]:
# Each file name is a string.
def get_file_names(files_path):
    """Return the names of all entries directly inside ``files_path``.

    This is a thin wrapper around ``os.listdir``; the order is whatever the
    operating system reports.
    """
    return os.listdir(files_path)

In [13]:
def slice_checker(fgt_list, bpe_list, pid_list):
    """Report patients whose FGT and BPE slice ranges have different lengths.

    ``fgt_list[i]`` and ``bpe_list[i]`` are ``(start, end)`` pairs; a line is
    printed for every position where ``end - start`` differs between the two.
    Nothing is returned.
    """
    for position in range(len(fgt_list)):
        fgt_first, fgt_last = fgt_list[position]
        bpe_first, bpe_last = bpe_list[position]

        if (fgt_last - fgt_first) == (bpe_last - bpe_first):
            continue

        print(f"Slice Number is Different for FGT and BPE of {pid_list[position]}")

In [14]:
def normalize_threshold_rotate(dicom_img_path):
    """Load one DICOM slice, rescale it, suppress the background and flip it.

    Steps, in order: read the pixel data with pydicom, rescale intensities to
    the ``uint8`` range, zero every pixel below the Li threshold of the
    rescaled image, and flip the result upside down. Despite the function
    name, the only geometric operation applied is this vertical flip.
    """
    dataset = pydicom.dcmread(dicom_img_path)
    image = exposure.rescale_intensity(dataset.pixel_array, out_range=np.uint8)

    li_threshold = filters.threshold_li(image)
    image[image < li_threshold] = 0

    return np.flipud(image)

In [15]:
def basic_preprocess(dicom_img_path, output_shape=None):
    """Preprocess one DICOM slice and optionally resize it.

    Args:
        dicom_img_path: Path of the DICOM file, forwarded to
            ``normalize_threshold_rotate``.
        output_shape: Optional two-element size passed to ``cv2.resize`` as
            ``dsize``. ``None`` keeps the preprocessed size.

    Returns:
        The preprocessed (and possibly resized) image array.
    """
    final_image = normalize_threshold_rotate(dicom_img_path)

    # Identity test instead of `!= None`: the inequality is evaluated
    # element-wise for array-like shapes and then fails inside `if`.
    if output_shape is not None:
        target_size = tuple(int(value) for value in output_shape)
        final_image = cv2.resize(src=final_image, dsize=target_size, interpolation=cv2.INTER_CUBIC)

    return final_image

In [16]:
def _select_slice_indices(n_slices, data_selection_index, percentiles):
    """Return the positions of the slices to export for one patient."""
    if data_selection_index == 0:
        return list(range(n_slices))
    if data_selection_index == 1:
        return [n_slices // 2]

    # Mode 2 uses the caller's first three percentiles; any other mode uses
    # the fixed five-point grid.
    wanted = percentiles[:3] if data_selection_index == 2 else (40, 45, 50, 55, 60)
    positions = np.arange(n_slices)
    return [int(np.round(np.percentile(positions, p))) for p in wanted]


def dicom_to_png_no_preprocessed(files_path, destination_path, data_info, data_selection_index=0, percentiles=(40, 50, 60), out_shape=None):
    """Export selected FGT slices of every usable patient as PNG files.

    Patient folders are read from ``files_path`` in sorted order. A patient
    is skipped when its id is in the module-level ``wrong_data`` collection or
    when its ``Side`` entry in ``data_info`` is ``'N'``. The chosen slices of
    ``<files_path>/<pid>/FGT`` are preprocessed with ``basic_preprocess`` and
    saved as ``<destination_path>/<pid>/<k>.png`` with ``k`` counting from 0.

    Args:
        files_path: Root folder holding one sub-folder per patient.
        destination_path: Root folder for the PNG output.
        data_info: Table exposing ``code`` and ``Side`` columns.
        data_selection_index: ``0`` exports every slice, ``1`` the middle
            slice, ``2`` the slices at the first three ``percentiles``; any
            other value exports the slices at the 40/45/50/55/60th
            percentiles.
        percentiles: Percentiles of the slice index range used by mode ``2``.
        out_shape: Optional output size forwarded to ``basic_preprocess``.

    Notes:
        Only the FGT series is read. The output folder is created per patient
        right before its images are written. A patient whose FGT folder is
        empty is reported and skipped.
    """
    final_folder_names = []
    final_folder_names = get_folder_names(files_path, final_folder_names)  # Folder names have str data type
    final_folder_names.sort()

    for p_idx, pid in enumerate(final_folder_names):
        side = np.array(data_info[data_info.code == int(pid)].Side)[0]
        if (int(pid) in wrong_data) or side == 'N':
            continue

        print(f"index: {p_idx}  |  pid: {pid}")
        fgt_path = os.path.join(files_path, pid, 'FGT')
        fgt_dicom_names = sorted(get_file_names(fgt_path))

        # Percentiles of an empty index range are NaN and cannot be cast to int.
        if not fgt_dicom_names:
            print(f"No FGT slices found for pid {pid}; skipped.")
            continue

        patient_output = os.path.join(destination_path, pid)
        os.makedirs(patient_output, exist_ok=True)

        selected = _select_slice_indices(len(fgt_dicom_names), data_selection_index, percentiles)
        for out_idx, slice_idx in enumerate(selected):
            slice_path = os.path.join(fgt_path, fgt_dicom_names[slice_idx])
            img = basic_preprocess(dicom_img_path=slice_path, output_shape=out_shape)
            save_array_as_image(img, os.path.join(patient_output, f"{out_idx}.png"), file_format='PNG')

In [17]:
def save_array_as_image(array, file_path, file_format='PNG'):
    """Write a NumPy array to ``file_path`` as an image via PIL.

    ``file_format`` is forwarded to ``Image.save`` as its ``format`` argument.
    """
    Image.fromarray(array).save(file_path, format=file_format)

In [18]:
def sobel_edge_detection(img):
    """Return the min-max normalised Sobel gradient magnitude of ``img``.

    The image is smoothed with a 3x3 Gaussian kernel, differentiated in both
    directions with 3x3 Sobel kernels, and the gradient magnitude is scaled
    to the 0-255 range as an 8-bit image.
    """
    smoothed = cv2.GaussianBlur(img, (3, 3), 0)

    # First-order derivatives along x and y, kept in float64.
    grad_x = cv2.Sobel(smoothed, cv2.CV_64F, 1, 0, ksize=3)
    grad_y = cv2.Sobel(smoothed, cv2.CV_64F, 0, 1, ksize=3)

    magnitude = cv2.magnitude(grad_x, grad_y)

    return cv2.normalize(magnitude, magnitude, 0, 255, cv2.NORM_MINMAX, cv2.CV_8U)

In [19]:
def p_finder(img, p3_row_range=165):
    """Locate three landmark pixels in a 2-D image.

    * ``p1``: first non-zero pixel met when scanning rows top to bottom and,
      within a row, the columns ``0 .. width//2 - 1`` left to right.
    * ``p2``: first non-zero pixel met when scanning rows top to bottom and,
      within a row, the columns ``width - 1 .. width//2 + 1`` right to left.
    * ``p3``: first non-zero pixel at or below row ``p3_row_range`` in the
      column halfway between ``p1`` and ``p2``.

    Each point is a ``[row, column]`` list; a point that is not found is
    reported as ``[0, 0]``.
    """
    n_rows, n_cols = img.shape
    half = n_cols // 2

    def first_hit(column_order):
        # Row-major search that stops at the very first non-zero pixel.
        return next(
            ([r, c] for r in range(n_rows) for c in column_order if img[r, c] != 0),
            [0, 0],
        )

    p1 = first_hit(range(half))
    p2 = first_hit(range(n_cols - 1, half, -1))

    p3_column = (p1[1] + p2[1]) // 2
    p3 = next(
        ([r, p3_column] for r in range(p3_row_range, n_rows) if img[r, p3_column] != 0),
        [0, 0],
    )

    return p1, p2, p3

In [20]:
def circle_mask(central_point, img_shape=(512, 512), radius=500):
    """Build a boolean disc mask.

    Element ``[i, j]`` of the result is ``True`` when the squared distance
    between ``(i, j)`` and ``central_point`` does not exceed ``radius ** 2``.
    The mask has shape ``(img_shape[0], img_shape[1])``.
    """
    rows = np.arange(img_shape[0])[:, np.newaxis]
    cols = np.arange(img_shape[1])[np.newaxis, :]

    squared_distance = (rows - central_point[0]) ** 2 + (cols - central_point[1]) ** 2
    return squared_distance <= radius ** 2

In [21]:
def img_fit(img):
    """Return the bounding box of the non-zero pixels of a 2-D image.

    Args:
        img: 2-D array.

    Returns:
        ``(min_x, max_x, min_y, max_y)`` where the ``x`` values are the first
        and last *row* index and the ``y`` values the first and last *column*
        index that contain a non-zero pixel. Both maxima are inclusive
        indices.

    Raises:
        ValueError: If ``img`` is not 2-D or contains no non-zero pixel.
    """
    if len(img.shape) != 2:
        raise ValueError(f"img_fit expects a 2-D image, got shape {img.shape}")

    # One pass over the image instead of a separate np.nonzero() per bound.
    nonzero_rows, nonzero_cols = np.nonzero(img)
    if nonzero_rows.size == 0:
        raise ValueError("img_fit received an image without non-zero pixels")

    return nonzero_rows.min(), nonzero_rows.max(), nonzero_cols.min(), nonzero_cols.max()

In [22]:
def side_data_seperator(files_path, slice_number, data_labels):
    """Load the exported PNG slices and split patients by usable breast side.

    For every patient folder below ``files_path`` the files ``0.png`` ..
    ``<slice_number - 1>.png`` are read (first channel only) and the patient's
    ``Side`` and ``FGT`` values are looked up in ``data_labels`` by ``code``.

    Args:
        files_path: Root folder holding one sub-folder per patient.
        slice_number: Number of PNG slices per patient. With ``1`` each sample
            is a single 2-D image, otherwise a list of ``slice_number`` images.
        data_labels: Table exposing ``code``, ``Side`` and ``FGT`` columns.

    Returns:
        ``(both_side_ok_data, one_side_ok_data, both_side_ok_labels,
        one_side_ok_labels)`` as NumPy arrays. Patients with ``Side == 'B'``
        go to the "both" group, all others to the "one" group. Each label row
        is built from ``[code, side, fgt]``.

    Raises:
        FileNotFoundError: If an expected PNG file cannot be read.
    """
    def read_slice(folder, index):
        img_path = os.path.join(files_path, folder, f"{index}.png")
        img = cv2.imread(img_path)
        # cv2.imread signals failure by returning None instead of raising.
        if img is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        return img[:, :, 0]

    folder_names = []
    folder_names = get_folder_names(files_path, folder_names)

    both_side_ok_data, both_side_ok_labels = [], []
    one_side_ok_data, one_side_ok_labels = [], []

    for folder in folder_names:
        code = int(folder)
        patient_rows = data_labels[data_labels.code == code]
        side = np.array(patient_rows.Side)[0]
        fgt = np.array(patient_rows.FGT)[0]

        if slice_number == 1:
            sample = read_slice(folder, 0)
        else:
            sample = [read_slice(folder, j) for j in range(slice_number)]

        label = [code, side, int(fgt)]
        if side == 'B':
            both_side_ok_data.append(sample)
            both_side_ok_labels.append(label)
        else:
            one_side_ok_data.append(sample)
            one_side_ok_labels.append(label)

    both_side_ok_data = np.array(both_side_ok_data)
    both_side_ok_labels = np.array(both_side_ok_labels)

    one_side_ok_data = np.array(one_side_ok_data)
    one_side_ok_labels = np.array(one_side_ok_labels)

    return both_side_ok_data, one_side_ok_data, both_side_ok_labels, one_side_ok_labels

In [23]:
def breast_segmentor(X, y, slice_number, set_name='Train', test_set_side='Left'):
    """Crop the left and/or right half of every sample and resize it to 224x224.

    Args:
        X: Image array, indexed as ``X[i, :, :]`` when ``slice_number == 1``
            and as ``X[i, j, :, :]`` otherwise.
        y: Label array; column 1 holds the side (``'L'``, ``'R'`` or anything
            else for both sides) and column 2 the FGT value.
        slice_number: Number of slices per sample.
        set_name: Only consulted for both-side samples in the multi-slice
            branch: ``'Train'`` keeps both halves, any other value keeps one.
        test_set_side: Half kept for both-side samples of a non-train set in
            the multi-slice branch (``'Left'`` or anything else for right).

    Returns:
        ``(dataset, labels)`` as NumPy arrays. A both-side sample can
        contribute two entries that share the same label.
    """
    dataset = []
    labels = []
    for i in range(len(X)):
        print(f"{i+1}/{len(X)}")
        side = y[i, 1]
        # presumably turns a 1-based FGT grade into a 0-based class label - TODO confirm
        fgt = int(y[i, 2])-1
        
        if slice_number == 1:
            img = X[i, :, :]
            # p3 is located on the edge image; it supplies the crop row (p3[0])
            # and the column that separates the two halves (p3[1]).
            img_edge = sobel_edge_detection(img)
            p1, p2, p3 = p_finder(img_edge)
            
            if side == 'L':
                img_L = img[:p3[0], :p3[1]]
                # NOTE(review): img_fit returns the last non-zero row/column index,
                # and it is used here as an exclusive slice end - confirm intended.
                min_x, max_x, min_y, max_y = img_fit(img_L)
                img_L = img_L[min_x:max_x, min_y:max_y]
                img_L = cv2.resize(img_L, (224, 224))
                dataset.append(img_L)
                labels.append(fgt)
                
            elif side == 'R':
                img_R = img[:p3[0], p3[1]:]
                min_x, max_x, min_y, max_y = img_fit(img_R)
                img_R = img_R[min_x:max_x, min_y:max_y]
                img_R = cv2.resize(img_R, (224, 224))
                dataset.append(img_R)
                labels.append(fgt)
            else:
                img_L = img[:p3[0], :p3[1]]
                min_x, max_x, min_y, max_y = img_fit(img_L)
                img_L = img_L[min_x:max_x, min_y:max_y]
                img_L = cv2.resize(img_L, (224, 224))
                
                img_R = img[:p3[0], p3[1]:]
                min_x, max_x, min_y, max_y = img_fit(img_R)
                img_R = img_R[min_x:max_x, min_y:max_y]
                img_R = cv2.resize(img_R, (224, 224))
                
                # Single-slice both-side samples always contribute both halves
                # (right first), regardless of set_name / test_set_side.
                dataset.append(img_R)
                labels.append(fgt)
                dataset.append(img_L)
                labels.append(fgt)
        else:
            images_L = []
            images_R = []
            # Landmarks and crop bounds come from the middle slice only and are
            # then applied unchanged to every slice of the sample.
            middle_slice_index = slice_number//2
            img_edge = sobel_edge_detection(X[i, middle_slice_index, :, :])
            p1, p2, p3 = p_finder(img_edge)
            
            if side == 'L':
                img = X[i, middle_slice_index, :, :]
                img_L = img[:p3[0], :p3[1]]
                min_x, max_x, min_y, max_y = img_fit(img_L)
                
                for j in range(slice_number):
                    img = X[i, j, :, :]
                    img_L = img[:p3[0], :p3[1]]
                    img_L = img_L[min_x:max_x, min_y:max_y]
                    img_L = cv2.resize(img_L, (224, 224))
                    images_L.append(img_L)
                    
                dataset.append(images_L)
                labels.append(fgt)
                
            elif side == 'R':
                img = X[i, middle_slice_index, :, :]
                img_R = img[:p3[0], p3[1]:]
                min_x, max_x, min_y, max_y = img_fit(img_R)
                
                for j in range(slice_number):
                    img = X[i, j, :, :]
                    img_R = img[:p3[0], p3[1]:]
                    img_R = img_R[min_x:max_x, min_y:max_y]
                    img_R = cv2.resize(img_R, (224, 224))
                    images_R.append(img_R)
                    
                dataset.append(images_R)
                labels.append(fgt)
                
            else:
                img = X[i, middle_slice_index, :, :]
                img_L = img[:p3[0], :p3[1]]
                min_x_l, max_x_l, min_y_l, max_y_l = img_fit(img_L)
                
                img = X[i, middle_slice_index, :, :]
                img_R = img[:p3[0], p3[1]:]
                min_x_r, max_x_r, min_y_r, max_y_r = img_fit(img_R)
                
                for j in range(slice_number):
                    img = X[i, j, :, :]
                    img_L = img[:p3[0], :p3[1]]
                    img_L = img_L[min_x_l:max_x_l, min_y_l:max_y_l]
                    img_L = cv2.resize(img_L, (224, 224))

                    img_R = img[:p3[0], p3[1]:]
                    img_R = img_R[min_x_r:max_x_r, min_y_r:max_y_r]
                    img_R = cv2.resize(img_R, (224, 224))

                    images_L.append(img_L)
                    images_R.append(img_R)
                
                # Training keeps both halves (left first); other sets keep only
                # the half selected by test_set_side.
                if set_name == 'Train':
                    dataset.append(images_L)
                    labels.append(fgt)
                    dataset.append(images_R)
                    labels.append(fgt)
                        
                else:
                    if test_set_side == 'Left':
                        dataset.append(images_L)
                        labels.append(fgt)
                    else:
                        dataset.append(images_R)
                        labels.append(fgt)
                        
    dataset = np.array(dataset)
    labels = np.array(labels)
    
    return dataset, labels

In [24]:
def dataset_creator(both_side_ok_data, one_side_ok_data, both_side_ok_labels, one_side_ok_labels, test_ratio, validation_ratio, slice_number, random_state, test_set_side='Left', just_sets_info=False):
    """Split both patient groups into train/validation/test sets and segment them.

    Each group (both sides usable / one side usable) is split separately,
    stratified on label column 2: first a test share of ``test_ratio`` is
    taken, then ``validation_ratio`` of the remainder becomes the validation
    set. The matching parts of the two groups are concatenated afterwards.

    Returns:
        With ``just_sets_info == False``: ``(train_data, validation_data,
        test_data, train_labels, validation_labels, test_labels)`` produced by
        ``breast_segmentor``. Otherwise only the unsegmented label arrays
        ``(y_train, y_validation, y_test)``.
    """
    def stratified_split(data, label_rows, holdout_ratio):
        # Returns (X_keep, X_holdout, y_keep, y_holdout).
        return train_test_split(data, label_rows, test_size=holdout_ratio, random_state=random_state, stratify=label_rows[:, 2])

    # Hold out the test share of each group.
    rest_both_X, test_both_X, rest_both_y, test_both_y = stratified_split(both_side_ok_data, both_side_ok_labels, test_ratio)
    rest_one_X, test_one_X, rest_one_y, test_one_y = stratified_split(one_side_ok_data, one_side_ok_labels, test_ratio)

    # Split what is left into train and validation.
    train_both_X, val_both_X, train_both_y, val_both_y = stratified_split(rest_both_X, rest_both_y, validation_ratio)
    train_one_X, val_one_X, train_one_y, val_one_y = stratified_split(rest_one_X, rest_one_y, validation_ratio)

    X_train = np.concatenate((train_both_X, train_one_X), axis=0)
    y_train = np.concatenate((train_both_y, train_one_y), axis=0)

    X_validation = np.concatenate((val_both_X, val_one_X), axis=0)
    y_validation = np.concatenate((val_both_y, val_one_y), axis=0)

    X_test = np.concatenate((test_both_X, test_one_X), axis=0)
    y_test = np.concatenate((test_both_y, test_one_y), axis=0)

    if just_sets_info == False:
        segmented = [
            breast_segmentor(X_train, y_train, slice_number, set_name='Train'),
            breast_segmentor(X_validation, y_validation, slice_number, set_name='Validation', test_set_side=test_set_side),
            breast_segmentor(X_test, y_test, slice_number, set_name='Test', test_set_side=test_set_side),
        ]
        (train_data, train_labels), (validation_data, validation_labels), (test_data, test_labels) = segmented
        return train_data, validation_data, test_data, train_labels, validation_labels, test_labels

    return y_train, y_validation, y_test

In [25]:
def resample_img(itk_image, out_spacing=(2.0, 2.0, 2.0), is_label=False, default_pixel_value=0):
    """Resample a SimpleITK image to a new voxel spacing.

    The output keeps the origin and direction of the input; its size is
    derived per axis as ``round(size * spacing / out_spacing)``.

    Args:
        itk_image: SimpleITK image to resample.
        out_spacing: Target spacing, one value per image dimension.
        is_label: Use nearest-neighbour interpolation (for label maps)
            instead of B-spline interpolation.
        default_pixel_value: Value written where the output grid falls
            outside the input image.

    Returns:
        The resampled SimpleITK image.
    """
    original_spacing = itk_image.GetSpacing()
    original_size = itk_image.GetSize()

    # One entry per image dimension, so 2-D images work as well as 3-D ones.
    out_size = [
        int(np.round(size * (spacing / target)))
        for size, spacing, target in zip(original_size, original_spacing, out_spacing)
    ]

    resample = sitk.ResampleImageFilter()
    resample.SetOutputSpacing(out_spacing)
    resample.SetSize(out_size)
    resample.SetOutputDirection(itk_image.GetDirection())
    resample.SetOutputOrigin(itk_image.GetOrigin())
    resample.SetTransform(sitk.Transform())
    # The fill value must be an intensity. GetPixelIDValue() returns the
    # numeric id of the pixel *type*, which is not a meaningful padding value.
    resample.SetDefaultPixelValue(default_pixel_value)

    if is_label:
        resample.SetInterpolator(sitk.sitkNearestNeighbor)
    else:
        resample.SetInterpolator(sitk.sitkBSpline)

    return resample.Execute(itk_image)

In [26]:
def p_finder_3D(patient_data_array, side):
    """Find the landmark points of a volume on its middle occupied slice.

    The slice is chosen with ``get_middle_slice_index`` and the landmarks are
    computed by ``p_finder`` with ``p3_row_range=0``.

    Args:
        patient_data_array: Volume indexed as ``[slice, row, column]``.
        side: ``'L'``, ``'R'`` or any other value for both sides.

    Returns:
        ``(p1, p3)`` for ``'L'``, ``(p2, p3)`` for ``'R'`` and
        ``(p1, p2, p3)`` otherwise. Note that the tuple length depends on
        ``side``.
    """
    # Reuse the shared helper instead of repeating the empty-slice scan here.
    middle_slice = get_middle_slice_index(patient_data_array)
    p1, p2, p3 = p_finder(patient_data_array[middle_slice, :, :], p3_row_range=0)

    if side == 'L':
        return p1, p3
    if side == 'R':
        return p2, p3
    return p1, p2, p3

In [27]:
def get_middle_slice_index(patient_data_array):
    """Return the index halfway between the first and last non-empty slice.

    A slice counts as empty when the sum of its values is 0. The scan covers
    every slice along the first axis of the array, whatever its depth.

    Args:
        patient_data_array: Volume indexed as ``[slice, row, column]``.

    Returns:
        ``(last - first) // 2 + first`` as a Python ``int``. When every slice
        is empty, ``first`` is 0 and ``last`` is the number of slices.
    """
    n_slices = patient_data_array.shape[0]
    occupied = [
        index for index in range(n_slices)
        if np.sum(patient_data_array[index, :, :]) != 0
    ]

    if occupied:
        first, last = occupied[0], occupied[-1]
    else:
        first, last = 0, n_slices

    return (last - first) // 2 + first

In [28]:
def normalize_threshold_rotate_3d(patient_data_array):
    """Suppress the background of a volume, rescale it to uint8 and flip it.

    Steps, in order: zero every voxel below the Li threshold of the volume,
    rescale intensities to the ``uint8`` range, and reverse axis 1. Despite
    the function name, the only geometric operation applied is this flip.

    Args:
        patient_data_array: Input volume. It is left unmodified.

    Returns:
        The processed volume as a new array.
    """
    t = filters.threshold_li(patient_data_array)

    # Work on a copy so the caller's array is not altered by the masking.
    thresholded = np.array(patient_data_array, copy=True)
    thresholded[thresholded < t] = 0

    rescaled = exposure.rescale_intensity(thresholded, out_range=np.uint8)
    return np.flip(rescaled, axis=1)

In [29]:
def center_image_3d(image, out_shape=(96, 140, 140)):
    """Place a volume in the middle of a zero-filled ``uint8`` canvas.

    Slices are copied starting at canvas slice 0; within each slice the image
    is centred along the row and column axes.

    Args:
        image: 3-D array indexed as ``[slice, row, column]``.
        out_shape: Shape of the canvas as ``(slices, rows, columns)``.

    Returns:
        A ``uint8`` array of shape ``out_shape`` containing the image.

    Raises:
        ValueError: If ``image`` does not fit into ``out_shape``.
    """
    canvas = np.zeros(out_shape, dtype=np.uint8)

    canvas_slices, canvas_height, canvas_width = canvas.shape
    image_slices, image_height, image_width = image.shape

    if image_slices > canvas_slices or image_height > canvas_height or image_width > canvas_width:
        raise ValueError(
            f"image of shape {image.shape} does not fit into a canvas of shape {canvas.shape}"
        )

    # Top-left corner that centres the image inside each canvas slice.
    top_left_y = (canvas_height - image_height) // 2
    top_left_x = (canvas_width - image_width) // 2

    canvas[:image_slices,
           top_left_y:top_left_y + image_height,
           top_left_x:top_left_x + image_width] = image

    return canvas

In [30]:
def create_3d_train_set(data_path, set_info):
    """Build the 3-D training volumes and labels from ``.nrrd`` files.

    For every row of ``set_info`` the file ``<data_path><row[0]>.nrrd`` is
    read, preprocessed with ``normalize_threshold_rotate_3d`` and cropped to
    the left half, the right half, or both, depending on ``row[1]``
    (``'L'``, ``'R'``, anything else). Each crop is centred with
    ``center_image_3d`` and labelled with ``int(row[2]) - 1``; a both-side
    patient contributes the left crop followed by the right crop.

    Returns:
        ``(data_3d, labels)`` as NumPy arrays.
    """
    volumes = []
    targets = []

    for row in range(len(set_info)):
        print(f"{row+1}/{len(set_info)}")

        itk_volume = sitk.ReadImage(data_path + set_info[row, 0] + ".nrrd")
        volume = normalize_threshold_rotate_3d(sitk.GetArrayFromImage(itk_volume))  # shape: (S, W, H)

        side = set_info[row, 1]

        # Collect the crop(s) for this patient as views; left always precedes right.
        crops = []
        if side == 'L':
            p1, p3 = p_finder_3D(volume, side)
            crops.append(volume[:, p1[0]:p3[0], 0:p3[1]])
        elif side == 'R':
            p2, p3 = p_finder_3D(volume, side)
            crops.append(volume[:, p2[0]:p3[0], p3[1]:])
        else:
            p1, p2, p3 = p_finder_3D(volume, side)
            crops.append(volume[:, p1[0]:p3[0], 0:p3[1]])
            crops.append(volume[:, p2[0]:p3[0], p3[1]:])

        for crop in crops:
            volumes.append(center_image_3d(crop))
            targets.append(int(set_info[row, 2])-1)

    return np.array(volumes), np.array(targets)

In [31]:
def create_3d_test_validation(data_path, set_info):
    """Load, preprocess and crop the validation/test volumes as left/right pairs.

    Unlike the training builder, every patient yields exactly one entry in
    each of the two volume lists and one label.

    Parameters
    ----------
    data_path : str
        Prefix that is concatenated with the patient ID and ".nrrd" to form
        the file name of every volume. The argument is honoured as passed
        (it is no longer replaced by a hard-coded directory).
    set_info : array-like, indexed as set_info[row, column]
        Column 0 holds the patient ID, column 1 the side and column 2 the
        class, which is converted with int() and shifted down by one.

    Returns
    -------
    tuple of numpy.ndarray
        (data_3d_right, data_3d_left, labels) -- note that the RIGHT array
        comes first. For side 'L' the left crop is stored in both arrays, for
        side 'R' the right crop is stored in both arrays; for any other side
        value each array receives its own crop.
    """
    data_3d_left = []
    data_3d_right = []
    labels = []

    for i in range(len(set_info)):
        print(f"{i+1}/{len(set_info)}")
        data_sample_path = data_path + set_info[i, 0] + ".nrrd"
        patient_data = sitk.ReadImage(data_sample_path)
        patient_data_array = sitk.GetArrayFromImage(patient_data) # shape: (S, W, H)
        patient_data_array = normalize_threshold_rotate_3d(patient_data_array)

        side = set_info[i, 1]

        if side == 'L':
            # Only the left crop is used; it fills both slots.
            p1, p3 = p_finder_3D(patient_data_array, side)
            left_crop = center_image_3d(patient_data_array[:, p1[0]:p3[0], 0:p3[1]])
            right_crop = left_crop

        elif side == 'R':
            # Only the right crop is used; it fills both slots.
            p2, p3 = p_finder_3D(patient_data_array, side)
            right_crop = center_image_3d(patient_data_array[:, p2[0]:p3[0], p3[1]:])
            left_crop = right_crop

        else:
            # Both crops are used, each in its own slot.
            p1, p2, p3 = p_finder_3D(patient_data_array, side)
            left_crop = center_image_3d(patient_data_array[:, p1[0]:p3[0], 0:p3[1]])
            right_crop = center_image_3d(patient_data_array[:, p2[0]:p3[0], p3[1]:])

        data_3d_left.append(left_crop)
        data_3d_right.append(right_crop)
        labels.append(int(set_info[i, 2])-1)

    data_3d_left = np.array(data_3d_left)
    data_3d_right = np.array(data_3d_right)
    labels = np.array(labels)

    return data_3d_right, data_3d_left, labels

# Read The SPSS File That Contains Patients' Full Information

In [32]:
data, meta = pyreadstat.read_sav('Full_Patients_Information.sav')

# Read The CSV File That Contains Patients' Necessary Information
## This file must have:
* Patients' IDs
* FGT and BPE Diagnosis
* Contralateral Breast Side

In [33]:
data_labels = pd.read_csv('data_labels.csv')

# Create The List of Poor-Quality MRI Scans To Be Excluded

In [31]:
# Patients whose images are of low quality and are excluded from processing.
# NOTE(review): create_pid_folder tests `int(folder_name) in wrong_data`, so the
# IDs listed here need to be integers for the exclusion to match; the string
# below appears to be a placeholder for the real IDs -- confirm before running.
wrong_data = ["Patients' ID's"]

# Data Preprocessing
## Step 1
### In this step, we collect the unique patient IDs and remove patients with multiple, conflicting diagnoses.
#### Whenever you want to add new data, add its path here and just run the next two code cells.
#### Then create a new part and enter the codes just as in the other parts.

In [None]:
# Root directories handed to get_folder_names. The individual mpathN names are
# kept because the per-part cells further down refer to them.
mpath1 = 'H:/Old Breast MRI Data/E/Breast Data/Breast'
mpath2 = 'H:/Old Breast MRI Data/E/Breast Data/breast newNAC'
mpath3 = 'H:/Old Breast MRI Data/G/01/sps'
mpath4 = 'H:/Old Breast MRI Data/H/Breast MRI/03/breast new'
mpath5 = 'H:/Old Breast MRI Data/H/Breast MRI/03/breast new 2'
mpath6 = 'H:/Old Breast MRI Data/H/Breast MRI/03/nac'
mpath7 = 'H:/Old Breast MRI Data/H/Breast MRI/03/sps'
mpath8 = 'H:/Old Breast MRI Data/G/Breast Data/nac'
mpath9 = 'H:/Old Breast MRI Data/G/Breast Data/sps'
mpath10 = 'H:/Old Breast MRI Data/H/02'
mpath11 = 'I:/New Breast MRI'
mpath12 = 'H:/New MRI'

# Accumulate the folder names of every root into one list.
# mpath10 is deliberately scanned fourth, between mpath3 and mpath4.
folder_names = []
for scan_root in (mpath1, mpath2, mpath3, mpath10, mpath4, mpath5,
                  mpath6, mpath7, mpath8, mpath9, mpath11, mpath12):
    folder_names = get_folder_names(scan_root, folder_names)

### Early Analysis For Data Cleaning

In [None]:
def _has_consistent_diagnosis(code, table):
    """Return True unless `code` has several rows in `table` whose FG or BPE values differ.

    Codes with zero or one row are always accepted. Codes with more than one
    row are accepted only when every FG value equals the first FG value and
    every BPE value equals the first BPE value.
    """
    rows = table[table.code == code]  # filter the table once per code
    fg_values = np.array(rows.FG)
    if fg_values.shape[0] <= 1:
        return True
    bpe_values = np.array(rows.BPE)
    return bool(np.all(fg_values == fg_values[0]) and np.all(bpe_values == bpe_values[0]))


folder_names = np.array(folder_names)
folder_names_unique = np.unique(folder_names)
data_codes = np.array(data.code)
data_codes_unique = np.unique(data_codes)

# Folder names that also appear as a code in the patient table.
same_codes_unique = np.array(
    [code for code in folder_names_unique if code in data_codes_unique]
)

# Codes of the patient table that did not match any folder name.
not_in_dataset = [code for code in data_codes_unique if code not in same_codes_unique]
not_in_dataset_unique = np.unique(np.array(not_in_dataset))

# Drop codes that carry multiple, disagreeing diagnoses ("mdd") from both groups.
same_codes_unique_mdd_removed = [
    code for code in same_codes_unique if _has_consistent_diagnosis(code, data)
]
not_in_dataset_unique_mdd_removed = [
    code for code in not_in_dataset_unique if _has_consistent_diagnosis(code, data)
]

# Both groups are kept, so codes without a matching folder are part of the list too.
all_ok_data = same_codes_unique_mdd_removed + not_in_dataset_unique_mdd_removed

len(all_ok_data)

# Part 1

In [None]:
folder_names_part1 = []
folder_names_part1 = get_folder_names(mpath3, folder_names_part1)

In [None]:
for name in folder_names_part1:
    if name not in all_ok_data:
        print(name)

In [None]:
no_data_cases = ["Patients' ID's"]

In [None]:
len(all_ok_data)

In [None]:
for pid in no_data_cases:
    if pid in all_ok_data:
        all_ok_data.remove(pid)

In [None]:
len(all_ok_data)

In [None]:
# Origin and destination directory
d_directory = 'E:/Selected Data/'
o_directory = mpath3 + '/'

In [None]:
patients_ids_1 = ["Patients' ID's"]

In [None]:
fgt_1 = [(300, 451), (373, 458), (558, 685), (396, 481), (536, 621), (379, 550), (434, 517), (335, 478), (582, 661), (587, 662),
         (303, 454), (348, 523), (533, 618), (365, 440), (514, 599), (335, 510), (405, 548), (589, 674), (505, 590), (348, 499),
         (521, 606), (387, 542), (407, 492), (554, 639), (365, 450), (365, 450), (432, 513), (490, 567), (371, 442), (464, 549),
         (365, 524), (365, 528), (422, 573), (338, 485), (591, 676), (493, 668), (393, 560), (432, 587), (595, 758), (325, 448),
         (365, 528), (365, 438), (302, 367), (365, 430), (365, 438), (501, 586), (569, 654), (529, 614), (277, 356), (277, 352),
         (281, 366), (249, 314), (490, 575), (420, 505), (277, 352), (536, 679), (277, 352), (421, 506), (474, 559), (439, 524), 
         (441, 526), (471, 560), (315, 406), (449, 536), (421, 510), (466, 551), (332, 499), (269, 354), (433, 518), (479, 650), 
         (461, 546), (435, 520), (552, 735), (431, 518), (443, 530), (423, 586), (402, 545), (485, 628), (380, 523), (430, 515), 
         (496, 671), (597, 800), (385, 464), (451, 540), (299, 384), (313, 404), (315, 402), (317, 404), (435, 520), (448, 639), 
         (612, 771), (439, 526), (517, 602), (500, 585), (405, 490), (555, 640), (426, 513), (297, 382), (277, 350), (506, 649), 
         (464, 549), (454, 539), (455, 598), (471, 556), (435, 598), (407, 554), (426, 577), (299, 384), (467, 662), (421, 612), 
         (376, 551), (258, 343), (275, 450), (429, 620), (337, 512), (500, 675), (350, 525), (396, 575), (406, 581), (405, 490), 
         (380, 555), (413, 604), (393, 568), (417, 592), (383, 558), (423, 508), (404, 579), (417, 640), (461, 604), (402, 577), 
         (286, 461), (359, 534), (587, 762), (399, 574), (385, 560), (420, 607), (370, 545), (417, 502), (405, 490), (395, 570), 
         (552, 727), (363, 538), (567, 718), (369, 544), (407, 582), (415, 602), (393, 568), (426, 601), (385, 560), (389, 564), 
         (344, 519), (355, 530), (428, 619), (561, 756), (397, 572), (670, 857), (402, 577), (356, 519), (412, 591), (366, 541),
         (366, 541), (343, 518), (383, 554), (394, 585), (414, 593), (412, 587), (423, 618), (453, 628), (359, 534), (451, 626),
         (390, 565), (439, 614), (385, 576), (312, 475), (497, 672), (434, 625), (410, 585), (946, 1145), (904, 1095), (868, 1059),
         (347, 522), (314, 489), (387, 562), (395, 570), (348, 523), (396, 587), (380, 559), (588, 803), (301, 460), (593, 772),
         (384, 559)]

In [None]:
bpe_1 = [(1060, 1211), (920, 1005), (2589, 2716), (914, 999), (1162, 1247), (1615, 1786), (1042, 1125), (1235, 1378), (1054, 1133),
         (1035, 1110), (1243, 1394), (1408, 1583), (1071, 1156), (813, 888), (1022, 1107), (1395, 1570), (1349, 1492), (1157, 1242),
         (1061, 1146), (1288, 1439), (1047, 1132), (1323, 1478), (925, 1010), (1080, 1165), (886, 971), (915, 1000), (928, 1009),
         (978, 1055), (820, 891), (980, 1065), (1233, 1392), (1253, 1416), (1252, 1403), (1146, 1293), (1141, 1226), (1441, 1616),
         (1301, 1468), (1280, 1435), (1699, 1862), (1225, 1348), (1253, 1416), (803, 876), (700, 765), (763, 828), (803, 876),
         (1007, 1092), (1107, 1192), (1063, 1148), (769, 848), (749, 824), (803, 888), (671, 736), (1032, 1117), (954, 1039),
         (749, 824), (2267, 2410), (749, 824), (959, 1044), (1012, 1097), (977, 1062), (979, 1064), (1025, 1114), (889, 980), 
         (1002, 1089), (978, 1067), (1015, 1100), (1238, 1405), (783, 868), (1203, 1288), (2382, 2553), (1126, 1211), (969, 1054), 
         (1872, 2055), (984, 1071), (990, 1077), (1423, 1586), (1278, 1421), (1373, 1516), (1256, 1399), (1228, 1313), (1768, 1943), 
         (2001, 2204), (869, 948), (1011, 1100), (849, 934), (880, 971), (868, 955), (871, 958), (957, 1042), (1816, 2007), 
         (2760, 2919), (971, 1058), (1150, 1235), (1155, 1240), (927, 1012), (1227, 1312), (1065, 1152), (834, 919), (754, 827), 
         (1226, 1369), (1161, 1246), (1107, 1192), (1487, 1630), (1108, 1193), (1411, 1574), (1303, 1450), (1342, 1493), (845, 930), 
         (1859, 2054), (1813, 2004), (1648, 1823), (792, 877), (1511, 1686), (1833, 2024), (1579, 1754), (1736, 1911), (1610, 1785), 
         (1716, 1895), (1646, 1821), (936, 1021), (1616, 1791), (1793, 1984), (1635, 1810), (1725, 1900), (1643, 1818), (957, 1042), 
         (1820, 1995), (2723, 2946), (1337, 1480), (1746, 1921), (1522, 1697), (1667, 1842), (1859, 2034), (1671, 1846), (1657, 1832), 
         (1788, 1975), (1610, 1785), (948, 1033), (939, 1024), (1631, 1806), (1788, 1963), (1683, 1858), (1327, 1478), (1605, 1780), 
         (1651, 1826), (1731, 1918), (1635, 1810), (1914, 2089), (1657, 1832), (1673, 1848), (1580, 1755), (1651, 1826), (1820, 2011),
         (1977, 2172), (1653, 1828), (2640, 2827), (1638, 1813), (1520, 1683), (1708, 1887), (1602, 1777), (1602, 1777), (1603, 1778),
         (1595, 1766), (1762, 1953), (1746, 1925), (1684, 1859), (1803, 1998), (1689, 1864), (1667, 1842), (1687, 1862), (1630, 1805),
         (1681, 1856), (1753, 1944), (1464, 1627), (1733, 1908), (1814, 2005), (1652, 1827), (2580, 2779), (2464, 2655), (2408, 2599),
         (1583, 1758), (1370, 1545), (1631, 1806), (1639, 1814), (1590, 1765), (1752, 1943), (2290, 2469), (2842, 3057), (1441, 1600),
         (1861, 2040), (1626, 1801)]

In [None]:
slice_checker(fgt_1, bpe_1, patients_ids_1)

In [None]:
automated_copy(o_directory, d_directory, patients_ids_1, fgt_1, bpe_1)

# Part 2

In [None]:
# NOTE(review): this root is not one of the mpath1-mpath12 directories scanned in
# Step 1 (the closest one there is mpath10 = 'H:/Old Breast MRI Data/H/02'), so the
# folder names of this part were not included in `folder_names` -- confirm the path.
mpath = 'H:/Breast MRI/02/sps'
# Collect the folder names found under the Part 2 root.
folder_names_part2 = []
folder_names_part2 = get_folder_names(mpath, folder_names_part2)

In [None]:
for name in folder_names_part2:
    if name not in all_ok_data:
        print(name)

In [None]:
no_data_cases = ["Patients' ID's"]

In [None]:
len(all_ok_data)

In [None]:
for pid in no_data_cases:
    if pid in all_ok_data:
        all_ok_data.remove(pid)

In [None]:
len(all_ok_data)

In [None]:
# Origin and destination directory
d_directory = 'E:/Selected Data/'
# Derive the origin from `mpath` (assigned in the first cell of Part 2), the same
# way the other parts build it from their mpathN, instead of repeating the literal.
o_directory = mpath + '/'

In [None]:
patients_ids_2 = ["Patients' ID's"]

In [None]:
fgt_2 = [(413, 498), (405, 490), (315, 478), (374, 549), (348, 523), (382, 533), (385, 560), (303, 466), (481, 566)]

In [None]:
bpe_2 = [(953, 1038), (957, 1042), (1135, 1298), (1434, 1609), (1408, 1583), (1322, 1473), (1445, 1620), (1303, 1466), (1123, 1208)]

In [None]:
slice_checker(fgt_2, bpe_2, patients_ids_2)

In [None]:
automated_copy(o_directory, d_directory, patients_ids_2, fgt_2, bpe_2)

# Part 3

In [None]:
folder_names_part3 = []
folder_names_part3 = get_folder_names(mpath4, folder_names_part3)

In [None]:
for name in folder_names_part3:
    if name not in all_ok_data:
        print(name)

In [None]:
no_data_cases = []

In [None]:
len(all_ok_data)

In [None]:
for pid in no_data_cases:
    if pid in all_ok_data:
        all_ok_data.remove(pid)

In [None]:
len(all_ok_data)

In [None]:
# Origin and destination directory
d_directory = 'E:/Selected Data/'
o_directory = mpath4 + '/'

In [None]:
patients_ids_3 = ["Patients' ID's"]

In [None]:
fgt_3 = [(333, 500), (612, 771), (506, 649), (881, 1072), (982, 1173), (1287, 1478), (1017, 1208), (1190, 1389), (1033, 1224), (991, 1182), 
         (1052, 1243), (1160, 1351), (978, 1169), (879, 1038), (1027, 1218), (1224, 1415), (974, 1153), (1694, 1885), (498, 641), (1134, 1325), 
         (1155, 1346), (1174, 1365), (1174, 1365), (1250, 1441), (1181, 1372)]

In [None]:
bpe_3 = [(1999, 2166), (2760, 2919), (1226, 1369), (2225, 2416), (2356, 2547), (3133, 3324), (2391, 2582), (3269, 3468), (2577, 2768), 
         (2941, 3132), (2558, 2749), (2688, 2879), (2928, 3119), (2687, 2846), (2571, 2762), (2776, 2967), (2430, 2609), (3250, 3441), 
         (2396, 2539), (2688, 2879), (2567, 2758), (2718, 2909), (2729, 2920), (2824, 3015), (2785, 2976)]

In [None]:
slice_checker(fgt_3, bpe_3, patients_ids_3)

In [None]:
automated_copy(o_directory, d_directory, patients_ids_3, fgt_3, bpe_3)

# Part 4

In [None]:
folder_names_part4 = []
folder_names_part4 = get_folder_names(mpath5, folder_names_part4)

In [None]:
folder_list = []
for name in folder_names_part4:
    if name not in all_ok_data:
        print(name)
        folder_list.append(name)

In [None]:
len(folder_list)

In [None]:
no_data_cases = []

In [None]:
len(all_ok_data)

In [None]:
for pid in no_data_cases:
    if pid in all_ok_data:
        all_ok_data.remove(pid)

In [None]:
len(all_ok_data)

In [None]:
# Origin and destination directory
d_directory = 'E:/Selected Data/'
o_directory = mpath5 + '/'

In [None]:
patients_ids_4 = ["Patients' ID's"]

In [None]:
fgt_4 = [(1263, 1454), (1141, 1332), (2281, 2484), (1865, 2068), (867, 1006), (1204, 1395), (1246, 1437), (1177, 1380), (998, 1189),
         (1178, 1389), (916, 1107), (1397, 1576), (1320, 1503), (861, 1012), (790, 965), (1229, 1444), (896, 1087), (769, 940),
         (755, 906), (841, 1004), (979, 1170), (1226, 1417), (1362, 1553), (1054, 1245), (514, 669), (700, 867), (1050, 1241),
         (946, 1145), (904, 1095), (868, 1059), (400, 547), (1124, 1303), (592, 787)]

In [None]:
bpe_4 = [(2813, 3004), (3200, 3391), (4025, 4228), (3648, 3851), (2033, 2172), (2768, 2959), (2796, 2987), (2810, 3013), (2542, 2733),
         (2881, 3092), (2460, 2651), (2849, 3028), (2810, 2993), (2133, 2284), (3467, 3642), (2967, 3182), (2440, 2631), (2173, 2344),
         (2021, 2172), (2219, 2382), (2556, 2747), (2770, 2961), (2900, 3091), (2628, 2819), (2168, 2323), (2490, 2657),
         (3962, 4153), (2580, 2779), (2464, 2655), (2408, 2599), (1946, 2093), (2598, 2777), (2638, 2833)]

In [None]:
slice_checker(fgt_4, bpe_4, patients_ids_4)

In [None]:
automated_copy(o_directory, d_directory, patients_ids_4, fgt_4, bpe_4)

# Part 5

In [None]:
folder_names_part5 = []
folder_names_part5 = get_folder_names(mpath6, folder_names_part5)

In [None]:
folder_list = []
for name in folder_names_part5:
    if name not in all_ok_data:
        print(name)
        folder_list.append(name)

In [None]:
len(folder_list)

In [None]:
no_data_cases = []

In [None]:
len(all_ok_data)

In [None]:
for pid in no_data_cases:
    if pid in all_ok_data:
        all_ok_data.remove(pid)

In [None]:
len(all_ok_data)

In [None]:
# Origin and destination directory
d_directory = 'E:/Selected Data/'
o_directory = mpath6 + '/'

In [None]:
patients_ids_5 = ["Patients' ID's"]

In [None]:
fgt_5 = [(941,1132), (982,1173), (1287,1478), (1017,1208), (1190,1389), (1033,1224),
 (991,1182), (1052,1243), (1160,1351), (978,1169),(879,1038), (1027,1218), (1224,1415), (974,1153), (1694,1885),
 (1134,1325), (1155,1346), (1174,1365), (1174,1365), (1250,1441), (405,490), (1181,1372), (1038,1229), (1007,1194),
 (631,806), (963,1154), (536,679), (1141,1332), (2281,2484), (1865,2068), (867,1006), (1204,1395), (1246,1437),
 (1177,1380), (998,1189), (1178,1389), (916,1107), (1397,1576), (896,1087)
 ]

In [None]:
bpe_5 = [(2285,2476), (2416,2607), (3199,3390), (2451,2642), (3269,3468), (2577,2768), (2941,3132), (2558,2749), (2688,2879), (2928,3119), (2687,2846), 
(2571,2762), (2776,2967), (2430,2609), (3250,3441), (2688,2879),(2567,2758), (2718,2909), (2729,2920), (2824,3015), (957,1042), (2785,2976), (2582,2773),
(2523,2710), (1915,2090), (2507,2698), (2116,2259), (3200,3391), (4025,4228), (3648,3851), (2033,2172), (2768,2959), (2796,2987), (2810,3013), (2542,2733),
(2881,3092), (2460,2651), (2849,3028), (2440,2631)
]

In [None]:
slice_checker(fgt_5, bpe_5, patients_ids_5)

In [None]:
automated_copy(o_directory, d_directory, patients_ids_5, fgt_5, bpe_5)

# Part 6

In [None]:
folder_names_part6 = []
folder_names_part6 = get_folder_names(mpath7, folder_names_part6)

In [None]:
folder_list = []
for name in folder_names_part6:
    if name not in all_ok_data:
        print(name)
        folder_list.append(name)

In [None]:
len(folder_list)

In [None]:
no_data_cases = ["Patients' ID's"]

In [None]:
len(all_ok_data)

In [None]:
for pid in no_data_cases:
    if pid in all_ok_data:
        all_ok_data.remove(pid)

In [None]:
len(all_ok_data)

In [None]:
# Origin and destination directory
d_directory = 'E:/Selected Data/'
o_directory = mpath7 + '/'

In [None]:
patients_ids_6 = ["Patients' ID's"]

In [None]:
fgt_6 = [(364,515), (658,833), (356,413), (321,472), (536,679), (536,623), (348,523), (345,512), (356,531), (518,615), (348,523),
         (387,472), (396,481), (338,501), (605,690), (622,707), (340,423), (431,516), (365,450), (387,472), (269,354), (365,450),
         (509,594), (365,450), (505,590), (454,625), (335,502), (348,523), (549,634), (525,610), (545,688), (509,594), (378,463),
         (482,653), (625,710), (521,606), (580,665), (529,614), (320,471), (430,561), (344,479), (427,512), (365,442), (378,453),
         (350,517), (509,594), (421,506), (394,479), (397,482), (403,488), (661,746), (409,494), (332,475), (397,540), (371,522),
         (411,562), (568,653), (335,498), (421,506), (393,536), (368,511), (809,894), (393,478), (467,556), (551,694), (365,430),
         (439,524), (450,535), (365,450), (451,536), (537,622), (427,512), (365,450), (330,481), (549,634), (529,614), (375,550),
         (365,430), (387,474), (399,542), (413,572), (391,460), (541,626), (597,682), (521,606), (373,458), (365,432), (401,486),
         (517,602), (521,606), (633,718), (377,528), (348,503), (554,639), (574,659), (417,502), (411,496), (509,594), (489,574),
         (396,481), (403,472), (275,350), (301,394), (405,470), (277,352), (273,348), (415,504), (459,544), (505,590), (519,604),
         (273,344)]

In [None]:
bpe_6 = [(1124,1275), (1606,1781), (782,839), (1081,1232), (2092,2235), (1056,1143), (1408,1583), (1245,1412), (1416,1591),
         (1206,1303), (1408,1583), (885,970), (894,979), (1338,1501), (1103,1188), (1124,1209), (848,931), (1317,1402), (1262,1347),
         (971,1056), (699,784), (1247,1332), (1458,1543), (1126,1211), (1015,1100), (1554,1725), (1523,1690), (1408,1583), (1150,1235),
         (1081,1166), (1601,1744), (1151,1236), (876,961), (1410,1581), (1205,1290), (1083,1168), (1198,1283), (1105,1190), (1236,1387),
         (1402,1533), (1340,1475), (1102,1187), (823,900), (826,901), (1370,1537), (1023,1108), (939,1024), (1123,1208), (1202,1287),
         (1244,1329), (1211,1296), (951,1036), (1208,1351), (1273,1416), (1287,1438), (1375,1526), (1106,1191), (1335,1498), (1101,1186),
         (1269,1412), (1244,1387), (1515,1600), (904,989), (1228,1317), (1427,1570), (763,828), (981,1066), (948,1033), (1101,1186),
         (1209,1294), (1079,1164), (925,1010), (863,948), (1270,1421), (1111,1196), (1063,1148), (1435,1610), (1089,1154), (908,995),
         (1275,1418), (1369,1528), (809,878), (1087,1172), (1135,1220), (1047,1132), (907,992), (1126,1193), (914,999), (1039,1124),
         (1047,1132), (1179,1264), (1293,1444), (1308,1463), (1080,1165), (1120,1205), (943,1028), (933,1018), (1242,1327), (1256,1341),
         (937,1022), (845,914), (747,822), (863,956), (842,907), (749,824), (745,820), (969,1058), (1136,1221), (1170,1255), (1073,1158),
         (725,796)]

In [None]:
slice_checker(fgt_6, bpe_6, patients_ids_6)

In [None]:
automated_copy(o_directory, d_directory, patients_ids_6, fgt_6, bpe_6)

# Part 7

In [None]:
folder_names_part7 = []
folder_names_part7 = get_folder_names(mpath1, folder_names_part7)

In [None]:
folder_list = []
for name in folder_names_part7:
    if name not in all_ok_data:
        print(name)
        folder_list.append(name)

In [None]:
len(folder_list)

In [None]:
no_data_cases = []

In [None]:
len(all_ok_data)

In [None]:
for pid in no_data_cases:
    if pid in all_ok_data:
        all_ok_data.remove(pid)

In [None]:
len(all_ok_data)

In [None]:
# Origin and destination directory
d_directory = 'E:/Selected Data/'
o_directory = mpath1 + '/'

In [None]:
patients_ids_7 = ["Patients' ID's"]

In [None]:
fgt_7 = [(1038,1229), (1007,1194), (963,1154), (1141,1332), (2281,2484), (1865,2068), (867,1006), (1204,1395), (1246,1437), (1177,1380), (998,1189), 
(1178,1389), (916,1107), (1397,1576), (1258,1441), (805,956), (736,911), (1155,1370), (832,1023), (812,1003), (721,892), (707,858), (783,946), (907,1098), (1146,1337), (1292,1483), (514,669), (700,867), (970,1161)]

In [None]:
bpe_7 = [(2582,2773), (2523,2710), (2507,2698), (3200,3391), (4025,4228), (3648,3851), (2033,2172), (2768,2959), (2796,2987), (2810,3013), (2542,2733),
(2881,3092), (2460,2651), (2849,3028), (2748,2931), (2077,2228), (3413,3588), (2893,3108), (2376,2567), (2360,2551), (2125,2296), (1973,2124), (2161,2324), (2484,2675), (2690,2881), (2830,3021), (2168,2323), (2490,2657), (3882,4073) ]

In [None]:
slice_checker(fgt_7, bpe_7, patients_ids_7)

In [None]:
automated_copy(o_directory, d_directory, patients_ids_7, fgt_7, bpe_7)

# Part 8

In [None]:
folder_names_part8 = []
folder_names_part8 = get_folder_names(mpath2, folder_names_part8)

In [None]:
folder_list = []
for name in folder_names_part8:
    if name not in all_ok_data:
        print(name)
        folder_list.append(name)

In [None]:
len(folder_list)

In [None]:
no_data_cases = []

In [None]:
len(all_ok_data)

In [None]:
for pid in no_data_cases:
    if pid in all_ok_data:
        all_ok_data.remove(pid)

In [None]:
len(all_ok_data)

In [None]:
# Origin and destination directory
d_directory = 'E:/Selected Data/'
o_directory = mpath2 + '/'

In [None]:
patients_ids_8 = ["Patients' ID's"]

In [None]:
fgt_8 = [(364,515), (280,411), (300,451), (288,439), (304,467), (284,435), (290,421), (316,479), (300,451), (258,377), (329,500), (315,478), (348,531), (300,451), (300,451), (321,488), (1181,1372), (1038,1229), (876,1067), (380,559), (588,803)]

In [None]:
bpe_8 = [(1124,1275), (940,1071), (1880,2031), (1081,1232), (1176,1339), (1088,1239), (950,1081), (1136,1299), (1060,1211), (858,977), (1189,1360), (1135,1298), (1268,1451), (1060,1211), (1060,1211), (1413,1580), (2785,2976), (2582,2773), (2424,2615), (2290,2469), (2842,3057) ]

In [None]:
slice_checker(fgt_8, bpe_8, patients_ids_8)

In [None]:
automated_copy(o_directory, d_directory, patients_ids_8, fgt_8, bpe_8)

# Part 9

In [None]:
folder_names_part9 = []
folder_names_part9 = get_folder_names(mpath8, folder_names_part9)

In [None]:
folder_list = []
for name in folder_names_part9:
    if name not in all_ok_data:
        print(name)
        folder_list.append(name)

In [None]:
len(folder_list)

In [None]:
no_data_cases = []

In [None]:
len(all_ok_data)

In [None]:
for pid in no_data_cases:
    if pid in all_ok_data:
        all_ok_data.remove(pid)

In [None]:
len(all_ok_data)

In [None]:
# Origin and destination directory
d_directory = 'E:/Selected Data/'
o_directory = mpath8 + '/'

In [None]:
patients_ids_9 = ["Patients' ID's"]

In [None]:
fgt_9 = [(1019,1210), (861,1012), (790,965), (1229,1444), (876,1067), (769,940), (755,906), (841,1004), (979,1170), (1226,1417), (1362,1553), (1050,1241), (946,1145), (868,1059)]

In [None]:
bpe_9 = [(2567,2758), (2133,2284), (3467,3642), (2967,3182), (2424,2615), (2173,2344), (2021,2172), (2219,2382), (2556,2747), (2770,2961), (2900,3091), (3962,4153), (2580,2779), (2408,2599)]

In [None]:
slice_checker(fgt_9, bpe_9, patients_ids_9)

In [None]:
automated_copy(o_directory, d_directory, patients_ids_9, fgt_9, bpe_9)

# Part 10

In [None]:
folder_names_part10 = []
folder_names_part10 = get_folder_names(mpath9, folder_names_part10)

In [None]:
folder_list = []
for name in folder_names_part10:
    if name not in all_ok_data:
        print(name)
        folder_list.append(name)

In [None]:
len(folder_list)

In [None]:
no_data_cases = ['8867592']

In [None]:
len(all_ok_data)

In [None]:
for pid in no_data_cases:
    if pid in all_ok_data:
        all_ok_data.remove(pid)

In [None]:
len(all_ok_data)

In [None]:
# Origin and destination directory
d_directory = 'E:/Selected Data/'
o_directory = mpath9 + '/'

In [None]:
patients_ids_10 = ["Patients' ID's"]

In [None]:
fgt_10 = [(368,511), (400,567), (753,920), (424,567), (442,585), (321,488), (486,561), (485,570), (425,604), (372,455), (417,494), (641,720), (396,483), (654,739), (525,610), (538,623), (552,637), (453,652), (365,532), (408,473), (521,606), (558,643), (550,635), (434,519), (499,560),  (493,564),
(529,614), (277,362), (471,572), (405,490), (381,444), (405,490), (293,378), (310,387), (275,352), (396,481), (277,362), (277,362), (405,490), (419,504), (405,482), (465,554), (311,396), (405,490), (361,504), (430,593), (344,475), (639,782), (991,1182), (1052,1243), (978,1169), (304,415),
(406,593), (395,574), (347,522), (347,522), (275,450), (427,618), (316,491), (395,578), (282,457), (532,699), (428,615), (414,605), (503,654), (325,500), (336,511), (377,552), (402,585), (396,595), (677,872), (323,462), (333,508), (393,568), (355,530), (360,535), (325,476), 
(336,503), (461,636), (333,508), (404,579), (384,559), (383,558), (405,490), (386,561), (407,582), (516,703)
]

In [None]:
bpe_10 = [(1930,2073), (1300,1467), (1653,1820), (2130,2273), (2301,2444), (1413,1580), (950,1025), (1090,1175), (1786,1965), (916,999), (1322,1399), (1144,1223), (938,1025), (1355,1440), (1055,1140), (1068,1153), (1078,1163), (1893,2092), (1273,1440), (806,871), (1069,1154), (1088,1173), (1092,1177), (946,1031), (901,962), (945,1016),
(1448,1533), (799,884), (1101,1202), (951,1036), (813,876), (1082,1167), (1111,1196), (792,869), (757,834), (1078,1163), (799,884), (799,884), (927,1012), (977,1062), (887,964), (1035,1124), (854,939), (939,1024), (1237,1380), (1406,1569), (1160,1291), (2510,2653), (2941,3132), (2558,2749), (2928,3119), (864,975),
(2392,2579), (1679,1858), (1619,1794), (1619,1794), (1511,1686), (1831,2022), (1552,1727), (1739,1922), (1518,1693), (1804,1971), (2434,2621), (1818,2009), (1419,1570), (1561,1736), (1608,1783), (1637,1812), (1734,1917), (2473,2672), (2105,2300), (1307,1446), (1569,1744), (1737,1912), (1651,1826), (1596,1771), (1417,1568), 
(2082,2249), (1706,1881), (1569,1744), (1640,1815), (2093,2268), (1635,1810), (939,1024), (1802,1977), (1739,1914), (1860,2047) 
]

In [None]:
slice_checker(fgt_10, bpe_10, patients_ids_10)

In [None]:
automated_copy(o_directory, d_directory, patients_ids_10, fgt_10, bpe_10)

# Part 11

In [None]:
folder_names_part11 = []
folder_names_part11 = get_folder_names(mpath11, folder_names_part11)

In [None]:
folder_list = []
for name in folder_names_part11:
    if name not in all_ok_data:
        print(name)
        folder_list.append(name)

In [None]:
len(folder_list)

In [None]:
# Origin and destination directory
d_directory = 'E:/New Selected Data/'
o_directory = mpath11 + '/'

In [None]:
patients_ids_11 = ["Patients' ID's"]

In [None]:
fgt_11 = [
    (319, 404), (287, 372), (277, 362), (299, 384), (397, 474), (405, 490), (277, 362), (301, 386), (377, 462), (434, 519), (355, 518), (667, 810), (496, 639), (369, 454), (451, 536), (388, 473)
    , (655, 798), (450, 629), (507, 650),(487, 658), (409, 494), (525, 616), (433, 522), (373, 458), (469, 562), (790, 933), (749, 904), (441, 624), (295, 380), (553, 744), (794, 993), (571, 746)
    , (315, 400), (808, 975), (397, 482), (415, 500), (649, 852), (348, 531), (420, 599), (506, 681), (381, 556), (433, 518), (663, 748), (525, 668), (410, 593), (379, 554), (447, 626), (468,659)
    , (363,448), (417, 502), (455, 546), (472, 559), (383, 546), (392, 477), (552, 637), (381, 466), (649, 734), (366, 443), (365, 450), 
    (452,595), (464, 553), (1280, 1471), (443, 614), (391, 558), (610, 785), 
    
    (348, 523), (431, 520), (415, 500), (444, 529),
    (397, 576), (379, 558), (591, 766), (531, 674), 
    (541, 704), (477, 620), (457, 644), (393, 568), 
    (421, 600), (393, 572), (417, 600), (409, 494), 
    (573, 716), (706, 885), (327, 502), (405, 490),
    (405, 490), (405, 490), (454, 637), (401, 580),
    (453, 632), (418, 593), (393, 472), (277, 358),
    (435, 524), (473, 592), (500, 631), (509, 652),
    (576, 727), (429, 514), (467, 662), (425, 600),
    (311, 486), (375, 550), (335, 510), (300, 427),
    (381, 556), (351, 526), (601, 776), (341, 516),
    (414, 593), (449, 540), (441, 526), (405, 490),
    (402, 577), (343, 502), (389, 572), (354, 505),
    (343, 482), (347, 522), (430, 625), (352, 527),
    (320, 455), (549, 712), (364, 515), (509, 716),
    (579, 750), (369, 544), (327, 466), (347, 502),
    (338, 493), (415, 500), (526, 677), (390, 565),
    (343, 518), (385, 560), (394, 585), (476, 695),
    (362, 537), (405, 490), (314, 489), (394, 569),
    (421, 612), (418, 597), (383, 558), (423, 598),
    (343, 518), (363, 538)
]

In [None]:
# Hand-entered (start, end) integer pairs, 147 entries, four per row.
# NOTE(review): presumably the BPE slice range for each entry of
# patients_ids_11, in the same order as fgt_11 -- confirm against
# slice_checker / automated_copy.
bpe_11 = [
    (2328, 2413), (2142, 2227), (815, 900), (833, 918),
    (891, 968), (938, 1023), (1877, 1962), (1929, 2014),
    (975, 1060), (1027, 1112), (2089, 2252), (1543, 1686),
    (2313, 2456), (930, 1015), (1069, 1154), (1473, 1558),
    (1531, 1674), (2416, 2595), (1383, 1526), (4004, 4175),
    (1023, 1108), (1089, 1180), (987, 1076), (954, 1039),
    (1145, 1238), (1666, 1809), (1745, 1900), (1797, 1980),
    (832, 917), (2629, 2820), (2246, 2445), (1879, 2054),
    (852, 937), (2058, 2225), (987, 1072), (984, 1069),
    (2065, 2268), (1452, 1635), (1596, 1775), (1748, 1923),
    (1637, 1812), (1043, 1128), (1261, 1346), (1401, 1544),
    (1754, 1937), (1619, 1794), (1731, 1910), (1812, 2003),
    (910, 995), (965, 1050), (2067, 2158), (1054, 1141),
    (1631, 1794), (995, 1080), (1097, 1182), (994, 1079),
    (1253, 1338), (857, 934), (924, 1009), (2024, 2167),
    (1054, 1143), (2684, 2875), (1715, 1886), (1615, 1782),
    (1848, 2023), (1588, 1763), (1000, 1089), (954, 1039),
    (985, 1070), (1675, 1854), (1651, 1830), (1827, 2002),
    (2111, 2254), (1517, 1680), (1353, 1496), (1801, 1988),
    (1629, 1804), (1681, 1860), (1653, 1832), (1701, 1884),
    (950, 1035), (2163, 2306), (1966, 2145), (1596, 1771),
    (1385, 1470), (1283, 1368), (1512, 1597), (1774, 1957),
    (1673, 1852), (1737, 1916), (1654, 1829), (900, 979),
    (794, 875), (993, 1082), (1073, 1192), (1160, 1291),
    (1229, 1372), (1492, 1643), (973, 1058), (2046, 2241),
    (1850, 2025), (1271, 1446), (1731, 1906), (1562, 1737),
    (1248, 1375), (1641, 1816), (1611, 1786), (1861, 2036),
    (1601, 1776), (1710, 1889), (1645, 1736), (974, 1059),
    (947, 1032), (1638, 1813), (1483, 1642), (1673, 1856),
    (1446, 1597), (1363, 1502), (1619, 1794), (1870, 2065),
    (1588, 1763), (1316, 1451), (1773, 1936), (1504, 1655),
    (2033, 2240), (1851, 2022), (1605, 1780), (1347, 1486),
    (1463, 1618), (1454, 1609), (949, 1034), (2212, 2363),
    (1698, 1873), (1603, 1778), (1657, 1832), (1762, 1953),
    (2036, 2255), (1602, 1777), (939, 1024), (1550, 1725),
    (1714, 1889), (1789, 1980), (1694, 1873), (1643, 1818),
    (1803, 1978), (1603, 1778), (1659, 1834),
]

In [None]:
# Run slice_checker (defined elsewhere in this notebook) on the fgt_11 / bpe_11
# range lists and patients_ids_11 before the copy step in the next cell.
slice_checker(fgt_11, bpe_11, patients_ids_11)

In [None]:
# Run automated_copy (defined elsewhere) with whatever o_directory / d_directory
# currently hold in the kernel, plus the *_11 ID and range lists.
automated_copy(o_directory, d_directory, patients_ids_11, fgt_11, bpe_11)

# Part 12

In [None]:
# get_folder_names (defined elsewhere) is handed mpath12 and an empty list;
# its return value becomes the Part 12 folder-name list.
folder_names_part12 = get_folder_names(mpath12, [])

In [None]:
# Keep only the Part 12 folder names that are absent from all_ok_data,
# then echo each kept name on its own line.
folder_list = [name for name in folder_names_part12 if name not in all_ok_data]
for name in folder_list:
    print(name)

In [None]:
# Number of Part 12 folder names that were not found in all_ok_data.
len(folder_list)

In [None]:
# Origin is the Part 12 path with a trailing slash; destination is a fixed folder.
o_directory = f'{mpath12}/'
d_directory = 'E:/New Selected Data 2/'

In [None]:
# NOTE(review): this is a one-element placeholder, presumably left after the
# real patient IDs were removed. fgt_12 and bpe_12 below hold 133 entries each,
# so the real ID list must be restored before slice_checker / automated_copy
# are run -- confirm the expected length against those helpers.
patients_ids_12 = ["Patients' ID's"]

In [None]:
# Hand-entered (start, end) integer pairs, 133 entries (same count as bpe_12).
# NOTE(review): presumably the FGT slice range for each Part 12 patient, in the
# same order as patients_ids_12 -- confirm against slice_checker.
fgt_12 = [(366, 541), (381, 468), (385, 470), (505, 590),(558, 737),
          (366, 541), (306, 441), (449, 534), (376, 543), (478, 559),
          (460, 547), (304, 467), (303, 474), (351, 426), (449, 534),
          (498, 583), (449, 534), (339, 482), (424, 591), (457, 542),
          (432, 517), (332, 495), (393, 480), (324, 409), (316, 401),
          (473, 558), (449, 534), (407, 594), (385, 556), (489, 574),
          (524, 609), (444, 603), (443, 602), (472, 615), (412, 547),
          (597, 740), (515, 650), (544, 687), (528, 671), (314, 445),
          (394, 513), (494, 621), (442, 585), (393, 544), (378, 463),
          (451, 618), (465, 640), (496, 639), (536, 679), (618, 761),
          (320, 495), (267, 398), (365, 450), (408, 559), (465, 550),
          (516, 659), (574, 717), (280, 439), (416, 559), (408, 551),
          (461, 604), (559, 702), (431, 516), (453, 538), (453, 538),
          (493, 612), (350, 517), (277, 362), (277, 362), (283, 368),
          (310, 395), (333, 418), (277, 362), (497, 574), (479, 622),
          (439, 614), (423, 598), (690, 873), (299, 384), (442, 527),
          (490, 637), (271, 356), (427, 614), (279, 364), (815, 958),
          (277, 364), (423, 508), (376, 571), (443, 528), (417, 502),
          (515, 658), (547, 702), (582, 745), (475, 646), (325, 412),
          (405, 490), (289, 374), (424, 515), (381, 454), (412, 497),
          (365, 436), (417, 604), (555, 730), (489, 574), (289, 374),
          (301, 388), (299, 384), (269, 354), (575, 750), (319, 404),
          (277, 362), (782, 997), (279, 364), (373, 458), (365, 450),
          (425, 510), (515, 690), (297, 382), (321, 406), (393, 478),
          (479, 622), (281, 366), (411, 496), (405, 490), (369, 454),
          (329, 398), (410, 557), (580, 767), (412, 591), (299, 384),
          (649, 800), (417, 502), (393, 478)]

In [None]:
# Hand-entered (start, end) integer pairs, 133 entries (same count as fgt_12).
# NOTE(review): presumably the BPE slice range for each Part 12 patient, in the
# same order as patients_ids_12 -- confirm against slice_checker.
bpe_12 = [(1602, 1777), (821, 908), (815, 900), (1003, 1088), (1818, 1997),
          (1602, 1777), (1266, 1401), (1059, 1144), (1492, 1659), (1331, 1412),
          (1355, 1442), (1432, 1595), (1491, 1662), (731, 806), (1059, 1144),
          (1156, 1241), (1059, 1144), (1359, 1502), (1588, 1755), (1048, 1133),
          (930, 1015), (1496, 1659), (929, 1016), (934, 1019), (902, 987),
          (1155, 1240), (1059, 1144), (1643, 1830), (1525, 1696), (1198, 1283),
          (1224, 1309), (1404, 1563), (1467, 1626), (1192, 1335), (1092, 1227),
          (2153, 2296), (1983, 2118), (2100, 2243), (2256, 2399), (974, 1105),
          (1102, 1221), (1250, 1377), (1998, 2141), (1153, 1304), (978, 1063),
          (1291, 1458), (1413, 1588), (1936, 2079), (2092, 2235), (2174, 2317),
          (1215, 1390), (927, 1058), (867, 952), (1176, 1327), (976, 1061),
          (2092, 2235), (2316, 2459), (1196, 1355), (1136, 1279), (1128, 1271),
          (1967, 2110), (1401, 1544), (1122, 1207), (979, 1064), (1156, 1241),
          (1314, 1433), (2096, 2263), (818, 903), (1300, 1385), (822, 907),
          (847, 932), (1866, 1951), (1474, 1559), (1158, 1235), (1303, 1446),
          (1723, 1898), (1719, 1894), (2274, 2457), (1510, 1595), (1071, 1156),
          (1961, 2108), (807, 892), (1795, 1982), (1641, 1726), (2395, 2538),
          (990, 1077), (1098, 1183), (1552, 1747), (1087, 1172), (1096, 1181),
          (1391, 1534), (1431, 1586), (1454, 1617), (2347, 2518), (1909, 1996),
          (927, 1012), (1235, 1320), (1138, 1229), (940, 1013), (1077, 1162),
          (838, 909), (1797, 1984), (1851, 2026), (1407, 1492), (811, 896),
          (833, 920), (1196, 1281), (812, 897), (1859, 2034), (841, 926),
          (814, 899), (3740, 3955), (801, 886), (1015, 1100), (953, 1038),
          (947, 1032), (1787, 1962), (837, 922), (843, 928), (949, 1034),
          (1355, 1498), (803, 888), (954, 1039), (936, 1021), (977, 1062),
          (854, 923), (1509, 1656), (1972, 2159), (1720, 1899), (821, 906),
          (1561, 1712), (991, 1076), (1042, 1127)]

In [None]:
# Run slice_checker (defined elsewhere in this notebook) on the Part 12 range
# lists and ID list before the copy step in the next cell.
slice_checker(fgt_12, bpe_12, patients_ids_12)

In [None]:
# Run automated_copy (defined elsewhere) from o_directory (mpath12 + '/') to
# d_directory ('E:/New Selected Data 2/') with the Part 12 ID and range lists.
automated_copy(o_directory, d_directory, patients_ids_12, fgt_12, bpe_12)

## Step 2
### DICOM to PNG File

In [None]:
# Input and output folders passed to dicom_to_png_no_preprocessed in the next cell.
# Note: this rebinds the kernel-wide names files_path and destination_path.
files_path = 'E:/Selected Data/'
destination_path = 'E:/03 Percentiles Breast MRI Data (FGT)/'

In [None]:
# Step 2 conversion call; dicom_to_png_no_preprocessed is defined elsewhere
# in this notebook and receives the label table via data_info.
dicom_to_png_no_preprocessed(
    files_path,
    destination_path,
    data_info=data_labels,
    data_selection_index=3,
    out_shape=None,
)

## Step 3
### Create Final 2D Dataset

In [39]:
# Create the Right Dataset
# Split configuration (also read by name in the dataset_creator call below).
files_path = 'E:/03 Percentiles Breast MRI Data (FGT)/'
slice_number = 3
validation_ratio = 0.1
test_ratio = 0.15
random_state = 15

# side_data_seperator and dataset_creator are defined elsewhere in this notebook.
(
    both_side_ok_data,
    one_side_ok_data,
    both_side_ok_labels,
    one_side_ok_labels,
) = side_data_seperator(files_path, slice_number, data_labels)

# Six return values: train / validation / test data, then the matching labels.
(
    train_data_percentiles,
    validation_data_percentiles_right,
    test_data_percentiles_right,
    train_labels_percentiles,
    validation_labels_percentiles,
    test_labels_percentiles,
) = dataset_creator(
    both_side_ok_data,
    one_side_ok_data,
    both_side_ok_labels,
    one_side_ok_labels,
    test_ratio=test_ratio,
    validation_ratio=validation_ratio,
    slice_number=slice_number,
    random_state=random_state,
    test_set_side='Right',
)

1/498
2/498
3/498
4/498
5/498
6/498
7/498
8/498
9/498
10/498
11/498
12/498
13/498
14/498
15/498
16/498
17/498
18/498
19/498
20/498
21/498
22/498
23/498
24/498
25/498
26/498
27/498
28/498
29/498
30/498
31/498
32/498
33/498
34/498
35/498
36/498
37/498
38/498
39/498
40/498
41/498
42/498
43/498
44/498
45/498
46/498
47/498
48/498
49/498
50/498
51/498
52/498
53/498
54/498
55/498
56/498
57/498
58/498
59/498
60/498
61/498
62/498
63/498
64/498
65/498
66/498
67/498
68/498
69/498
70/498
71/498
72/498
73/498
74/498
75/498
76/498
77/498
78/498
79/498
80/498
81/498
82/498
83/498
84/498
85/498
86/498
87/498
88/498
89/498
90/498
91/498
92/498
93/498
94/498
95/498
96/498
97/498
98/498
99/498
100/498
101/498
102/498
103/498
104/498
105/498
106/498
107/498
108/498
109/498
110/498
111/498
112/498
113/498
114/498
115/498
116/498
117/498
118/498
119/498
120/498
121/498
122/498
123/498
124/498
125/498
126/498
127/498
128/498
129/498
130/498
131/498
132/498
133/498
134/498
135/498
136/498
137/498
138/498
139/

In [40]:
# Save the Right Dataset
np.save('E:/2D Dataset/Train and Validation/train_data_percentiles.npy', train_data_percentiles)
np.save('E:/2D Dataset/Train and Validation/validation_data_percentiles_right.npy', validation_data_percentiles_right)
np.save('E:/2D Dataset/Independent Test/test_data_percentiles_right.npy', test_data_percentiles_right)
np.save('E:/2D Dataset/Train and Validation/train_labels_percentiles.npy', train_labels_percentiles)
np.save('E:/2D Dataset/Train and Validation/validation_labels_percentiles.npy', validation_labels_percentiles)
np.save('E:/2D Dataset/Independent Test/test_labels_percentiles.npy', test_labels_percentiles)

In [None]:
# Create the Left Dataset
# The settings below are identical to the ones used in the Right-dataset cell.
files_path = 'E:/03 Percentiles Breast MRI Data (FGT)/'
slice_number = 3
random_state = 15
test_ratio = 0.15
validation_ratio = 0.1
both_side_ok_data, one_side_ok_data, both_side_ok_labels, one_side_ok_labels = side_data_seperator(files_path, slice_number, data_labels)

# FIX: this cell used to pass test_set_side='Right' (copied from the Right cell),
# so the "*_left" arrays were built with the Right setting.
# NOTE(review): assumes dataset_creator accepts 'Left' as the counterpart of
# 'Right' -- confirm against its definition.
# Note: train_data_percentiles and the three label arrays reuse the names
# assigned by the Right-dataset cell and therefore overwrite them in the kernel.
(
    train_data_percentiles,
    validation_data_percentiles_left,
    test_data_percentiles_left,
    train_labels_percentiles,
    validation_labels_percentiles,
    test_labels_percentiles,
) = dataset_creator(
    both_side_ok_data,
    one_side_ok_data,
    both_side_ok_labels,
    one_side_ok_labels,
    test_ratio=test_ratio,
    validation_ratio=validation_ratio,
    slice_number=slice_number,
    random_state=random_state,
    test_set_side='Left',
)

In [None]:
# Save the Left Dataset
# NOTE(review): four of these paths (train data, train labels, validation
# labels, test labels) are the same files the "Save the Right Dataset" cell
# writes, so this cell overwrites them -- confirm the arrays are meant to be
# identical for both sides.
for _npy_path, _array in (
    ('E:/2D Dataset/Train and Validation/train_data_percentiles.npy', train_data_percentiles),
    ('E:/2D Dataset/Train and Validation/validation_data_percentiles_left.npy', validation_data_percentiles_left),
    ('E:/2D Dataset/Independent Test/test_data_percentiles_left.npy', test_data_percentiles_left),
    ('E:/2D Dataset/Train and Validation/train_labels_percentiles.npy', train_labels_percentiles),
    ('E:/2D Dataset/Train and Validation/validation_labels_percentiles.npy', validation_labels_percentiles),
    ('E:/2D Dataset/Independent Test/test_labels_percentiles.npy', test_labels_percentiles),
):
    np.save(_npy_path, _array)

# Create 3D Dataset

In [34]:
# Get only the information of patients in the 2D dataset
# Same configuration values as the 2D dataset cells.
files_path = 'E:/03 Percentiles Breast MRI Data (FGT)/'
slice_number = 3
validation_ratio = 0.1
test_ratio = 0.15
random_state = 15

(
    both_side_ok_data,
    one_side_ok_data,
    both_side_ok_labels,
    one_side_ok_labels,
) = side_data_seperator(files_path, slice_number, data_labels)

# With just_sets_info=True the call is unpacked into three values here
# (train / validation / test info) instead of the six used for the 2D arrays.
train_set_info, validation_set_info, test_set_info = dataset_creator(
    both_side_ok_data,
    one_side_ok_data,
    both_side_ok_labels,
    one_side_ok_labels,
    test_ratio=test_ratio,
    validation_ratio=validation_ratio,
    slice_number=slice_number,
    random_state=random_state,
    just_sets_info=True,
)

In [40]:
# First column of each *_set_info array; the test_ids output below shows
# these are ID strings.
train_ids, validation_ids, test_ids = (
    set_info[:, 0]
    for set_info in (train_set_info, validation_set_info, test_set_info)
)

In [41]:
# Display the test-set ID array (first column of test_set_info).
test_ids

array(['9757971', '10189341', '11174620', '9839939', '9673179',
       '11331203', '11166357', '8152945', '9514578', '9246145', '8196738',
       '10542679', '10869884', '11181950', '9278286', '9834400',
       '9329696', '11358818', '11846812', '9174037', '11208270',
       '9186604', '11181609', '11193138', '8416350', '8180911',
       '11573971', '9918324', '8331255', '9407013', '10848845', '9099535',
       '8135824', '8934757', '11173404', '11197441', '8860408',
       '10714782', '11290557', '9024061', '10029952', '8273529',
       '8247267', '9403123', '9434346', '11351690', '8237600', '10609248',
       '9012992', '11585576', '9773985', '10700692', '10746817',
       '9833318', '9300041', '9727663', '8300634', '11181922', '9024288',
       '8268521', '11785907', '11272908', '11166410', '11390408',
       '10540768', '8276453', '9239385', '11610585', '10700667',
       '10479500', '9397693', '10675658', '8317510', '11389044',
       '10781387', '11881596', '10867800', '1568016',

In [43]:
import os
import shutil

# Root folder whose subfolders are named by ID, and the three split folders
# (located inside it) that those subfolders are moved into.
input_folder = "E:/Others/FGT Classification Dataset/2D Dataset"
train_folder = f"{input_folder}/Train"
validation_folder = f"{input_folder}/Validation"
test_folder = f"{input_folder}/Test"

# Note: this cell does not create the Train / Validation / Test folders itself.

for subfolder_name in os.listdir(input_folder):
    input_path = os.path.join(input_folder, subfolder_name)

    # Only directories are candidates for moving; plain files are ignored silently.
    if not os.path.isdir(input_path):
        continue

    # The subfolder name doubles as the ID. Splits are checked in the order
    # train -> validation -> test and the first match decides the destination.
    if subfolder_name in train_ids:
        shutil.move(input_path, os.path.join(train_folder, subfolder_name))
        print(f"Moved subfolder {subfolder_name} to Train Folder")
        continue

    if subfolder_name in validation_ids:
        shutil.move(input_path, os.path.join(validation_folder, subfolder_name))
        print(f"Moved subfolder {subfolder_name} to Validation Folder")
        continue

    if subfolder_name in test_ids:
        shutil.move(input_path, os.path.join(test_folder, subfolder_name))
        print(f"Moved subfolder {subfolder_name} to Test Folder")
        continue

    # Directory whose name is in none of the three ID arrays.
    print(f"Subfolder {subfolder_name} is not in any of the sets. Skipping.")

print("Subfolders have been processed.")


Moved subfolder 10006533 to Train Folder
Moved subfolder 10007023 to Train Folder
Moved subfolder 10007991 to Train Folder
Moved subfolder 10010860 to Validation Folder
Moved subfolder 10013259 to Train Folder
Moved subfolder 10018165 to Train Folder
Moved subfolder 10019832 to Train Folder
Moved subfolder 10022835 to Validation Folder
Moved subfolder 10023779 to Train Folder
Moved subfolder 10029875 to Train Folder
Moved subfolder 10029952 to Test Folder
Moved subfolder 10030148 to Train Folder
Moved subfolder 10030277 to Train Folder
Moved subfolder 10032605 to Train Folder
Moved subfolder 10032779 to Train Folder
Moved subfolder 10045260 to Test Folder
Moved subfolder 10184718 to Train Folder
Moved subfolder 10189341 to Test Folder
Moved subfolder 10189378 to Train Folder
Moved subfolder 10473298 to Train Folder
Moved subfolder 10479448 to Train Folder
Moved subfolder 10479500 to Test Folder
Moved subfolder 10480019 to Train Folder
Moved subfolder 10481554 to Train Folder
Moved subf

In [44]:
# NOTE(review): nibabel is imported here rather than in the import cell at the
# top of the notebook.
import nibabel as nib

# Load the NIfTI file
# (one hard-coded sample from the 3D Dataset/Train folder; nii_file is reused
# by the later cell that inspects header extensions)
nii_file = nib.load("E:/Others/FGT Classification Dataset/3D Dataset/Train/8163477.nii.gz")

# Print the NIfTI header
print(nii_file.header)


<class 'nibabel.nifti1.Nifti1Header'> object, endian='<'
sizeof_hdr      : 348
data_type       : b''
db_name         : b''
extents         : 0
session_error   : 0
regular         : b'r'
dim_info        : 0
dim             : [  3 512 512  88   1   1   1   1]
intent_p1       : 0.0
intent_p2       : 0.0
intent_p3       : 0.0
intent_code     : none
datatype        : int16
bitpix          : 16
slice_start     : 0
pixdim          : [1.        0.6641    0.6641    2.3999965 0.        0.        0.
 0.       ]
vox_offset      : 0.0
scl_slope       : nan
scl_inter       : nan
slice_end       : 0
slice_code      : unknown
xyzt_units      : 2
cal_max         : 0.0
cal_min         : 0.0
slice_duration  : 0.0
toffset         : 0.0
glmax           : 0
glmin           : 0
descrip         : b''
aux_file        : b''
qform_code      : scanner
sform_code      : scanner
quatern_b       : 0.0
quatern_c       : 0.0
quatern_d       : 0.0
qoffset_x       : -176.699
qoffset_y       : -186.074
qoffset_z       : 

In [50]:
# Load the label table (columns shown in the output below: code, FGT, BPE, Side).
# The path uses forward slashes like the rest of the notebook. The previous
# literal "E:\Others\..." only worked because "\O", "\F" and "\d" are not
# recognised escape sequences; Python flags such sequences as invalid (a warning
# today, planned to become an error).
data_labels = pd.read_csv("E:/Others/FGT Classification Dataset/data_labels.csv")
print(data_labels.head())

       code  FGT  BPE Side
0  10006533    3    1    L
1  10007023    2    3    L
2  10007991    4    1    B
3  10010860    3    3    R
4  10013259    3    1    R


In [51]:

# Generate the new 'name' column: "Breast_MRI_000", "Breast_MRI_001", ... in
# row order, inserted as the first column.
# Guarded so the cell can be re-run: DataFrame.insert raises ValueError when the
# column already exists.
if "name" not in data_labels.columns:
    data_labels.insert(0, "name", [f"Breast_MRI_{i:03d}" for i in range(len(data_labels))])



In [54]:
# Show the first rows to check the inserted 'name' column.
data_labels.head()

Unnamed: 0,name,code,FGT,BPE,Side
0,Breast_MRI_000,10006533,3,1,L
1,Breast_MRI_001,10007023,2,3,L
2,Breast_MRI_002,10007991,4,1,B
3,Breast_MRI_003,10010860,3,3,R
4,Breast_MRI_004,10013259,3,1,R


In [71]:
# df is an alias of data_labels (same object, not a copy).
df = data_labels

# Folder whose subfolders are renamed from their 'code' to their 'name'.
folder_path = "E:/Others/FGT Classification Dataset/2D Dataset/Validation"

# Lookup table: 'code' rendered as a string -> 'name'
code_to_name = {
    code: new_label
    for code, new_label in zip(df['code'].astype(str), df['name'])
}

for subfolder in os.listdir(folder_path):
    subfolder_path = os.path.join(folder_path, subfolder)

    # Leave alone anything that is not a directory or has no entry in the table.
    if not os.path.isdir(subfolder_path) or subfolder not in code_to_name:
        continue

    # Rename in place: same parent folder, new name from the table.
    new_name = code_to_name[subfolder]
    new_subfolder_path = os.path.join(folder_path, new_name)
    os.rename(subfolder_path, new_subfolder_path)

# Printed unconditionally, whether or not any subfolder matched.
print("Subfolders have been renamed successfully.")

Subfolders have been renamed successfully.


In [72]:
# df is the alias of data_labels created in the renaming cell; index=False
# leaves the row index out of the CSV.
# NOTE(review): 'your_dataframe.csv' looks like a placeholder file name -- confirm.
df.to_csv('E:/Others/FGT Classification Dataset/your_dataframe.csv', index=False)

In [66]:
# data_labels

In [45]:
# Check for any extensions that might contain leftover DICOM metadata
extensions = nii_file.header.extensions
if extensions:
    for ext in extensions:
        print(ext)

In [46]:
# Display the extensions object itself (shown empty in the output below).
extensions

Nifti1Extensions()

In [35]:
# Locate the row(s) of test_set_info whose first column equals '10609248'.
np.where(test_set_info[:, 0]=='10609248')

(array([47], dtype=int64),)

In [39]:
# Scratch check: 60th percentile of the integers 0..111.
np.percentile(np.arange(112), 60)

66.6

In [31]:
# Persist the three split-info arrays, each to the .npy path it is paired with.
for _npy_path, _array in (
    ("E:/Dataset Information/train_set_info.npy", train_set_info),
    ("E:/Dataset Information/validation_set_info.npy", validation_set_info),
    ("E:/Dataset Information/test_set_info.npy", test_set_info),
):
    np.save(_npy_path, _array)

In [None]:
# Build the 3D training data and labels from train_set_info.
# create_3d_train_set is defined elsewhere in this notebook.
data_path = "E:/Resampled ALL Dimensions 3D NRRD Dataset/"
train_data_3d, train_labels_3d = create_3d_train_set(data_path, train_set_info)

In [72]:
# Save the 3D training data and labels as .npy files.
np.save('E:/07 3D Dataset/train_data_3d.npy', train_data_3d)
np.save('E:/07 3D Dataset/train_labels_3d.npy', train_labels_3d)

In [77]:
# Build the 3D validation arrays from validation_set_info.
# create_3d_test_validation (defined elsewhere) is unpacked here as
# (right data, left data, labels).
data_path = "E:/Resampled ALL Dimensions 3D NRRD Dataset/"
validation_data_3d_right, validation_data_3d_left, validation_labels_3d = create_3d_test_validation(data_path, validation_set_info)

In [78]:
# Save the 3D validation arrays (left, right, then labels) as .npy files.
for _npy_path, _array in (
    ('E:/07 3D Dataset/validation_data_3d_left.npy', validation_data_3d_left),
    ('E:/07 3D Dataset/validation_data_3d_right.npy', validation_data_3d_right),
    ('E:/07 3D Dataset/validation_labels_3d.npy', validation_labels_3d),
):
    np.save(_npy_path, _array)

In [85]:
# Build the 3D test arrays from test_set_info.
# create_3d_test_validation (defined elsewhere) is unpacked here as
# (right data, left data, labels).
data_path = "E:/Resampled ALL Dimensions 3D NRRD Dataset/"
test_data_3d_right, test_data_3d_left, test_labels_3d = create_3d_test_validation(data_path, test_set_info)

In [None]:
# Save the 3D test arrays (right, left, then labels) as .npy files.
for _npy_path, _array in (
    ('E:/07 3D Dataset/test_data_3d_right.npy', test_data_3d_right),
    ('E:/07 3D Dataset/test_data_3d_left.npy', test_data_3d_left),
    ('E:/07 3D Dataset/test_labels_3d.npy', test_labels_3d),
):
    np.save(_npy_path, _array)