In [7]:
import os
import shutil
import pandas as pd
import pyreadstat
import numpy as np

In [8]:
data, meta = pyreadstat.read_sav('Dr.miratashi_3 copy.sav')

In [9]:
def move_folders(folder_list, source_directory, destination_directory):
    """Move each named sub-folder from one parent directory to another.

    Args:
        folder_list: Iterable of folder names located directly under
            ``source_directory``.
        source_directory: Directory that currently holds the folders.
        destination_directory: Directory the folders are moved into.

    One confirmation line is printed per folder after it has been moved.
    """
    # Lazily pair every folder name with its current and its target path.
    planned_moves = (
        (
            entry,
            os.path.join(source_directory, entry),
            os.path.join(destination_directory, entry),
        )
        for entry in folder_list
    )
    for folder_name, current_path, target_path in planned_moves:
        shutil.move(current_path, target_path)
        print(f"Moved folder '{folder_name}' from '{source_directory}' to '{destination_directory}'")

In [10]:
def create_pid_folder(parent_directory, pid):
    """Create one folder per patient ID, each containing ``FGT`` and ``BPE`` sub-folders.

    Args:
        parent_directory: Directory under which the patient folders are created.
        pid: Iterable of patient IDs; each ID is converted with ``str`` to
            form the folder name.

    Folders that already exist are left in place (``exist_ok=True``).
    """
    for patient in pid:
        patient_dir = os.path.join(parent_directory, str(patient))
        # The patient folder comes first, followed by its two category sub-folders.
        targets = (
            patient_dir,
            os.path.join(patient_dir, 'FGT'),
            os.path.join(patient_dir, 'BPE'),
        )
        for target in targets:
            os.makedirs(target, exist_ok=True)

In [11]:
def automated_copy(origin_directory, destination_directory, pid, fgt_idx, bpe_idx):
    """Copy each patient's FGT and BPE image ranges into that patient's output folders.

    For every patient ID, images are read from the first sub-folder (as
    listed by ``os.walk``) of ``<origin_directory>/<patient_id>/IMAGE/DCM``
    and copied into ``<destination_directory>/<patient_id>/FGT`` and
    ``<destination_directory>/<patient_id>/BPE``.  Image files are named
    ``I`` followed by the image number zero-padded to seven digits
    (e.g. ``I0000300``, ``I0001060``).

    Args:
        origin_directory: Directory that contains one folder per patient ID.
            A trailing path separator is optional.
        destination_directory: Directory that contains (or receives) the
            per-patient output folders.  A trailing separator is optional.
        pid: Iterable of patient IDs (converted with ``str`` for paths).
        fgt_idx: Sequence of inclusive ``(start, end)`` image-number ranges,
            one per patient, in the same order as ``pid``.
        bpe_idx: Same as ``fgt_idx`` for the BPE images.

    Raises:
        IndexError: If ``fgt_idx`` or ``bpe_idx`` has fewer entries than
            ``pid``.  Raised before anything is copied.
        FileNotFoundError: If a patient's ``IMAGE/DCM`` directory is missing
            or has no sub-folder.  Raised before anything is copied.
    """
    patient_ids = list(pid)

    # Fail fast on short index lists instead of stopping half-way through a copy.
    for label, ranges in (('fgt_idx', fgt_idx), ('bpe_idx', bpe_idx)):
        if len(ranges) < len(patient_ids):
            raise IndexError(
                f"{label} has {len(ranges)} range(s) but "
                f"{len(patient_ids)} patient ID(s) were given"
            )

    # Phase 1: resolve every patient's image folder.  A missing folder must be
    # an error: silently skipping it would shift the list against `pid` and
    # copy one patient's images into another patient's output folder.
    image_dirs = []
    for patient_id in patient_ids:
        dcm_directory = os.path.join(origin_directory, str(patient_id), 'IMAGE', 'DCM')
        _, sub_dirs, _ = next(os.walk(dcm_directory), (None, [], []))
        if not sub_dirs:
            raise FileNotFoundError(
                f"No image sub-folder found under '{dcm_directory}'"
            )
        image_dirs.append(os.path.join(dcm_directory, sub_dirs[0]))

    # Phase 2: copy the requested image ranges.
    for i, patient_id in enumerate(patient_ids):
        patient_out = os.path.join(destination_directory, str(patient_id))
        for category, (start, end) in (('FGT', fgt_idx[i]), ('BPE', bpe_idx[i])):
            target_dir = os.path.join(patient_out, category)
            # Without an existing directory shutil.copy would create a *file*
            # called FGT/BPE and overwrite it for every image.
            os.makedirs(target_dir, exist_ok=True)
            for number in range(start, end + 1):
                # Seven-digit zero padding covers any image number, not only
                # the 3- and 4-digit cases.
                source_file = os.path.join(image_dirs[i], f'I{number:07d}')
                shutil.copy(source_file, target_dir)

In [12]:

def get_folder_names(path, folders):
    """Append the names of the immediate sub-directories of ``path`` to ``folders``.

    Args:
        path: Directory to scan (not recursive).
        folders: List that receives the names; it is extended in place.

    Returns:
        The same ``folders`` list that was passed in.
    """
    folders.extend(item.name for item in os.scandir(path) if item.is_dir())
    return folders



# Data Preprocessing
## Step 1
### In this step, we collect the unique patient IDs and remove patients whose records carry multiple (conflicting) diagnoses.

In [13]:
# Every directory that is scanned for patient folders, in scan order.
source_dirs = (
    'E:/Breast Data/Breast/',
    'E:/Breast Data/breast newNAC',
    'H:/Breast MRI/01/sps',
    'H:/Breast MRI/02/sps',
    'H:/Breast MRI/03/breast new',
    'H:/Breast MRI/03/breast new 2',
    'H:/Breast MRI/03/nac',
    'H:/Breast MRI/03/sps',
    'G:/Breast Data/nac',
    'G:/Breast Data/sps',
)

# Accumulate the sub-folder names of all source directories in one list.
folder_names = []
for mpath in source_dirs:
    folder_names = get_folder_names(mpath, folder_names)


In [14]:
# Convert the collected folder names to a NumPy array.
# Note: this rebinds `folder_names` from a list to an ndarray.
folder_names = np.array(folder_names)

In [15]:
# Distinct folder names across all scanned directories (np.unique also sorts them).
folder_names_unique = np.unique(folder_names)

In [16]:
# Patient codes from the `code` column of the loaded table, and their distinct values.
data_codes = np.array(data["code"])
data_codes_unique = np.unique(data_codes)

In [17]:
# Folder names that also occur as a patient code in the table.
same_codes_unique = np.array(
    [i for i in folder_names_unique if i in data_codes_unique]
)

In [18]:
# Table codes that are not among the codes matched to a folder (same_codes_unique).
not_in_dataset = [i for i in data_codes_unique if i not in same_codes_unique]

not_in_dataset_unique = np.unique(np.array(not_in_dataset))

In [19]:
# Keep a code when it has at most one record, or when all of its records agree
# on both FG and BPE; codes whose records disagree are dropped.
same_codes_unique_mdd_removed = []
for code in same_codes_unique:
    records = data[data.code == code]
    fg_values = np.array(records.FG)
    bpe_values = np.array(records.BPE)
    is_consistent = fg_values.shape[0] <= 1 or (
        np.all(fg_values == fg_values[0]) and np.all(bpe_values == bpe_values[0])
    )
    if is_consistent:
        same_codes_unique_mdd_removed.append(code)

In [20]:
# Filter not_in_dataset_unique: a code is skipped only when it has several
# records that disagree on FG or on BPE; every other code is kept.
not_in_dataset_unique_mdd_removed = []
for code in not_in_dataset_unique:
    rows = data[data.code == code]
    fg_values, bpe_values = np.array(rows.FG), np.array(rows.BPE)
    if fg_values.shape[0] > 1 and not (
        np.all(fg_values == fg_values[0]) and np.all(bpe_values == bpe_values[0])
    ):
        continue
    not_in_dataset_unique_mdd_removed.append(code)
        

In [21]:
all_ok_data = same_codes_unique_mdd_removed + not_in_dataset_unique_mdd_removed

In [22]:
len(all_ok_data)

1146

# Part 1

In [23]:
# Patient folders found in the Part 1 source directory.
mpath = 'H:/Breast MRI/01/sps'
folder_names_part1 = get_folder_names(mpath, [])

In [24]:
# Print every Part 1 folder name that is absent from all_ok_data.
for name in folder_names_part1:
    if name in all_ok_data:
        continue
    print(name)

In [25]:
no_data_cases = ['9222704', '9999465', '8881092', '11258955', '11843612']

In [26]:
len(all_ok_data)

1146

In [27]:
# Drop the listed cases from all_ok_data in place; cases that are not present are skipped.
for pid in no_data_cases:
    try:
        all_ok_data.remove(pid)
    except ValueError:
        pass

In [28]:
len(all_ok_data)

1142

In [29]:
# Origin and destination directories for Part 1.
# Both end with '/' so that plain string concatenation with a patient ID
# yields a valid path.
d_directory = 'E:/Selected Data/'
o_directory = 'H:/Breast MRI/01/sps/'

In [30]:
patients_ids_1 = [8860408, 8861603, 8861971, 8866708, 8889836, 8915593, 8915632, 8926218, 8934757, 8935156, 8939476, 8977117,
                  9008081, 9017845, 9108321, 9162757, 9170060, 9173422, 9174037, 9182263, 9221819, 9239385, 9246035, 9354593,
                  9362401, 9363679, 9410575, 9459081, 9459302, 9459663, 9471658, 9471667, 9471756, 9471997, 9474758, 9484852,
                  9484938, 9486031, 9486352, 9492065, 9512135, 9535197, 9535278, 9535434, 9559948, 9571865, 9577964, 9578077,
                  9638156, 9638749, 9646672, 9647040]

In [31]:
fgt_1 = [(300, 451), (373, 458), (558, 685), (396, 481), (536, 621), (379, 550), (434, 517), (335, 478), (582, 661), (587, 662),
         (303, 454), (348, 523), (533, 618), (365, 440), (514, 599), (335, 510), (405, 548), (589, 674), (505, 590), (348, 499),
         (521, 606), (387, 542), (407, 492), (554, 639), (365, 450), (365, 450), (432, 513), (490, 567), (371, 442), (464, 549),
         (365, 524), (365, 528), (422, 573), (338, 485), (591, 676), (493, 668), (393, 560), (432, 587), (595, 758), (325, 448),
         (365, 528), (365, 438), (302, 367), (365, 430), (365, 438), (501, 586), (569, 654), (529, 614), (277, 356), (277, 352),
         (281, 366), (249, 314)]

In [32]:
# (start, end) image-number pairs for the BPE images of Part 1.
# NOTE(review): only 13 pairs are listed here, while patients_ids_1 and fgt_1
# each hold 52 entries. automated_copy reads bpe_idx[i] for every patient, so
# calling it with this list raises IndexError until the list is completed.
bpe_1 = [(1060, 1211), (920, 1005), (2589, 2716), (914, 999), (1162, 1247), (1615, 1786), (1042, 1125), (1235, 1378), (1054, 1133),
         (1035, 1110), (1243, 1394), (1408, 1583), (1071, 1156)]

In [33]:
# create_pid_folder(d_directory, patients_ids_1)

In [34]:
# automated_copy(o_directory, d_directory, patients_ids_1, fgt_1, bpe_1)

# Part 2

In [35]:
# Patient folders found in the Part 2 source directory.
mpath = 'H:/Breast MRI/02/sps'
folder_names_part2 = get_folder_names(mpath, [])

In [36]:
# Print every Part 2 folder name that is absent from all_ok_data.
for name in folder_names_part2:
    if name in all_ok_data:
        continue
    print(name)

In [37]:
no_data_cases = ['1191379', '8368349', '8832712', '8145891', '8302127', '8305735', '8414121', '8675439', '8675610', '8682515']

In [38]:
len(all_ok_data)

1142

In [39]:
# Drop the listed cases from all_ok_data in place; cases that are not present are skipped.
for pid in no_data_cases:
    try:
        all_ok_data.remove(pid)
    except ValueError:
        pass

In [40]:
len(all_ok_data)

1135

In [41]:
# Origin and destination directories for Part 2.
# Both end with '/' so that plain string concatenation with a patient ID
# yields a valid path.
d_directory = 'E:/Selected Data/'
o_directory = 'H:/Breast MRI/02/sps/'

In [42]:
patients_ids_2 = []

In [43]:
fgt_2 = []

In [44]:
bpe_2 = []

In [45]:
# create_pid_folder(d_directory, patients_ids_2)

In [46]:
# automated_copy(o_directory, d_directory, patients_ids_2, fgt_2, bpe_2)

# Part 3

In [47]:
# Patient folders found in the Part 3 source directory.
mpath = 'H:/Breast MRI/03/breast new'
folder_names_part3 = get_folder_names(mpath, [])

In [48]:
# Print every Part 3 folder name that is absent from all_ok_data.
for name in folder_names_part3:
    if name in all_ok_data:
        continue
    print(name)

In [49]:
no_data_cases = []

In [50]:
len(all_ok_data)

1135

In [51]:
# Drop the listed cases from all_ok_data in place; cases that are not present are skipped.
for pid in no_data_cases:
    try:
        all_ok_data.remove(pid)
    except ValueError:
        pass

In [52]:
len(all_ok_data)

1135

In [53]:
# Origin and destination directories for Part 3.
# The origin is this part's source folder (it previously still pointed at the
# Part 2 folder). Both paths end with '/' so that plain string concatenation
# with a patient ID yields a valid path.
d_directory = 'E:/Selected Data/'
o_directory = 'H:/Breast MRI/03/breast new/'

In [54]:
patients_ids_3 = []

In [55]:
fgt_3 = []

In [56]:
bpe_3 = []

In [57]:
# create_pid_folder(d_directory, patients_ids_3)

In [58]:
# automated_copy(o_directory, d_directory, patients_ids_3, fgt_3, bpe_3)

# Part 4

In [59]:
# Patient folders found in the Part 4 source directory.
mpath = 'H:/Breast MRI/03/breast new 2'
folder_names_part4 = get_folder_names(mpath, [])

In [60]:
# Print and collect every Part 4 folder name that is absent from all_ok_data.
folder_list = []
for name in folder_names_part4:
    if name in all_ok_data:
        continue
    print(name)
    folder_list.append(name)

In [61]:
len(folder_list)

0

In [62]:
# source_directory = mpath
# destination_directory = "H:/Breast MRI/03/NotOkData/breast new 2"  # Replace with the actual destination directory
# move_folders(folder_list, source_directory, destination_directory)

In [63]:
no_data_cases = ['10942269']

In [64]:
len(all_ok_data)

1135

In [65]:
# Drop the listed cases from all_ok_data in place; cases that are not present are skipped.
for pid in no_data_cases:
    try:
        all_ok_data.remove(pid)
    except ValueError:
        pass

In [66]:
len(all_ok_data)

1134

In [67]:
# Origin and destination directories for Part 4.
# The origin is this part's source folder (it previously still pointed at the
# Part 2 folder). Both paths end with '/' so that plain string concatenation
# with a patient ID yields a valid path.
d_directory = 'E:/Selected Data/'
o_directory = 'H:/Breast MRI/03/breast new 2/'

In [68]:
patients_ids_4 = []

In [69]:
fgt_4 = []

In [70]:
bpe_4 = []

In [71]:
# create_pid_folder(d_directory, patients_ids_4)

In [72]:
# automated_copy(o_directory, d_directory, patients_ids_4, fgt_4, bpe_4)

# Part 5

In [73]:
# Patient folders found in the Part 5 source directory.
mpath = 'H:/Breast MRI/03/nac'
folder_names_part5 = get_folder_names(mpath, [])

In [74]:
# Print and collect every Part 5 folder name that is absent from all_ok_data.
folder_list = []
for name in folder_names_part5:
    if name in all_ok_data:
        continue
    print(name)
    folder_list.append(name)

In [75]:
len(folder_list)

0

In [76]:
# source_directory = mpath
# destination_directory = "H:/Breast MRI/03/NotOkData/nac"  # Replace with the actual destination directory
# move_folders(folder_list, source_directory, destination_directory)

In [77]:
no_data_cases = []

In [78]:
len(all_ok_data)

1134

In [79]:
# Drop the listed cases from all_ok_data in place; cases that are not present are skipped.
for pid in no_data_cases:
    try:
        all_ok_data.remove(pid)
    except ValueError:
        pass

In [80]:
len(all_ok_data)

1134

In [81]:
# Origin and destination directories for Part 5.
# The origin is this part's source folder (it previously still pointed at the
# Part 2 folder). Both paths end with '/' so that plain string concatenation
# with a patient ID yields a valid path.
d_directory = 'E:/Selected Data/'
o_directory = 'H:/Breast MRI/03/nac/'

In [82]:
patients_ids_5 = []

In [83]:
fgt_5 = []

In [84]:
bpe_5 = []

In [85]:
# create_pid_folder(d_directory, patients_ids_5)

In [86]:
# automated_copy(o_directory, d_directory, patients_ids_5, fgt_5, bpe_5)

# Part 6

In [87]:
# Patient folders found in the Part 6 source directory.
mpath = 'H:/Breast MRI/03/sps'
folder_names_part6 = get_folder_names(mpath, [])

In [88]:
# Print and collect every Part 6 folder name that is absent from all_ok_data.
folder_list = []
for name in folder_names_part6:
    if name in all_ok_data:
        continue
    print(name)
    folder_list.append(name)

In [89]:
len(folder_list)

0

In [90]:
# source_directory = mpath
# destination_directory = "H:/Breast MRI/03/NotOkData/sps"  # Replace with the actual destination directory
# move_folders(folder_list, source_directory, destination_directory)

In [91]:
no_data_cases = ['8390792', '8564813', '8613792', '8756913']

In [92]:
len(all_ok_data)

1134

In [93]:
# Drop the listed cases from all_ok_data in place; cases that are not present are skipped.
for pid in no_data_cases:
    try:
        all_ok_data.remove(pid)
    except ValueError:
        pass

In [94]:
len(all_ok_data)

1130

In [95]:
# Origin and destination directories for Part 6.
# The origin is this part's source folder (it previously still pointed at the
# Part 2 folder). Both paths end with '/' so that plain string concatenation
# with a patient ID yields a valid path.
d_directory = 'E:/Selected Data/'
o_directory = 'H:/Breast MRI/03/sps/'

In [96]:
patients_ids_6 = []

In [97]:
fgt_6 = []

In [98]:
bpe_6 = []

In [99]:
# create_pid_folder(d_directory, patients_ids_6)

In [100]:
# automated_copy(o_directory, d_directory, patients_ids_6, fgt_6, bpe_6)

# Part 7

In [101]:
# Patient folders found in the Part 7 source directory.
mpath = "E:/Breast Data/Breast"
folder_names_part7 = get_folder_names(mpath, [])

In [102]:
# Print and collect every Part 7 folder name that is absent from all_ok_data.
folder_list = []
for name in folder_names_part7:
    if name in all_ok_data:
        continue
    print(name)
    folder_list.append(name)

In [103]:
len(folder_list)

0

In [104]:
# source_directory = mpath
# destination_directory = "E:/Breast Data/NotOkData/Breast"  # Replace with the actual destination directory
# move_folders(folder_list, source_directory, destination_directory)

In [105]:
no_data_cases = ['11821274']

In [106]:
len(all_ok_data)

1130

In [107]:
# Drop the listed cases from all_ok_data in place; cases that are not present are skipped.
for pid in no_data_cases:
    try:
        all_ok_data.remove(pid)
    except ValueError:
        pass

In [108]:
len(all_ok_data)

1129

In [109]:
# Origin and destination directories for Part 7.
# The origin is this part's source folder (it previously still pointed at the
# Part 2 folder). Both paths end with '/' so that plain string concatenation
# with a patient ID yields a valid path.
d_directory = 'E:/Selected Data/'
o_directory = 'E:/Breast Data/Breast/'

In [110]:
patients_ids_7 = []

In [111]:
fgt_7 = []

In [112]:
bpe_7 = []

In [113]:
# create_pid_folder(d_directory, patients_ids_7)

In [114]:
# automated_copy(o_directory, d_directory, patients_ids_7, fgt_7, bpe_7)

# Part 8

In [115]:
# Patient folders found in the Part 8 source directory.
mpath = "E:/Breast Data/breast newNAC"
folder_names_part8 = get_folder_names(mpath, [])

In [116]:
# Print and collect every Part 8 folder name that is absent from all_ok_data.
folder_list = []
for name in folder_names_part8:
    if name in all_ok_data:
        continue
    print(name)
    folder_list.append(name)

In [117]:
len(folder_list)

0

In [118]:
# source_directory = mpath
# destination_directory = "E:/Breast Data/NotOkData/breast newNAC"  # Replace with the actual destination directory
# move_folders(folder_list, source_directory, destination_directory)

In [119]:
no_data_cases = ['10905568', '10937977']

In [120]:
len(all_ok_data)

1129

In [121]:
# Drop the listed cases from all_ok_data in place; cases that are not present are skipped.
for pid in no_data_cases:
    try:
        all_ok_data.remove(pid)
    except ValueError:
        pass

In [122]:
len(all_ok_data)

1127

In [123]:
# Origin and destination directories for Part 8.
# The origin is this part's source folder (it previously still pointed at the
# Part 2 folder). Both paths end with '/' so that plain string concatenation
# with a patient ID yields a valid path.
d_directory = 'E:/Selected Data/'
o_directory = 'E:/Breast Data/breast newNAC/'

In [124]:
patients_ids_8 = []

In [125]:
fgt_8 = []

In [126]:
bpe_8 = []

In [127]:
# create_pid_folder(d_directory, patients_ids_8)

In [128]:
# automated_copy(o_directory, d_directory, patients_ids_8, fgt_8, bpe_8)

# Part 9

In [129]:
# Patient folders found in the Part 9 source directory.
mpath = "G:/Breast Data/nac"
folder_names_part9 = get_folder_names(mpath, [])

In [130]:
# Print and collect every Part 9 folder name that is absent from all_ok_data.
folder_list = []
for name in folder_names_part9:
    if name in all_ok_data:
        continue
    print(name)
    folder_list.append(name)

In [131]:
len(folder_list)

0

In [132]:
# source_directory = mpath
# destination_directory = "G:/Breast Data/NotOkData/nac"  # Replace with the actual destination directory
# move_folders(folder_list, source_directory, destination_directory)

In [133]:
no_data_cases = []

In [134]:
len(all_ok_data)

1127

In [135]:
# Drop the listed cases from all_ok_data in place; cases that are not present are skipped.
for pid in no_data_cases:
    try:
        all_ok_data.remove(pid)
    except ValueError:
        pass

In [136]:
len(all_ok_data)

1127

In [137]:
# Origin and destination directories for Part 9.
# The origin is this part's source folder (it previously still pointed at the
# Part 2 folder). Both paths end with '/' so that plain string concatenation
# with a patient ID yields a valid path.
d_directory = 'E:/Selected Data/'
o_directory = 'G:/Breast Data/nac/'

In [138]:
patients_ids_9 = []

In [139]:
fgt_9 = []

In [140]:
bpe_9 = []

In [141]:
# create_pid_folder(d_directory, patients_ids_9)

In [142]:
# automated_copy(o_directory, d_directory, patients_ids_9, fgt_9, bpe_9)

# Part 10

In [156]:
# Patient folders found in the Part 10 source directory.
mpath = "G:/Breast Data/sps"
folder_names_part10 = get_folder_names(mpath, [])

In [157]:
# Print and collect every Part 10 folder name that is absent from all_ok_data.
folder_list = []
for name in folder_names_part10:
    if name in all_ok_data:
        continue
    print(name)
    folder_list.append(name)

In [158]:
len(folder_list)

0

In [159]:
# source_directory = mpath
# destination_directory = "G:/Breast Data/NotOkData/sps"  # Replace with the actual destination directory
# move_folders(folder_list, source_directory, destination_directory)

In [160]:
no_data_cases = ['8867592']

In [161]:
len(all_ok_data)

1127

In [162]:
# Drop the listed cases from all_ok_data in place; cases that are not present are skipped.
for pid in no_data_cases:
    try:
        all_ok_data.remove(pid)
    except ValueError:
        pass

In [163]:
len(all_ok_data)

1126

In [137]:
# Origin and destination directories for Part 10.
# The origin is this part's source folder (it previously still pointed at the
# Part 2 folder). Both paths end with '/' so that plain string concatenation
# with a patient ID yields a valid path.
d_directory = 'E:/Selected Data/'
o_directory = 'G:/Breast Data/sps/'

In [138]:
patients_ids_10 = []

In [139]:
fgt_10 = []

In [140]:
bpe_10 = []

In [141]:
# create_pid_folder(d_directory, patients_ids_10)

In [142]:
# automated_copy(o_directory, d_directory, patients_ids_10, fgt_10, bpe_10)