In [None]:
import sys, os
from google.colab import drive
# Absolute location of the workshop folder on the mounted Google Drive.
WORKSHOP_DIR = '/content/drive/My Drive/radiomics/Radiomics Workshop'

drive.mount('/content/drive', force_remount=True)

# Make the modules stored in the workshop folder importable. The membership
# check keeps sys.path free of duplicate entries when the cell is re-run.
if WORKSHOP_DIR not in sys.path:
  sys.path.append(WORKSHOP_DIR)

# Change directory with the absolute path: a relative 'drive/My Drive/...'
# path only resolves while the working directory is still /content, so it
# fails as soon as this cell is executed a second time.
os.chdir(WORKSHOP_DIR)

In [None]:
!pip install SimpleITK==1.2.4

In [None]:
!pip install pyradiomics===3.0

In [None]:
import torch
import random
import os
from os import listdir
from os.path import isfile, join

# Folder names of the mouse scans that rearrange processes by default
# (one sub-folder of the dataset folder per entry).
MICE_IDS = [
    "MB1333220714F000H00000000C000000",
    "MB1396110913F000H00000000C000000",
    "MB1312240614F000H00000000C000000",
    "MB1444220714F000H00000000C000000",
    "MB1589241214FC00H00004052C000000",
    "MB1752210615FC00H00000557C000000",
    "MB1858100815FC00H00001912C000000",
    "MB1858230615FC00H00001912C000000",
    "MB1583050215FC00H00004052C000000",
    "MB1598010315FC00H00004457C000000",
    "MB1695040215F000H00000000C000000",
    "MB1392140814F000H00000000C000000",
    "MB1588050215FC00H00004052C000000",
    "MB4134091016F000H00000000C000000",
    "MB1472110315FC00H00004052C000000",
    "MB1755210615FC00H00000557C000000",
    "MB1953010315FC00H00004457C000000",
    "MB1398110913F000H00000000C000000",
    "MB1468010315FC00H00003912C000000",
    "MB1363150215F000H00000000C000000",
    "MB1370270414F000H00000000C000000",
    "MB1408220714F000H00000000C000000",
    "MB1452010315FC00H00000188C000000",
    "MB1512290714F000H00000000C000000",
    "MB1513090614F000H00000000C000000",
    "MB1539110315FC00H00001912C000000",
    "MB1747010315FC00H00000072C000000",
    "MB1858140715FC00H00001912C000000",
    "MB4022091016F000H00000000C000000",
]

'''
rearrange takes the dataset located in old_folder_path and copies it into new_folder_path, split into train and test.
tumor_threshold - half of the patches in both train and test will have at least this amount of tumor, and the other half less than that.
train_precentage - the fraction (between 0 and 1) of the data that goes to train, out of all the data (train + test).
mice - the names of the folders in old_folder_path that we want to split into train and test; each folder contains the scan of a different mouse.
The default is all the mice listed above, but one can pass other scans, for example: rearrange(..., mice=["MB1452010315FC00H00000188C000000", "MB1513090614F000H00000000C000000"])
by_slices - by_slices=False splits the patches between train and test randomly; by_slices=True splits them by ranges of slices.
'''

def rearrange(old_folder_path, new_folder_path, tumor_threshold, train_precentage, mice=MICE_IDS, by_slices=False,
              seed=None):
  """Split the saved patches of every mouse into train and test folders.

  For each mouse, every file directly inside old_folder_path/<mouse> is loaded
  with torch.load and placed in one of two groups according to its
  "tumor_percentage" entry. Each group is ordered, cut into a train part and a
  test part, and written with resave to new_folder_path/train/<mouse> and
  new_folder_path/test/<mouse>. The source files are left untouched.

  Args:
    old_folder_path: folder that holds one sub-folder per mouse.
    new_folder_path: folder under which train/<mouse> and test/<mouse> are written.
    tumor_threshold: patches whose "tumor_percentage" is >= this value are saved
      with the "more" file prefix, all other patches with the "less" prefix.
    train_precentage: fraction of each group that goes to train; the train size
      is int(train_precentage * len(group)) and the remainder goes to test.
    mice: names of the mouse sub-folders to process (defaults to MICE_IDS).
    by_slices: if True, each group is sorted by the first element of its
      "slice_range" entry before the cut, so train and test cover different
      slice ranges; if False, each group is shuffled.
    seed: optional seed for the shuffle. None (the default) keeps using the
      global state of the random module, exactly as before.
  """
  # A dedicated generator makes the random split reproducible when a seed is given.
  shuffler = random if seed is None else random.Random(seed)

  for mouse in mice:
    print("start", mouse)
    current_folder = old_folder_path + "/" + mouse
    # listdir returns names in arbitrary order; sorting them makes repeated
    # runs process the files in the same order.
    img_names = sorted(f for f in listdir(current_folder) if isfile(join(current_folder, f)))

    print("loading")
    # Insertion order matters: "more" is handled before "less" everywhere below.
    groups = {"more": [], "less": []}
    for img_name in img_names:
      # NOTE: torch.load relies on pickle, so only load patch files you created yourself.
      loaded = torch.load(current_folder + "/" + img_name)
      prefix = "more" if loaded["tumor_percentage"] >= tumor_threshold else "less"
      groups[prefix].append(loaded)

    if by_slices:
      for tensors in groups.values():
        tensors.sort(key=lambda k: k['slice_range'][0])
      # Progress output: slice range of the first, middle and last tumor patch.
      # Guarded because a mouse may have no patch above the threshold.
      tumor_tensors = groups["more"]
      if tumor_tensors:
        for position in (0, len(tumor_tensors) // 2, len(tumor_tensors) - 1):
          print(position, tumor_tensors[position]['slice_range'])
    else:
      for tensors in groups.values():
        shuffler.shuffle(tensors)

    print("saving")
    for prefix, tensors in groups.items():
      train_size = int(train_precentage * len(tensors))
      resave(tensors, 0, train_size,
             folder=new_folder_path + "/train/" + mouse, file_prefix=prefix)
      # resave treats its end index as exclusive, so the test part has to start
      # exactly at train_size; starting at train_size + 1 drops one patch per group.
      resave(tensors, train_size, len(tensors),
             folder=new_folder_path + "/test/" + mouse, file_prefix=prefix)
    print("finish", mouse)
    
    
def resave(dict_arr, start_index, end_index, folder, file_prefix):
  """Save dict_arr[start_index:end_index] as numbered files inside folder.

  The folder is created when it does not exist yet. The saved files are named
  <file_prefix>_1, <file_prefix>_2, ... in the order of the saved items;
  end_index is exclusive, so an empty range only creates the folder.
  """
  os.makedirs(folder, exist_ok=True)
  # File numbering always starts at 1, whatever start_index is.
  for file_number, position in enumerate(range(start_index, end_index), start=1):
    target = folder + "/" + file_prefix + "_" + str(file_number)
    torch.save(dict_arr[position], target)

2d dataset creation

In [None]:
from create_data_set import main

'''
For full information about the arguments of the main function, see the notes above the create_dataset function in create_data_set.py.
If you have new scans and want to run the process only on them, create a list that contains only the names of these scans
(for example arr = ["MB1755210615FC00H00000557C000000", "MB1333220714F000H00000000C000000"]) and pass this list as an argument to both the main and
the rearrange functions. The arguments of rearrange are described above.
Make sure you use the parameters that are right for you.
'''

# Source and destination are the same folder, so the train/ and test/
# sub-folders are written into the folder that is passed to main.
old_folder_path = new_folder_path = "datasets/dataForNet_shuffle_by_slices_2d_600_30per"

# Parameters of the train/test split.
tumor_threshold = 0.3
train_precentage = 0.65

# Create the 2D patches; the patches are then read back from old_folder_path.
main(
    inp_path="datasets/miceData",
    data_per_mouse=600,
    patch_size=50,
    s_per_patch=1,
    tumor_percent=0.3,
    folder_name=old_folder_path,
    to_shuffle=True,
    is_data_edited=False,
    to_resize=True,
)

'''
Alternatively, you can use scans that were already converted from DICOM to png files, although the option above will probably be easier for you.
Here is an example of such a call:
main(inp_path="datasets/newMiceData", data_per_mouse=600, patch_size=50, s_per_patch=1, tumor_percent=0.3,
     folder_name=old_folder_path, to_shuffle=True, is_data_edited=True)
'''

# by_slices=True: train and test are taken from different slice ranges
# instead of being split randomly.
rearrange(
    old_folder_path=old_folder_path,
    new_folder_path=new_folder_path,
    tumor_threshold=tumor_threshold,
    train_precentage=train_precentage,
    by_slices=True,
)


3d dataset creation

In [None]:
from create_data_set import main

'''
For full information about the arguments of the main function, see the notes above the create_dataset function in create_data_set.py.
If you have new scans and want to run the process only on them, create a list that contains only the names of these scans
(for example arr = ["MB1755210615FC00H00000557C000000", "MB1333220714F000H00000000C000000"]) and pass this list as an argument to both the main and
the rearrange functions. The arguments of rearrange are described above.
Make sure you use the parameters that are right for you.
'''

# NOTE(review): right after "datasets/" the folder name contains invisible
# Unicode formatting characters (they look like two U+200F right-to-left
# marks). They are kept exactly as they were because they are part of the
# folder name on disk; confirm whether they are intentional.
old_folder_path = "datasets/‏‏dataForNet_new_3d_300_20per"
# Source and destination are the same folder.
new_folder_path = old_folder_path

# Parameters of the train/test split.
tumor_threshold = 0.2
train_precentage = 0.65

# Create the 3D patches; the patches are then read back from old_folder_path.
main(
    inp_path="datasets/miceData",
    data_per_mouse=300,
    patch_size=30,
    s_per_patch=30,
    tumor_percent=0.2,
    folder_name=old_folder_path,
    to_shuffle=True,
    is_data_edited=False,
    to_resize=True,
)


'''
Alternatively, you can use scans that were already converted from DICOM to png files, although the option above will probably be easier for you.
Here is an example of such a call:
main(inp_path="datasets/newMiceData", data_per_mouse=300, patch_size=30, s_per_patch=30, tumor_percent=0.2,
     folder_name=old_folder_path, to_shuffle=True, is_data_edited=True)
'''

# by_slices=False: the patches are split between train and test randomly.
rearrange(
    old_folder_path=old_folder_path,
    new_folder_path=new_folder_path,
    tumor_threshold=tumor_threshold,
    train_precentage=train_precentage,
    by_slices=False,
)