# Coding exercise for Wang Lab
### Implementation of nnU-net for the segmentation of pancreas and pancreatic lesions on CT scan, with classication of the pancreatic lesions
### by Leo Chen
### August/September 2024

In [1]:
### IMPORTS
import os
import glob
#import util

import numpy as np
import pandas as pd
import random
import math
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
from torch.autograd import Variable
import torch.nn.init as init

from collections import defaultdict
from collections import Counter

from datetime import datetime

import SimpleITK as sitk
#import nibabel as nib

import json
import shutil

In [2]:
# check if cuda is working
torch.cuda.is_available()
torch.cuda.device_count()
torch.cuda.current_device()

0

In [6]:
### GLOBAL VARIABLES

# using GPU?
gpu = False

# first time running? need to generate csv and npy files
firstrun = True

# use resampled (1x1x3 mm voxel size) or original data?
useResampledData = True

# directories where the files are
traindir = r'C:\Users\Leo\Documents\UHN-MedImg3D-ML-quiz\train'
valdir = r'C:\Users\Leo\Documents\UHN-MedImg3D-ML-quiz\validation'
testdir = r'C:\Users\Leo\Documents\UHN-MedImg3D-ML-quiz\test'

csvpath = r'C:\Users\Leo\OneDrive\Documents\GitHub\WangLabQuiz\csv files\trainval_metadata.csv'   # csv with the image dimensions, image and mask file paths

nnUNet_raw_dir = r'C:\Users\Leo\OneDrive\Documents\UHN-MedImg3D-ML-quiz\nnUnet_raw'

In [7]:
print(nnUNet_raw_dir)

C:\Users\Leo\OneDrive\Documents\UHN-MedImg3D-ML-quiz\nnUnet_raw


In [4]:
### FUNCTIONS FOR SITK and IMAGE AUGMENTATION

def rotateImage(original, anglex, angley, anglez, interpolate='linear'):
    """ Returns the 'rotated' 3d image about the physical center that is resampled based on the 'original' image
    1. original - original image 
    2. angle x is roll / twisting the body like a rolling pin, turning in dance
    3. angle y is yaw / rotating the body like a propeller blade, like break dancing
    4. angle z - pitch / tilt along the superior/inferior axis (i.e trendelenburg)
    
    """

    if interpolate == 'linear':
        interpolator = sitk.sitkLinear
    elif interpolate == 'NN':
        interpolator = sitk.sitkNearestNeighbor

    radx = anglex * math.pi / 180
    rady = angley * math.pi / 180
    radz = anglez * math.pi / 180

    origin = np.array(original.GetOrigin())
    pixelcenter = np.array(sitk.GetSize(original)) / 2.
    physicalcenter = sitk.TransformContinuousIndexToPhysicalPoint(pixelcenter)

    transform = sitk.Euler3DTransform()
    transform.SetCenter(physicalcenter)
    transform.SetRotation(radz, rady, radx)    # note the order is z, y, x

    unitvecs = np.transpose(np.reshape(original.GetDirection(), (-1, 3)))
    #print(unitvecs)
    matrix = np.reshape(transform.GetMatrix(), (-1, 3))
    inverse = np.linalg.inv(matrix)


    # the transform matrix is actually mapping backwards: post to pre
    # therefore the forward transformation is the inverse matrix
    transformedunitvecs = inverse @ unitvecs   # new i, j, k are columns
    #print(transformedunitvecs)
    newdirection = transformedunitvecs.flatten('F')    # flatten by column

    print(newdirection)
    neworigin = (matrix @ (origin - physicalcenter)) + physicalcenter

    rotatedImage = sitk.Resample(original, original, transform, interpolator)
    rotatedImage.SetDirection(newdirection)
    rotatedImage.SetOrigin(neworigin)

    return rotatedImage

def flipImage(original):
    """Flips an SimpleITK over left/right axis"""
    flipped = sitk.Flip(original, [True, False, False])
    return flipped

def flipslice(original):
    """Flips a numpy slice (2d image) """
    # flips 2D slice (reverses x indices)
    flipped = np.flipud(original)  #np.fliplr(original)
    return flipped

def bbox_3D(img):
    """Finds the bounding box around a 3D image (numpy)
    returns rmin, rmax, cmin, cmax, zmin, zmax (r = row, c = column)"""
    try:    
        z = np.any(img, axis=(1, 2))    #z
        c = np.any(img, axis=(0, 1))    #x , (c = column)
        r = np.any(img, axis=(0, 2))    #y , (r = row)

        rmin, rmax = np.where(r)[0][[0, -1]]
        cmin, cmax = np.where(c)[0][[0, -1]]
        zmin, zmax = np.where(z)[0][[0, -1]]

        #x min max, y min max, z min max
        return [rmin, rmax, cmin, cmax, zmin, zmax]
    except:
        return -1, -1, -1, -1, -1, -1


def bbox_2D(img):
    """Finds the bounding box around a 2D image (numpy)
    returns rmin, rmax, cmin, cmax (r = row, c = column)
    If no elements exist, then returns (-1, -1, -1, -1)"""
    
    try:
        c = np.any(img, axis=0)    #y , (c = column)
        r = np.any(img, axis=1)    #x , (r = row)

        rmin, rmax = np.where(r)[0][[0, -1]]
        cmin, cmax = np.where(c)[0][[0, -1]]
    
        return rmin, rmax, cmin, cmax
    except:
        return -1, -1, -1, -1


def cropImage(image, threshold, xshift, yshift):
    """Crops SimpleITK image to remove pixels below a threshold (e.g. black space)
    Can also shift by *xshift and *yshift (random shifts in pixels) for augmentation"""
    # load image
    npy = sitk.GetArrayFromImage(image)

    # GET METADATA
    direction = image.GetDirection()
    spacing = image.GetSpacing()

    # CALCULATE BOUNDING BOX OF BODY (removes black space)
    mask = npy > threshold
    [xmin, xmax, ymin, ymax, zmin, zmax] = bbox_3D(mask)

    # check to make sure shifts do not extend outside boundaries of image
    if xmin + xshift < 0 or xmax + xshift > npy.shape[2]:
        xshift = 0

    if ymin + yshift < 0 or ymax + yshift > npy.shape[1]:
        yshift = 0

    # CROP IMAGE
    newnpy = npy[zmin:zmax, (ymin+yshift):(ymax+yshift), (xmin+xshift):(xmax+xshift)]

    newimage = sitk.GetImageFromArray(newnpy)
    topleft = [int(xmin+xshift), int(ymin+yshift), zmin]
    neworigin = image.TransformIndexToPhysicalPoint(topleft)

    newimage.SetOrigin(neworigin)
    newimage.SetDirection(direction)
    newimage.SetSpacing(spacing)

    return newimage


def squareImage(image):
    """Makes an SimpleITK image square by padding with zeros
    (square meaning width = height)"""
    [numcols, numrows, numslices] = image.GetSize()
    npy = sitk.GetArrayFromImage(image)

    if numcols < numrows:    #pad columns
        numzerostopad = numrows - numcols
        leftpad = int(numzerostopad / 2)
        rightpad = numzerostopad - leftpad

        newnpy = np.concatenate((np.zeros([numslices, numrows, leftpad]), npy, np.zeros([numslices, numrows, rightpad])), axis=2)

        topleft = [-leftpad, 0, 0]
        neworigin = image.TransformIndexToPhysicalPoint(topleft)

    elif numrows <= numcols:  #pad rows
        numzerostopad = numcols - numrows
        toppad = int(numzerostopad / 2)
        botpad = numzerostopad - toppad

        newnpy = np.concatenate((np.zeros([numslices, toppad, numcols]), npy, np.zeros([numslices, botpad, numcols])), axis=1)

        topleft = [0, -toppad, 0]
        neworigin = image.TransformIndexToPhysicalPoint(topleft)

    paddedimg = sitk.GetImageFromArray(newnpy)
    paddedimg.SetOrigin(neworigin)
    paddedimg.SetDirection(image.GetDirection())
    paddedimg.SetSpacing(image.GetSpacing())

    return paddedimg

def resampleImage(image, finalsize, interpolation='linear'):
    """Resamples SimpleITK image to finalsize x finalsize (width and height in pixels)
    Preserves the original physical size of the image and number of slices
    Changes the resolution so that the new image has numslices x *finalsize x *finalsize dimensions"""
    
    size = image.GetSize()
    numslices = size[2]
    squaresize = size[1]

    # RESAMPLE TO finalsize x finalsize
    finalnpy = np.zeros([numslices, finalsize, finalsize])
    reference = sitk.GetImageFromArray(finalnpy)
    reference.SetOrigin(image.GetOrigin())
    reference.SetDirection(image.GetDirection())

    spacing = image.GetSpacing()
    newspacing = np.zeros(3)
    newspacing[0:2] = (squaresize - 1) * np.array(spacing[0:2]) / (finalsize - 1)
    newspacing[2] = spacing[2]
    reference.SetSpacing(newspacing)


    # MAKING RESAMPLING FILTER
    resample = sitk.ResampleImageFilter()
    resample.SetReferenceImage(reference)
    if interpolation == 'linear':
        resample.SetInterpolator(sitk.sitkLinear)
    elif interpolation == 'NN':
        resample.SetInterpolator(sitk.sitkNearestNeighbor)

    # RESAMPLE TO finalsize x finalsize x n
    resampledimg = resample.Execute(image)

    return resampledimg


def projectImage(reference, moving, interpolate = 'linear'):
    """Projects an SimpleITK image (*moving onto *reference)
    interpolate* = linear or NN (nearest neighbor)"""
    
    resample = sitk.ResampleImageFilter()
    resample.SetReferenceImage(reference)
    if interpolate == 'linear':
        resample.SetInterpolator(sitk.sitkLinear)
    elif interpolate == 'NN':
        resample.SetInterpolator(sitk.sitkNearestNeighbor)

    resampledimg = resample.Execute(moving)

    return resampledimg


def resampleImageToVoxelSize(image, voxelx, voxely, voxelz, interpolation='linear'):
    """Resamples SimpleITK *image* to spacing *[voxelx, voxely, voxelz] in mm
    Preserves the original physical size of the image
    *voxelz is slice thickness (usually)
    *voxelx and *voxely are voxel width and height, respectively
    """
    
    original_spacing = image.GetSpacing()
    original_size = image.GetSize()
    
    new_spacing = [voxelx, voxely, voxelz]
    new_size = [int(round(osz*ospc/nspc)) for osz,ospc,nspc in zip(original_size, original_spacing, new_spacing)]
    # new dimension will be original size * original spacing / new spacing
    # based on physical distance formula: 
    #    original size (pixel) * original spacing (mm / pixel) = new size (pixel) * new spacing (mm / pixel)
    
    if interpolation == 'linear':
        interpolator = sitk.sitkLinear
    elif interpolation == 'NN':
        interpolator = sitk.sitkNearestNeighbor
    
    # creates new image
    new_image = sitk.Resample(image, new_size, sitk.Transform(), interpolator,
                         image.GetOrigin(), new_spacing, image.GetDirection(), 0,
                         image.GetPixelID())
    
    return new_image


def windowImage(image, window_width, window_center, output_min=0, output_max=255):
    """Normalizes SimpleITK *image* (CT scan) based on window specification
    (example, abdominal soft tissue window is W = 400, C = 50, or -350 to 450)
    Clips values above 0 and 1
    """
    
    window_min = window_center - window_width / 2
    window_max = window_center + window_width / 2
    
    output_min = 0
    output_max = 255
    
    windowed_image = sitk.IntensityWindowing(image, window_min, window_max, output_min, output_max)
    
    return windowed_image
    


# Data pre-processing


## Functions

In [9]:
### FUNCTIONS FOR READING/LOADING THE DATA

def getImageAndMaskFilePaths(train_or_val, subtype):
    '''
    returns a list of file paths, sorted alphabetically
    [image_niftis_gz, mask_niftis_gz, image_niftis_panorama, mask_niftis_panorama]
    input: training or validation images ('train' or 'val', and the subtype {0, 1, or 2}
    
    '''
    
    if train_or_val == 'train':
        rootdir = traindir        
    elif train_or_val == 'val':
        rootdir = valdir
        
    subtypedir = os.path.join(rootdir, 'subtype' + str(subtype))
    
    
    ## this gets all of the file paths in the subtype folder for the image and mask files
    
    # example file path for a training image of subtype 0:
    #   '/kaggle/input/pancreas/train/subtype0/quiz_0_041_0000.nii'
        
    image_niftis_gz = [os.path.join(subtypedir, file_name) for file_name in os.listdir(subtypedir) if '0000' in file_name]
    image_niftis_gz.sort()
    
    mask_niftis_gz = [file_name.partition("_0000")[0] + '.nii' for file_name in image_niftis_gz]
    
    
    
    ## however the actual CT / segmentation is within the zipped files:
    #   '/kaggle/input/pancreas/train/subtype0/quiz_0_041_0000.nii/PANORAMA_101960_00001_0000.nii'
    
    
    return [image_niftis_gz, mask_niftis_gz]
   
    
    
def getImageFileDetails(imageniftis, maskniftis, train_or_val):
    '''Creates a dataframe with the following:
        study ID // subtype // CT width/height/depth // pancreas width/height/depth // lesion width/height/depth  // pancreas xmin/xmax / ymin/ymax / zmin/zmax 
          // CT spacing x/y/z 
       
       Input is a list of image nifti file paths, mask nifti file paths (panorama), and {'train' or 'val'}
    '''
    
    ### THIS IS NOT USED TO SPEED UP TRAINING ###
    ### (WAS INITIALLY USED IN 2D and 3D MODELS FROM SCRATCH)
    
    maindf = pd.DataFrame({'ID': pd.Series(dtype='string'),
                   'train/val': pd.Series(dtype='string'),
                   'subtype': pd.Series(dtype='int'),
                   'CT width': pd.Series(dtype='int'),
                   'CT height': pd.Series(dtype='int'),
                   'CT depth': pd.Series(dtype='int'),
                   'panc width': pd.Series(dtype='int'),
                   'panc height': pd.Series(dtype='int'),
                   'panc depth': pd.Series(dtype='int'),
                   'lesion width': pd.Series(dtype='int'),
                   'lesion height': pd.Series(dtype='int'),
                   'lesion depth': pd.Series(dtype='int'),
                   'panc xmin': pd.Series(dtype='int'),
                   'panc xmax': pd.Series(dtype='int'),
                   'panc ymin': pd.Series(dtype='int'),
                   'panc ymax': pd.Series(dtype='int'),
                   'panc zmin': pd.Series(dtype='int'),
                   'panc zmax': pd.Series(dtype='int'),
                   'CT spacing x': pd.Series(dtype='float'),
                   'CT spacing y': pd.Series(dtype='float'),
                   'CT spacing z': pd.Series(dtype='float'),
                   'CT direction x': pd.Series(dtype='float'),
                   'CT direction y': pd.Series(dtype='float'),
                   'CT direction z': pd.Series(dtype='float'),        
                   'image path': pd.Series(dtype='string'),
                   'mask path': pd.Series(dtype='string')
                  })

    for i, imagepath in enumerate(imageniftis):
        maskpath = maskniftis[i]
        
        ## gets the subtype, ID for the current image
        # (format of the file name is: /kaggle/input/pancreas/train/subtype0/quiz_0_041_0000.nii/PANORAMA_101960_00001_0000.nii)
        subfolder = [folder for folder in imagepath.split('/') if 'quiz' in folder][0]   #gets the subdirectory with "quiz"
        
        subtype = subfolder.split('_')[1]
        ID = int(subfolder.split('_')[2])
        
        
        ## gets the dimensions of CT image in pixels
        img = sitk.ReadImage(imagepath)
        
        CTwidth = img.GetWidth()
        CTheight = img.GetHeight()
        CTdepth = img.GetDepth()
                
        ## gets the dimensions of the pancreas and lesion segmentations
        mask = sitk.ReadImage(maskpath)
        mask_vol = sitk.GetArrayFromImage(mask)
    
        [xmin1, xmax1, ymin1, ymax1, zmin1, zmax1] = bbox_3D(np.int64(mask_vol) == 1)   # 1 = pancreas
        [xmin2, xmax2, ymin2, ymax2, zmin2, zmax2] = bbox_3D(np.int64(mask_vol) == 2)   # 2 = lesion
 
        width1 = xmax1 - xmin1
        height1 = ymax1 - ymin1
        depth1 = zmax1 - zmin1
        
        width2 = xmax2 - xmin2
        height2 = ymax2 - ymin2
        depth2 = zmax2 - zmin2

        ## gets the spacing (mm) and 'direction' vectors of x/y/z axis
        spacing = img.GetSpacing()
        direction = img.GetDirection()
        directionx = str(direction[0:2])
        directiony = str(direction[3:5])
        directionz = str(direction[6:8])
        
        ## eventual numpy file names
        image_filename =  os.path.join(numpydir, str(train_or_val) + '_' + "{:03d}".format(ID) + '_image.npy')
        mask_filename = os.path.join(numpydir, str(train_or_val) + '_' + "{:03d}".format(ID) + '_mask.npy')

        
        # study ID // subtype // CT width/height/depth // pancreas width/height/depth // lesion width/height/depth  // pancreas xmin/xmax / ymin/ymax / zmin/zmax 
          #   // CT spacing x/y/z 
        df = pd.DataFrame({'train/val':train_or_val, 'ID':ID, 'subtype':subtype, 'CT width':CTwidth, 'CT height':CTheight, 'CT depth':CTdepth, 
                           'panc width':width1, 'panc height':height1, 'panc depth':depth1, 
                           'lesion width':width2, 'lesion height':height2, 'lesion depth':depth2,
                           'panc xmin':xmin1, 'panc xmax':xmax1, 'panc ymin': ymin1, 'panc ymax':ymax1, 'panc zmin': zmin1, 'panc zmax':zmax1,
                           'CT spacing x':spacing[0], 'CT spacing y':spacing[1], 'CT spacing z':spacing[2], 
                           'CT direction x':directionx, 'CT direction y':directiony, 'CT direction z':directionz, 
                           'image path': imagepath, 'mask path': maskpath, 'image npy': image_filename, 'mask npy':mask_filename}, index = [0])
                           
        
        maindf = pd.concat([maindf, df])
                 
            
    return maindf

def getImageFileDataFrame(imageniftis_gz, maskniftis_gz, train_or_val):
    '''Creates a dataframe with the following:
        study ID // train/val // subtype // image gz path // mask gz path // image pano path // mask pano path
       
       Input is a list of image nifti file paths, mask nifti file paths (panorama), and {'train' or 'val'}
    '''
    
    
    maindf = pd.DataFrame({'ID': pd.Series(dtype='string'),
                           'train/val': pd.Series(dtype='string'),
                           'subtype': pd.Series(dtype='string'),
                           'image gz path': pd.Series(dtype='string'),
                           'mask gz path': pd.Series(dtype='string'),
                          })

    for i, imagepath_gz in enumerate(imageniftis_gz):
        maskpath_gz = maskniftis_gz[i]
        
        ## gets the subtype, ID for the current image
        # (format of the file name is: /kaggle/input/pancreas/train/subtype0/quiz_0_041_0000.nii/PANORAMA_101960_00001_0000.nii)
        subfolder = [folder for folder in imagepath_gz.split('/') if 'quiz' in folder][0]   #gets the subdirectory with "quiz"
        
        subtype = subfolder.split('_')[1]
        ID = subfolder.split('_')[2]
        
        
        
        # study ID // subtype // CT width/height/depth // pancreas width/height/depth // lesion width/height/depth  // pancreas xmin/xmax / ymin/ymax / zmin/zmax 
          #   // CT spacing x/y/z 
        df = pd.DataFrame({'train/val':train_or_val, 'ID':ID, 'subtype':subtype,
                           'image gz path': imagepath_gz, 'mask gz path': maskpath_gz
                           }, index = [0])
                           
        
        maindf = pd.concat([maindf, df])
                 
            
    return maindf

In [10]:
# this loop runs through all of the training and validation images, and builds a dataframe with the metadata

firstloop = True

for train_or_val in ['train', 'val']:
    for subtype in range(3):
        images_gz, masks_gz = getImageAndMaskFilePaths(train_or_val, subtype)
        
        subtypedf = getImageFileDataFrame(images_gz, masks_gz, train_or_val)

        if firstloop:
            maindf = subtypedf
            firstloop = False
        else:
            maindf = pd.concat([maindf, subtypedf])
        

# saving the metadata to csv file

maindf.to_csv(csvpath, index = False)

print('done')

done


In [11]:
# load main dataframe with all of information
maindf = pd.read_csv(csvpath, dtype='string')

traindf =  maindf[maindf['train/val'] == 'train']
valdf = maindf[maindf['train/val'] == 'val']


## Copying data to folders based on nnUnet data organization

### nnUnet data folder structure
nnUNet_raw/ <br>
1. Dataset001_BrainTumour <br>
    - dataset.json <br>
    - imagesTr <br>
    - imagesTs  # optional  <br>
    - labelsTr  <br>
<br>
- imagesTr contains the images belonging to the training cases. nnU-Net will perform pipeline configuration, training with cross-validation, as well as finding postprocessing and the best ensemble using this data.
- imagesTs (optional) contains the images that belong to the test cases. nnU-Net does not use them! This could just be a convenient location for you to store these images. Remnant of the Medical Segmentation Decathlon folder structure.
- labelsTr contains the images with the ground truth segmentation maps for the training cases.
- dataset.json contains metadata of the dataset.

1. nnUNet_raw/Dataset002_Heart/
    1. ├── dataset.json
    2. ├── imagesTr
        1. ├── la_003_0000.nii.gz
        2. ├── la_004_0000.nii.gz
        3. ├── ...
    3. ├── imagesTs
        1. ├── la_001_0000.nii.gz
        2. ├── la_002_0000.nii.gz
        3. ├── ...
    4. └── labelsTr
        1. ├── la_003.nii.gz
        2. ├── la_004.nii.gz
        3. ├── ...

In [12]:
def copyRawFiles(df, images_folder, labels_folder):
    for i, row in df.iterrows():
        if i % 10 == 9:
            print(i+1)
        ID = row['ID']
    
        old_image_path = row['image gz path']
        old_mask_path = row['mask gz path']
    
        image_img = sitk.ReadImage(old_image_path)
        mask_img = sitk.ReadImage(old_mask_path, sitk.sitkInt16)    # get rid of rounding error for segmentations
        new_mask_img = projectImage(image_img, mask_img, interpolate='NN')   # ensures that the voxel spacing is equal
        
        ### new file name scheme
        ### new_image_name = quiz_[ID]_0000.nii.gz
        ### new_mask_name = quiz_[ID].nii.gz
    
        new_image_name = 'quiz_' + ID + '_0000.nii.gz'
        new_mask_name = 'quiz_' + ID + '.nii.gz'
    
        new_image_path = os.path.join(images_folder, new_image_name)
        new_mask_path = os.path.join(labels_folder, new_mask_name)
    
        sitk.WriteImage(image_img, new_image_path)        
        sitk.WriteImage(new_mask_img, new_mask_path)

In [14]:
raw_folder = r'C:\Users\Leo\Documents\UHN-MedImg3D-ML-quiz\nnUnet_raw'
data_folder = r'C:\Users\Leo\Documents\UHN-MedImg3D-ML-quiz\nnUnet_raw\Dataset001_Pancreas'

imagesTr_folder = os.path.join(data_folder, 'imagesTr')
labelsTr_folder = os.path.join(data_folder, 'labelsTr')

for folder in [raw_folder, data_folder, imagesTr_folder, imagesTs_folder, labelsTr_folder, labelsTs_folder]:
    if not os.path.exists(folder):
        os.makedirs(folder)


print('copying training files...')

copyRawFiles(traindf, imagesTr_folder, labelsTr_folder)

print(' -- done')



copying training files...
10
20
30
40
50
60
70
80
90
100
110
120
130
140
150
160
170
180
190
200
210
220
230
240
250
 -- done
copying val files


NameError: name 'images_Ts_folder' is not defined

In [17]:
imagesTs_folder = os.path.join(data_folder, 'imagesTs')
labelsTs_folder = os.path.join(data_folder, 'labelsTs')

print('copying val files')
copyRawFiles(valdf, imagesTs_folder, labelsTs_folder)

    
print('  -- done')

copying val files
260
270
280
  -- done


### dataset.json

In [18]:
dataset_json_path = os.path.join(data_folder, 'dataset.json')

dictionary = {
    "channel_names": {  # formerly modalities
        "0": "CT", 
        }, 
    
    "labels": {  # THIS IS DIFFERENT NOW!
        "background": 0,
        "pancreas": 1,
        "lesion": 2
        }, 
 
    "numTraining": 252, 
 
    "file_ending": ".nii.gz",
 
    "overwrite_image_reader_writer": "SimpleITKIO"  # optional! If not provided nnU-Net will automatically determine the ReaderWriter
}
 
json_object = json.dumps(dictionary, indent=4)
 
with open(dataset_json_path, "w") as outfile:
    outfile.write(json_object)

# Training nnUnet

## Pre-processing

!nnUNetv2_plan_and_preprocess -d DATASET_ID --verify_dataset_integrity
<br>
<br>
!nnUNet_raw="/kaggle/working/nnUnet_raw" nnUNet_preprocessed="/kaggle/working/nnUNet_preprocessed" nnUNet_results="/kaggle/working/nnUNet_results" nnUNetv2_plan_and_preprocess -d 001 --verify_dataset_integrity  -pl nnUNetPlannerResEncM
<br>
(for ResNet encoder)

In [5]:
!nnUNetv2_plan_and_preprocess -d 001 --verify_dataset_integrity
#!nnUNet_raw="/kaggle/working/nnUnet_raw" nnUNet_preprocessed="/kaggle/working/nnUNet_preprocessed" nnUNet_results="/kaggle/working/nnUNet_results" nnUNetv2_plan_and_preprocess -d 001 --verify_dataset_integrity

Fingerprint extraction...
Dataset001_Pancreas
Using <class 'nnunetv2.imageio.simpleitk_reader_writer.SimpleITKIO'> reader/writer

####################
verify_dataset_integrity Done. 
If you didn't see any error messages then your dataset is most likely OK!
####################

Experiment planning...

############################
INFO: You are using the old nnU-Net default planner. We have updated our recommendations. Please consider using those instead! Read more here: https://github.com/MIC-DKFZ/nnUNet/blob/master/documentation/resenc_presets.md
############################

Dropping 3d_lowres config because the image size difference to 3d_fullres is too small. 3d_fullres: [ 59. 118. 181.], 3d_lowres: [59, 118, 181]
2D U-Net configuration:
{'data_identifier': 'nnUNetPlans_2d', 'preprocessor_name': 'DefaultPreprocessor', 'batch_size': 132, 'patch_size': (128, 192), 'median_image_size_in_voxels': array([118., 181.]), 'spacing': array([0.73046875, 0.73046875]), 'normalization_schemes': 


  0%|          | 0/252 [00:00<?, ?it/s]
  0%|          | 1/252 [00:07<29:25,  7.03s/it]
  1%|          | 2/252 [00:07<12:18,  2.96s/it]
  1%|1         | 3/252 [00:08<08:45,  2.11s/it]
  2%|1         | 4/252 [00:08<06:16,  1.52s/it]
  2%|1         | 5/252 [00:08<04:08,  1.01s/it]
  2%|2         | 6/252 [00:09<03:00,  1.37it/s]
  3%|3         | 8/252 [00:09<01:36,  2.52it/s]
  4%|3         | 10/252 [00:09<01:16,  3.17it/s]
  5%|4         | 12/252 [00:10<01:05,  3.66it/s]
  5%|5         | 13/252 [00:10<00:56,  4.22it/s]
  6%|5         | 15/252 [00:11<01:15,  3.15it/s]
  6%|6         | 16/252 [00:11<01:19,  2.98it/s]
  7%|6         | 17/252 [00:11<01:11,  3.28it/s]
  8%|7         | 19/252 [00:12<01:10,  3.30it/s]
  9%|8         | 22/252 [00:12<00:41,  5.58it/s]
 10%|9         | 24/252 [00:12<00:49,  4.62it/s]
 10%|9         | 25/252 [00:13<00:48,  4.68it/s]
 10%|#         | 26/252 [00:13<00:56,  3.97it/s]
 11%|#         | 27/252 [00:14<01:19,  2.83it/s]
 11%|#1        | 28/252 [00:14<01:0

## Training

can use ' -p nnUNetResEncUNetMPlans ' (for ResNet encoder)

In [None]:
'''
usage: nnUNetv2_train [-h] [-tr TR] [-p P]
                      [-pretrained_weights PRETRAINED_WEIGHTS]
                      [-num_gpus NUM_GPUS] [--use_compressed] [--npz] [--c]
                      [--val] [--val_best] [--disable_checkpointing]
                      [-device DEVICE]
                      dataset_name_or_id configuration fold

positional arguments:
  dataset_name_or_id    Dataset name or ID to train with
  configuration         Configuration that should be trained
  fold                  Fold of the 5-fold cross-validation. Should be an int
                        between 0 and 4.

options:
  -h, --help            show this help message and exit
  -tr TR                [OPTIONAL] Use this flag to specify a custom trainer.
                        Default: nnUNetTrainer
  -p P                  [OPTIONAL] Use this flag to specify a custom plans
                        identifier. Default: nnUNetPlans
  -pretrained_weights PRETRAINED_WEIGHTS
                        [OPTIONAL] path to nnU-Net checkpoint file to be used
                        as pretrained model. Will only be used when actually
                        training. Beta. Use with caution.
  -num_gpus NUM_GPUS    Specify the number of GPUs to use for training
  --use_compressed      [OPTIONAL] If you set this flag the training cases
                        will not be decompressed. Reading compressed data is
                        much more CPU and (potentially) RAM intensive and
                        should only be used if you know what you are doing
  --npz                 [OPTIONAL] Save softmax predictions from final
                        validation as npz files (in addition to predicted
                        segmentations). Needed for finding the best ensemble.
  --c                   [OPTIONAL] Continue training from latest checkpoint
  --val                 [OPTIONAL] Set this flag to only run the validation.
                        Requires training to have finished.
  --val_best            [OPTIONAL] If set, the validation will be performed
                        with the checkpoint_best instead of checkpoint_final.
                        NOT COMPATIBLE with --disable_checkpointing! WARNING:
                        This will use the same 'validation' folder as the
                        regular validation with no way of distinguishing the
                        two!
  --disable_checkpointing
                        [OPTIONAL] Set this flag to disable checkpointing.
                        Ideal for testing things out and you dont want to
                        flood your hard drive with checkpoints.
  -device DEVICE        Use this to set the device the training should run
                        with. Available options are 'cuda' (GPU), 'cpu' (CPU)
                        and 'mps' (Apple M1/M2). Do NOT use this to set which
                        GPU ID! Use CUDA_VISIBLE_DEVICES=X nnUNetv2_train
                        [...] instead!
'''

## Training 2D

nnUNetv2_train DATASET_NAME_OR_ID 2d FOLD [--npz]
(fold = 0)

In [7]:
#!nnUNet_raw="/kaggle/working/nnUnet_raw" nnUNet_preprocessed="/kaggle/working/nnUNet_preprocessed" nnUNet_results="/kaggle/working/nnUNet_results" nnUNetv2_train 001 2d 0 -device cuda --npz
!nnUNetv2_train 001 2d 0 -device cuda --npz

Exception in background worker 0:

  self.grad_scaler = GradScaler() if self.device.type == 'cuda' else None
Traceback (most recent call last):
  File "C:\Users\Leo\anaconda3\envs\WangLabQuiz\Lib\site-packages\batchgenerators\dataloading\nondet_multi_threaded_augmenter.py", line 53, in producer
    item = next(data_loader)
           ^^^^^^^^^^^^^^^^^
  File "C:\Users\Leo\anaconda3\envs\WangLabQuiz\Lib\site-packages\batchgenerators\dataloading\data_loader.py", line 126, in __next__
    return self.generate_train_batch()
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Leo\OneDrive\Documents\GitHub\WangLabQuiz\nnUNet\nnunetv2\training\dataloading\data_loader_2d.py", line 21, in generate_train_batch
    data, seg, properties = self._data.load_case(current_key)
                            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Leo\OneDrive\Documents\GitHub\WangLabQuiz\nnUNet\nnunetv2\training\dataloading\nnunet_dataset.py", line 86, in load_case
    data = np.load(entry['data_file'][:-4] + ".npy", 'r+')



 [Errno 13] Permission denied: 'C:\\Users\\Leo\\OneDrive\\Documents\\UHN-MedImg3D-ML-quiz\\nnUnet_preprocessed\\Dataset001_Pancreas\\nnUNetPlans_2d\\quiz_389_seg.npy'
Exception in background worker 5:
 [Errno 13] Permission denied: 'C:\\Users\\Leo\\OneDrive\\Documents\\UHN-MedImg3D-ML-quiz\\nnUnet_preprocessed\\Dataset001_Pancreas\\nnUNetPlans_2d\\quiz_123.npy'

############################
INFO: You are using the old nnU-Net default plans. We have updated our recommendations. Please consider using those instead! Read more here: https://github.com/MIC-DKFZ/nnUNet/blob/master/documentation/resenc_presets.md
############################

Using device: cuda:0

#######################################################################
Please cite the following paper when using nnU-Net:
Isensee, F., Jaeger, P. F., Kohl, S. A., Petersen, J., & Maier-Hein, K. H. (2021). nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation. Nature methods, 18(2), 203-211.
####

## Training 3D full res
nnUNetv2_train DATASET_NAME_OR_ID 3d_fullres FOLD [--npz]
(fold = 0)

In [None]:
#!nnUNet_raw="/kaggle/working/nnUnet_raw" nnUNet_preprocessed="/kaggle/working/nnUNet_preprocessed" nnUNet_results="/kaggle/working/nnUNet_results" nnUNetv2_train 001 3d_fullres 0 -device cuda --npz