In [2]:
import torch
import nibabel as nib
import os
import torch.nn as nn
from torch.nn import functional as F
from torch.utils.data import Dataset
import numpy as np
import time
from tqdm.auto import tqdm

In [5]:
def make_img_path(cid):
    """Return the path to the imaging volume of case number ``cid``.

    The case directory is ``./data/case_XXXXX``, where ``XXXXX`` is ``cid``
    zero-padded to five digits; the volume file inside it is
    ``imaging.nii.gz``.
    """
    return os.path.join('./data', f"case_{cid:05d}", 'imaging.nii.gz')

def make_seg_path(cid):
    """Return the path to the segmentation volume of case number ``cid``.

    The case directory is ``./data/case_XXXXX``, where ``XXXXX`` is ``cid``
    zero-padded to five digits; the label file inside it is
    ``segmentation.nii.gz``.
    """
    return os.path.join('./data', f"case_{cid:05d}", 'segmentation.nii.gz')

# Cases 0-159 form the training set; cases 161-209 form the held-out set (saved under 'valid').

# Case 160 is discarded because it is the only case whose image size is not 512x512.

In [3]:
# Create the output tree that the conversion cells write into.
# os.makedirs(..., exist_ok=True) creates any missing parent directory and
# does nothing when the directory already exists, so this cell is safe to
# re-run and needs no separate exists() check.
for split in ('train', 'valid'):
    os.makedirs(os.path.join('./data_npy', split), exist_ok=True)


In [18]:
# Export every slice of the training volumes (cases 0-159) as its own .npy
# file. `count` is a running slice index across all cases, so file names
# are unique over the whole split.
count=0
root=os.path.join('.','data_npy','train','image')
# makedirs also creates missing parents, so this cell does not depend on
# the directory-setup cell having been executed first, and re-running it
# is harmless.
os.makedirs(root,exist_ok=True)

for i in tqdm(range(160)):
    img=nib.load(make_img_path(i)).get_fdata()
    # Min-max normalise the whole volume to [0, 1]. The 1e-3 floor on the
    # range avoids a division by zero for a constant volume.
    lo=img.min()
    img=(img-lo)/max(img.max()-lo,1e-3)

    for j in range(len(img)):
        path=os.path.join(root,'{:05d}'.format(j+count))
        # j:j+1 (rather than j) keeps a leading axis of length 1 on the
        # saved array; np.save appends the '.npy' suffix to `path`.
        np.save(path,img[j:j+1].astype(np.float32))
    count+=len(img)


  0%|          | 0/160 [00:00<?, ?it/s]

In [6]:
# Export every slice of the training label volumes (cases 0-159) as a
# two-channel one-hot uint8 array. `count` is a running slice index across
# all cases, matching the numbering used for the image slices.
count=0
root=os.path.join('.','data_npy','train','segmentation')
# makedirs also creates missing parents, so this cell does not depend on
# the directory-setup cell having been executed first.
os.makedirs(root,exist_ok=True)

for i in tqdm(range(160)):
    seg=nib.load(make_seg_path(i)).get_fdata()
    # Collapse every non-zero label into one foreground class (1);
    # everything else is background (0).
    seg_no_cancer=np.where(seg>0,1,0).astype(np.uint8)

    for j in range(len(seg_no_cancer)):
        path=os.path.join(root,'{:05d}'.format(j+count))
        # Index the slice as 2-D so no squeeze is needed afterwards: a
        # blanket torch.squeeze would also drop a spatial axis of length 1.
        seg_1ch=torch.tensor(seg_no_cancer[j],dtype=torch.int64)
        # one_hot appends the class axis last, (H, W) -> (H, W, 2);
        # permute moves it to the front, giving (2, H, W) with channel 0 =
        # background and channel 1 = foreground.
        seg_2ch=F.one_hot(seg_1ch,num_classes=2).permute(2,0,1)
        np.save(path,seg_2ch.contiguous().numpy().astype(np.uint8))
    count+=len(seg_no_cancer)

  0%|          | 0/160 [00:00<?, ?it/s]

In [7]:
# Export every slice of the held-out volumes (cases 161-209) as its own
# .npy file. `count` is a running slice index across all cases, so file
# names are unique over the whole split.
count=0
root=os.path.join('.','data_npy','valid','image')
# makedirs also creates missing parents, so this cell does not depend on
# the directory-setup cell having been executed first, and re-running it
# is harmless.
os.makedirs(root,exist_ok=True)

for i in tqdm(range(161,210)):
    img=nib.load(make_img_path(i)).get_fdata()
    # Min-max normalise the whole volume to [0, 1]. The 1e-3 floor on the
    # range avoids a division by zero for a constant volume.
    lo=img.min()
    img=(img-lo)/max(img.max()-lo,1e-3)

    for j in range(len(img)):
        path=os.path.join(root,'{:05d}'.format(j+count))
        # j:j+1 (rather than j) keeps a leading axis of length 1 on the
        # saved array; np.save appends the '.npy' suffix to `path`.
        np.save(path,img[j:j+1].astype(np.float32))
    count+=len(img)

  0%|          | 0/49 [00:00<?, ?it/s]

In [8]:
# Export every slice of the held-out label volumes (cases 161-209) as a
# two-channel one-hot uint8 array. `count` is a running slice index across
# all cases, matching the numbering used for the image slices.
count=0
root=os.path.join('.','data_npy','valid','segmentation')
# makedirs also creates missing parents, so this cell does not depend on
# the directory-setup cell having been executed first.
os.makedirs(root,exist_ok=True)

for i in tqdm(range(161,210)):
    seg=nib.load(make_seg_path(i)).get_fdata()
    # Collapse every non-zero label into one foreground class (1);
    # everything else is background (0).
    seg_no_cancer=np.where(seg>0,1,0).astype(np.uint8)

    for j in range(len(seg_no_cancer)):
        path=os.path.join(root,'{:05d}'.format(j+count))
        # Index the slice as 2-D so no squeeze is needed afterwards: a
        # blanket torch.squeeze would also drop a spatial axis of length 1.
        seg_1ch=torch.tensor(seg_no_cancer[j],dtype=torch.int64)
        # one_hot appends the class axis last, (H, W) -> (H, W, 2);
        # permute moves it to the front, giving (2, H, W) with channel 0 =
        # background and channel 1 = foreground.
        seg_2ch=F.one_hot(seg_1ch,num_classes=2).permute(2,0,1)
        np.save(path,seg_2ch.contiguous().numpy().astype(np.uint8))
    count+=len(seg_no_cancer)

  0%|          | 0/49 [00:00<?, ?it/s]