# Why 3D volumes?

Because a 3D model such as a 3D U-Net can use the context between neighbouring slices and learn volumetric information.

This notebook shows how to build 3D volumes from this dataset. The resulting volumes are saved at the end and can be used directly.


In [None]:
# Importing required libraries
import numpy as np
import os
import time
import glob
import cv2
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from tqdm.notebook import tqdm
import pickle

In [None]:
# Load the training annotations, then report the table size and preview a few rows
train_csv_path = '../input/uw-madison-gi-tract-image-segmentation/train.csv'
df_train = pd.read_csv(train_csv_path)
print(df_train.shape)
df_train.head(4)

The annotation for each row is stored in the `segmentation` column.

In [None]:
# Unique case numbers, taken only from rows that carry an annotation
annotated_ids = df_train.loc[df_train['segmentation'].notna(), 'id']
# An id looks like "case<case>_day<day>_slice_<num>": keep the text between
# "case" and "_day"
case_numbers = [
    slice_id.split('_day')[0].split('case')[1] for slice_id in annotated_ids
]
ids = np.unique(case_numbers)
print(ids.shape)
print(ids[:10])

So there are 85 cases, each of which contains multiple scan sessions (days) of one patient.

In [None]:
# Take a look at one slice of one sample: load the image and select its
# annotation rows from the training table
train_path = '../input/uw-madison-gi-tract-image-segmentation/train'
case = 154
day = 16
slice_num = 75

slice_tag = str(slice_num).zfill(4)  # slice numbers are zero-padded to 4 digits
scan_name = f'case{case}_day{day}'

# NOTE: despite its name, mask_path holds the DataFrame rows for this slice
mask_path = df_train[df_train['id'] == f'{scan_name}_slice_{slice_tag}']

# The rest of the file name is matched with a wildcard; the first hit is used
image_pattern = f'{train_path}/case{case}/{scan_name}/scans/slice_{slice_tag}_*.png'
image_path = glob.glob(image_pattern)[0]
image = plt.imread(image_path)

def rle_decode(mask_rle, shape):
    """Decode per-class run-length encodings into a single label mask.

    Args:
        mask_rle: Sequence of up to three RLE entries, one per class, in class
            order. Each entry is a string of space-separated
            ``start length`` pairs, where ``start`` is a 1-based index into
            the flattened mask. Entries that are not strings (e.g. NaN for a
            missing annotation) are skipped. Entries beyond the third are
            ignored.
        shape: ``(height, width)`` of the mask to produce.

    Returns:
        A ``uint8`` array of the given shape in which 0 is background and the
        pixels of the i-th entry (0-based) are set to ``i + 1``. Where runs of
        different classes overlap, the later class wins.
    """
    h, w = shape
    flat = np.zeros(h * w, dtype=np.uint8)
    # Slicing instead of indexing 0..2 tolerates inputs with fewer than three
    # entries (e.g. an id with no rows), which then decode to background.
    for label, rle in enumerate(mask_rle[:3], start=1):
        if not isinstance(rle, str):
            continue
        runs = np.array(rle.split(), dtype=int)
        starts = runs[0::2] - 1  # convert 1-based starts to 0-based offsets
        ends = starts + runs[1::2]
        for lo, hi in zip(starts, ends):
            flat[lo:hi] = label
    return flat.reshape(shape)

# Decode the annotation of the selected slice at the image's own resolution
mask = rle_decode(mask_path.segmentation.values, image.shape)

# Three panels side by side: the image, the mask, and the mask over the image
fig, ax = plt.subplots(1, 3, figsize=(10, 6))
image_ax, mask_ax, overlay_ax = ax
image_ax.imshow(image)
mask_ax.imshow(mask)
overlay_ax.imshow(image, cmap='gray')
overlay_ax.imshow(mask, alpha=0.5)
plt.show()

**Here we build the 3D volumes, saving each input volume with the suffix `_x.npy` and each segmentation volume with the suffix `_y.npy`.**

In [None]:
# Make 3d volumes
# For every directory that holds a stack of slices, build an image volume and a
# label volume of shape (dim1, dim2, n_slices) and save them as
# <scan>_x.npy and <scan>_y.npy.
output_dir = './3d_volumes_masks'
os.makedirs(output_dir, exist_ok=True)

# Group the RLE entries by slice id once, in CSV row order, so that each slice
# lookup is a dict access rather than a scan over the whole DataFrame.
rles_by_id = {}
for slice_id, rle in zip(df_train['id'], df_train['segmentation']):
    rles_by_id.setdefault(slice_id, []).append(rle)

for root, _dirs, files in sorted(os.walk(train_path)):
    # Directories with only a few files are not treated as slice stacks
    if len(files) <= 10:
        continue

    slice_files = sorted(files)
    n_slices = len(slice_files)

    # The volume's first two dimensions are read from the 4th and 3rd
    # underscore-separated fields of the first file name.
    # NOTE(review): presumably height and width respectively -- confirm.
    name_parts = slice_files[0].split('_')
    dim1 = int(name_parts[3])
    dim2 = int(name_parts[2])

    image_3d = np.zeros((dim1, dim2, n_slices), dtype=np.float32)
    label_3d = np.zeros((dim1, dim2, n_slices), dtype=np.uint8)

    # The scan name is the parent of the slice directory
    # (<train_path>/<case>/<scan>/scans); deriving it from the path's tail
    # keeps this independent of how deep train_path itself is.
    file_name = os.path.basename(os.path.dirname(os.path.normpath(root)))

    for i, slice_file in enumerate(slice_files):
        slice_tag = slice_file.split('_')[1]
        with Image.open(os.path.join(root, slice_file)) as slice_image:
            image_3d[:, :, i] = slice_image
        slice_rles = rles_by_id[f"{file_name}_slice_{str(slice_tag).zfill(4)}"]
        label_3d[:, :, i] = rle_decode(slice_rles, (dim1, dim2))

    # np.save appends the .npy extension itself
    np.save(os.path.join(output_dir, file_name + '_x'), image_3d, allow_pickle=True)
    np.save(os.path.join(output_dir, file_name + '_y'), label_3d, allow_pickle=True)
    print(file_name , 'saved with', len(files), 'slices')

That's it: the 3D inputs (X) and labels (y) are now both saved in `.npy` format.