# Make Label Image
1. Filter annotations by malignancy
2. Create an empty image with the same size as the CT volume slices
3. Load the contours from each annotation
4. Save each contour as an image at the full CT image size

Use Pylidc, OpenCV

# Extract Training Dataset

1. Filter annotations by malignancy
2. Load all DICOM images for each annotation
3. Find the slices that contain nodules
4. Save the slices that contain nodules

Uses encapsulation (the extraction logic lives in a class) <br>
Normalization is done with OpenCV

## Error Report
1. Reversed slice order
2. Image number mismatch <br>
    Label : contour / j = range(ann.contour_slice_indices.shape[0]) <br>
    Training : scan / j = ann.contour_slice_indices <br>
    The slice indices are not contiguous from nodule #206 onward

In [3]:
import pylidc as pl
import numpy as np
import matplotlib.pyplot as plt
import cv2

# For contour to boolean mask function
import matplotlib.path as mplpath

In [27]:
class AnnAll:
    """
    Export label masks and the matching CT slices for a set of annotations.

    Attributes
    ==========

    anns : iterable
        The annotations passed to the constructor. It must support
        ``count()`` and iteration (e.g. a pylidc annotation query).
    nodule_num : int
        Number of annotations in ``anns``, as returned by ``anns.count()``.
    """

    def __init__(self, anns):
        """Store the annotations and print how many there are."""
        self.anns = anns
        self.nodule_num = anns.count()
        print("Total Nodule numbers :", self.nodule_num)

    @staticmethod
    def _contour_mask(contour, grid, shape):
        """
        Rasterize one contour into a uint8 mask (255 inside, 0 outside).

        Parameters
        ==========

        contour : annotation contour
            Object providing ``to_matrix(include_k=False)``.
        grid : ndarray
            Two-column array holding the index pair of every pixel of a slice.
        shape : tuple
            2-d shape of a slice; the returned mask has this shape.
        """
        # 2-d contour vertices, without the slice coordinate.
        # NOTE(review): assumes the vertices use the same index order as
        # the columns of ``grid`` -- confirm against the pylidc docs.
        vertices = contour.to_matrix(include_k=False)

        # Closed polygon through the vertices, tested against every pixel
        region = mplpath.Path(vertices, closed=True)
        inside = region.contains_points(grid).reshape(shape)

        # Convert boolean to uint8 image
        return (inside * 255).astype('uint8')

    def extractCT(self, addr_label, addr_train, num_iter=np.inf):
        """
        Save a label mask and a normalized CT slice for every contour.

        For annotation number ``i`` and contour number ``j`` two files are
        written: ``addr_label + "<i>_<j>.png"`` (the filled contour) and
        ``addr_train + "<i>_<j>.png"`` (the CT slice the contour lies on,
        min-max normalized to 8 bit).

        Parameters
        ==========

        addr_label : str
            Path prefix for the label images.
        addr_train : str
            Path prefix for the CT slice images.
        num_iter : number
            Stop after this many annotations; all of them by default.
        """
        import os

        # cv2.imwrite does not create missing directories and only reports
        # the failure through its return value, so create them up front.
        for prefix in (addr_label, addr_train):
            out_dir = os.path.dirname(prefix)
            if out_dir:
                os.makedirs(out_dir, exist_ok=True)

        for i, ann in enumerate(self.anns):
            if not i < num_iter:
                break

            # Load every slice of the scan this annotation belongs to
            image_all = ann.scan.load_all_dicom_images(verbose=False)

            # Index pair of every pixel, built from the real slice shape
            # rather than a hard-coded 512 x 512
            shape = image_all[0].pixel_array.shape
            rows, cols = np.indices(shape)
            grid = np.c_[rows.flatten(), cols.flatten()]

            # Iterate the contours themselves so that each one is paired
            # with its own slice, whatever order they are stored in.
            for j, con in enumerate(ann.contours):
                suffix = str(i) + '_' + str(j) + ".png"

                # Save Label Image
                cv2.imwrite(addr_label + suffix,
                            self._contour_mask(con, grid, shape))

                # CT slice on which this contour was drawn
                image = image_all[con.image_k_position].pixel_array

                # Normalize the image
                norm_image = cv2.normalize(image, None, 255, 0,
                                           cv2.NORM_MINMAX, cv2.CV_8U)

                # Save Image
                cv2.imwrite(addr_train + suffix, norm_image)

            # Print progress every 50 annotations
            if i % 50 == 49:
                print("%d / %d Image Extraction Completed" % (i+1, self.nodule_num))

        print("Training Dataset Extraction Completed")

In [None]:
# Annotations with malignancy > 3 are exported as the "nodule" class
# (1653 nodules, 15221 images)
nodule = pl.query(pl.Annotation).filter(pl.Annotation.malignancy > 3)

addr_label = "./Labels/nodules/Image"
addr_train = "./Training/nodules/Image"

all1 = AnnAll(nodule)
all1.extractCT(addr_label=addr_label, addr_train=addr_train)

# Annotations with malignancy < 3 are exported as the "non-nodule" class
# (2600 nodules, 13115 images)
nonnodule = pl.query(pl.Annotation).filter(pl.Annotation.malignancy < 3)

addr_label = "./Labels/non-nodules/Image"
addr_train = "./Training/non-nodules/Image"

all2 = AnnAll(nonnodule)
all2.extractCT(addr_label=addr_label, addr_train=addr_train)

Total Nodule numbers : 1653
50 / 1653 Image Extraction Completed
100 / 1653 Image Extraction Completed
150 / 1653 Image Extraction Completed
200 / 1653 Image Extraction Completed
250 / 1653 Image Extraction Completed


In [21]:
# Slice indices of annotation #206; in the recorded output 567 is missing,
# so the indices are not contiguous.
nodule[206].contour_slice_indices

array([564, 565, 566, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577,
       578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590,
       591, 592, 593, 594, 595, 596, 597, 598], dtype=int64)

In [22]:
# Slice index of the first stored contour of annotation #206; the recorded
# value (598) is the largest slice index, not the smallest.
nodule[206].contours[0].image_k_position

598

In [23]:
# Number of contours stored for annotation #206
len(nodule[206].contours)

34

In [24]:
# Last (largest) slice index of annotation #206
nodule[206].contour_slice_indices[-1]

598

In [26]:
# For comparison: annotation #205, whose recorded slice indices are
# contiguous and match its contour count.
print(nodule[205].contour_slice_indices)
len(nodule[205].contours)

[660 661 662 663 664 665 666]


7