In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import glob

In [2]:
# Relative locations of the dataset inputs and of the generated chunk
# directories. In the cells shown here, "annotations" is read just below
# and "slide_v2" is used by the helper functions to locate the
# "subset{n}-gt" folders.
config_paths = {
    "luna16":"../../luna16/LUNA16/",
    "segment":"../../luna16/seg-lungs-LUNA16/",
    "annotations":"../../luna16/annotations.csv",
    "candidates":"../../luna16/candidates.csv",
    "slide_v1":"../center-coord-chunk",
    "slide_v2":"../center-coord-chunk-v2",
}

# Annotation table; later cells look rows up through its "seriesuid" column.
annotations = pd.read_csv(config_paths['annotations'])

In [3]:
# Preview the first two annotation rows.
annotations.head(2)

Unnamed: 0,seriesuid,coordX,coordY,coordZ,diameter_mm
0,1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222...,-128.699421,-175.319272,-298.387506,5.651471
1,1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222...,103.783651,-211.925149,-227.12125,4.224708


In [4]:
# utils
def get_uids(subset):
    """
    subset: subset identifier, interpolated into the folder name "subset{subset}-gt"

    return sorted list of the unique uids found in that folder of
    config_paths["slide_v2"]. The uid of a file is the part of its name
    before the first "_".
    """
    chunk_dir = config_paths["slide_v2"] + f"/subset{subset}-gt"

    # sorted() rather than list(set(...)): the iteration order of a set of
    # strings changes between interpreter runs, which made every result that
    # is built by looping over these uids come out in a different order after
    # a kernel restart.
    return sorted({name.split("_")[0] for name in os.listdir(chunk_dir)})

def num_annos_of_uid(uid):
    """
    uid: seriesuid value to look up

    return number of rows of `annotations` whose "seriesuid" equals uid
    """
    is_match = annotations["seriesuid"].values == uid

    return np.count_nonzero(is_match)

def num_annos_in_subset(subset):
    """
    subset: subset identifier, forwarded to get_uids

    return total number of rows of `annotations` whose "seriesuid" is one
    of the uids of the subset
    """
    uids_in_subset = get_uids(subset)
    annotated_uids = annotations["seriesuid"].values

    # one count per uid of the subset, added together
    return sum(np.count_nonzero(annotated_uids == uid) for uid in uids_in_subset)

def count_pixel(paths):
    """
    paths: paths of the .npy arrays to measure (e.g. all slide annos of 1 image)

    return (total_pixel, annos_pixel, l_percentage)
        total_pixel: number of elements, summed over every loaded array
        annos_pixel: number of elements equal to 1 (mask value), summed over
                     every loaded array
        l_percentage: one entry per path, the fraction annos / total of that
                      array (a ratio in [0, 1]; multiply by 100 for percent)
    """
    total_pixel = 0
    annos_pixel = 0
    l_percentage = []

    for path in paths:
        image = np.load(path)

        # count the mask elements once and reuse the value
        n_mask = np.count_nonzero(image == 1)

        total_pixel += image.size
        annos_pixel += n_mask

        # image.size already includes the mask elements, so the fraction is
        # n_mask / image.size; putting n_mask in the denominator a second
        # time under-reports it. An empty array yields 0.0 instead of a
        # ZeroDivisionError.
        l_percentage.append(n_mask / image.size if image.size else 0.0)

    return total_pixel, annos_pixel, l_percentage

def get_index(image):
    """
    image: np.array of image

    return list of the positions along the first axis whose entry contains
    at least one value equal to 1 (mask value)
    """
    return [
        position
        for position, plane in enumerate(image)
        if np.count_nonzero(plane == 1) > 0
    ]

In [5]:
def explore(subset, each_percentage=False):
    """
    Print summary counts for the "subset{subset}-gt" folder of
    config_paths["slide_v2"] and return its pixel statistics.

    subset: subset identifier used in the folder name
    each_percentage: when True, also return the per-file fractions

    return (total_pixel, annos_pixel), or
           (total_pixel, annos_pixel, l_percentage) when each_percentage is True;
           the values are the ones produced by count_pixel, accumulated over
           every uid of the subset
    """
    gt_dir = f"{config_paths['slide_v2']}/subset{subset}-gt/"
    files_in_dir = os.listdir(gt_dir)

    subset_uids = get_uids(subset)
    n_annotations = num_annos_in_subset(subset)

    print("Total file in subset:", len(files_in_dir))
    print("Total annotation:", n_annotations)
    print("Total uids:", len(subset_uids))

    # accumulate the pixel statistics uid by uid
    pixel_sum = 0
    mask_sum = 0
    fractions = []

    for uid in subset_uids:
        # NOTE(review): "?" matches exactly one character, so a file whose
        # nodule index has two or more characters would be skipped here —
        # confirm against the file naming scheme.
        pattern = f"{config_paths['slide_v2']}/subset{subset}-gt/{uid}_nod_?_center_gt.npy*"
        n_pixels, n_mask, uid_fractions = count_pixel(glob.glob(pattern))

        pixel_sum += n_pixels
        mask_sum += n_mask
        fractions.extend(uid_fractions)

    if not each_percentage:
        return pixel_sum, mask_sum

    return pixel_sum, mask_sum, fractions


# Statistics for subset 0; with the default flag the result is (total_pixel, annos_pixel).
explore(0)

Total file in subset: 1056
Total annotation: 112
Total uids: 67


(7340032, 71788)

In [6]:
def total_percentage(subsets=range(10)):
    """
    Sum the pixel statistics returned by explore over several subsets.

    subsets: iterable of subset identifiers, each passed to explore;
             defaults to 0..9, the range that used to be hard-coded

    return (total, annos): the summed total pixel count and mask pixel count.
    Despite the name, no percentage is returned; it can be derived as
    annos / total * 100.
    """
    total = 0
    annos = 0

    for subset in subsets:
        # explore also prints its per-subset summary lines
        t, a = explore(subset)

        total += t
        annos += a

    return total, annos

# Aggregate over all subsets; the printed lines come from explore.
total_percentage()

# (74121216, 768914)
# the first value is summed from image.size, so it already includes the mask pixels:
# percentage = (768914 / 74121216) * 100 = 1.0374%

Total file in subset: 1056
Total annotation: 112
Total uids: 67
Total file in subset: 1175
Total annotation: 128
Total uids: 61
Total file in subset: 1276
Total annotation: 128
Total uids: 56
Total file in subset: 1051
Total annotation: 119
Total uids: 65
Total file in subset: 1188
Total annotation: 128
Total uids: 62
Total file in subset: 1042
Total annotation: 108
Total uids: 54
Total file in subset: 1179
Total annotation: 129
Total uids: 63
Total file in subset: 1016
Total annotation: 111
Total uids: 54
Total file in subset: 600
Total annotation: 65
Total uids: 33
Total file in subset: 1000
Total annotation: 105
Total uids: 59


(74121216, 768914)

### Percentage of mask pixels in full images

In [7]:
# image files: "*_gt.npy"
# index files: "*_posindex.np" (not 100% correct)

def get_image_path(subset):
    """
    subset: subset identifier used in the folder name "subset{subset}-gt"

    return list of the paths in that folder matching "*_gt.npy".
    The pattern is relative, so it is resolved against the current
    working directory.
    """
    pattern = f"subset{subset}-gt/*_gt.npy"

    return glob.glob(pattern)

# def 

In [8]:
def explore_full_image(subset, each_percentage=False):
    """
    Print the number of images found by get_image_path for the subset and
    return their pixel statistics.

    subset: subset identifier, forwarded to get_image_path
    each_percentage: when True, also return the per-image fractions

    return (total_pixel, annos_pixel), or
           (total_pixel, annos_pixel, l_percentage) when each_percentage is True;
           the values are the ones produced by count_pixel, accumulated over
           every image path
    """
    image_paths = get_image_path(subset)
    print(f"Total image subset {subset}:", len(image_paths))

    # calculate percentage, one image at a time
    pixel_sum, mask_sum, fractions = 0, 0, []

    for image_path in image_paths:
        n_pixels, n_mask, image_fractions = count_pixel([image_path])

        pixel_sum += n_pixels
        mask_sum += n_mask
        fractions.extend(image_fractions)

    if not each_percentage:
        return pixel_sum, mask_sum

    return pixel_sum, mask_sum, fractions
    
# Full-image statistics for subset 0; with the default flag the result is (total_pixel, annos_pixel).
explore_full_image(0)    

Total image subset 0: 89


(1553517277, 1449383)

In [9]:
def total_percentage_image(subsets=range(10)):
    """
    Sum the pixel statistics returned by explore_full_image over several subsets.

    subsets: iterable of subset identifiers, each passed to explore_full_image;
             defaults to 0..9, the range that used to be hard-coded

    return (total, annos): the summed total pixel count and mask pixel count.
    Despite the name, no percentage is returned; it can be derived as
    annos / total * 100.
    """
    total = 0
    annos = 0

    for subset in subsets:
        # explore_full_image also prints its per-subset image count
        t, a = explore_full_image(subset)

        total += t
        annos += a

    return total, annos

# Aggregate over all subsets; the printed lines come from explore_full_image.
total_percentage_image()
# (15695208565, 21511938)
# the first value is summed from image.size, so it already includes the mask pixels:
# percentage = (21511938 / 15695208565) * 100 = 0.1371%

Total image subset 0: 89
Total image subset 1: 89
Total image subset 2: 89
Total image subset 3: 89
Total image subset 4: 89
Total image subset 5: 89
Total image subset 6: 89
Total image subset 7: 89
Total image subset 8: 88
Total image subset 9: 88


(15695208565, 21511938)