In [None]:
# Install the libraries.
# fast_slic is installed with pip; every other package is installed with conda
# from a local tarball under /kaggle/input/pydicom-helper.
!pip install fast_slic
!conda install '/kaggle/input/pydicom-helper/certifi-2020.12.5-py37h89c1867_1.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-helper/libgcc-ng-9.3.0-h2828fa1_19.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-helper/gdcm-2.8.9-py37h500ead1_1.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-helper/conda-4.10.1-py37h89c1867_0.tar.bz2' -c conda-forge -y
# NOTE(review): this repeats the certifi install from the first conda line
# above; confirm whether the second pass is intentional before removing it.
!conda install '/kaggle/input/pydicom-helper/certifi-2020.12.5-py37h89c1867_1.tar.bz2' -c conda-forge -y
!conda install '/kaggle/input/pydicom-helper/openssl-1.1.1k-h7f98852_0.tar.bz2' -c conda-forge -y

In [None]:
import SimpleITK as sitk
import numpy as np
import pandas as pd

import imageio
import random

from fast_slic import Slic
from PIL import Image

import glob
import ujson
import time
import os

import multiprocessing

from multiprocessing import Pool, Manager, Process, Lock

import pydicom
from pydicom.pixel_data_handlers.util import apply_voi_lut

def rand_color(n):
    """Return ``n`` random colours as ``[r, g, b]`` lists.

    Each channel is drawn with ``random.randint(0, 255)``, red first, then
    green, then blue, so the result depends on the state of the global
    ``random`` generator.

    Args:
        n: number of colours to generate.

    Returns:
        A list of ``n`` three-element lists of ints in the range 0..255.
    """
    # The inner comprehension draws the three channels in r, g, b order.
    return [
        [random.randint(0, 255) for _channel in range(3)]
        for _ in range(n)
    ]

def read_xray(path, voi_lut = True, fix_monochrome = True):
    """Load a DICOM file and return its pixel data min-max scaled to 8 bits.

    Adapted from:
    https://www.kaggle.com/raddar/convert-dicom-to-np-array-the-correct-way

    Args:
        path: location of the DICOM file, handed to ``pydicom.dcmread``.
        voi_lut: when true, the pixel data is passed through
            ``apply_voi_lut`` (the VOI LUT, if the device provides one,
            transforms raw DICOM data to a "human-friendly" view).
        fix_monochrome: when true, images whose ``PhotometricInterpretation``
            is ``"MONOCHROME1"`` are inverted so they do not look negative.

    Returns:
        A ``numpy.uint8`` array with the shape of the pixel data, scaled so
        the smallest value maps to 0 and the largest to 255. An image whose
        pixels are all equal yields an all-zero array.
    """
    # dcmread is the current reader name; read_file is a deprecated alias.
    dicom = pydicom.dcmread(path)

    pixels = dicom.pixel_array
    data = apply_voi_lut(pixels, dicom) if voi_lut else pixels

    # Depending on this value the X-ray may look inverted - fix that.
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data

    data = data - np.min(data)
    peak = np.max(data)
    if peak == 0:
        # Constant image: dividing by the zero range would produce NaNs and an
        # undefined cast to uint8, so return a well-defined black image.
        return np.zeros(data.shape, dtype=np.uint8)

    data = data / peak
    return (data * 255).astype(np.uint8)

def make_sp_from_array(image, side=10, comp=10):
    """Run fast_slic on an image array and return the shifted cluster map.

    The number of requested superpixels is the image area (first two axes)
    divided by ``side * side``, truncated to an int.

    Args:
        image: array whose first two axes are the image rows and columns.
        side: divisor controlling the superpixel count (area / side**2).
        comp: value passed to ``Slic`` as ``compactness``.

    Returns:
        The cluster map returned by ``Slic.iterate`` with 1 added to every
        entry.
    """
    height, width = image.shape[0], image.shape[1]
    n_components = int((height * width) / (side * side))
    segmenter = Slic(num_components=n_components, compactness=comp)
    # Every cluster id is shifted up by one
    # (presumably so no superpixel carries label 0 - TODO confirm).
    return segmenter.iterate(image) + 1

def makesp(imagefi=None, imagefo=None, side=10, comp=10, issave=False):
    """Read an image file, compute its superpixel map and optionally save it.

    The image is read with SimpleITK as ``sitkVectorUInt8`` and segmented by
    ``make_sp_from_array``, so both entry points share one implementation.
    Only the image shape is printed; the full pixel and label arrays are no
    longer dumped to the output.

    Args:
        imagefi: path of the input image (required).
        imagefo: path the label map is written to; required when ``issave``
            is true.
        side: forwarded to ``make_sp_from_array``.
        comp: forwarded to ``make_sp_from_array``.
        issave: when true, the label map is written to ``imagefo`` with
            compression enabled.

    Returns:
        The label array produced by ``make_sp_from_array``.

    Raises:
        ValueError: if ``imagefi`` is missing, or ``issave`` is true and
            ``imagefo`` is missing. Checked before any file is touched.
    """
    if imagefi is None:
        raise ValueError("makesp: 'imagefi' (input image path) is required")
    if issave and imagefo is None:
        raise ValueError("makesp: 'imagefo' (output path) is required when issave=True")

    source = sitk.ReadImage(imagefi, sitk.sitkVectorUInt8)
    image = sitk.GetArrayFromImage(source)
    print("image", image.shape)

    ma_arr = make_sp_from_array(image, side=side, comp=comp)

    if issave:
        # Third positional argument True asks SimpleITK to compress the file.
        sitk.WriteImage(sitk.GetImageFromArray(ma_arr), imagefo, True)

    return ma_arr

def drawsp(fmask, imagefo):
    """Render a label mask as an RGB image with one random colour per label.

    Args:
        fmask: path of the label image, read with ``sitk.ReadImage``.
        imagefo: path of the colour image written with ``imageio.imsave``.

    Pixels whose value is not among the labels reported by
    ``LabelShapeStatisticsImageFilter`` stay black.
    """
    # Read the mask.
    mask = sitk.ReadImage(fmask)
    mask_array = sitk.GetArrayFromImage(mask)

    lsif = sitk.LabelShapeStatisticsImageFilter()
    lsif.Execute(mask)
    labels = lsif.GetLabels()

    # One colour per reported label, paired by position. Indexing the palette
    # by the label *value* overflowed it whenever a label id was larger than
    # the number of labels plus the fixed margin.
    colors = rand_color(len(labels))

    image_out = np.zeros((mask_array.shape[0], mask_array.shape[1], 3), dtype=np.uint8)
    for color, label in zip(colors, labels):
        image_out[mask_array == int(label)] = color

    # Write out the image.
    imageio.imsave(imagefo, image_out)

    
def process_sp(arg):
    """Compute, for one superpixel, the fraction of its pixels marked as class ``c``.

    Args:
        arg: dict with the keys
            ``"c"``  - class index,
            ``"sp"`` - superpixel label array,
            ``"gt"`` - ground-truth array indexed with the same mask,
            ``"la"`` - label of the superpixel to evaluate.

    Returns:
        ``(la, pcs)`` where ``pcs`` is a four-element list of floats. It is all
        zeros unless ``c > 1``; then ``pcs[c - 1]`` is the number of pixels of
        the superpixel whose ground-truth value equals ``c`` divided by the
        superpixel's pixel count (0.0 when there are no such pixels).
    """
    cls = arg["c"]
    label_map = arg["sp"]
    truth = arg["gt"]
    label = arg["la"]

    fractions = [0.0] * 4
    if cls > 1:
        # Build the membership mask once and reuse it for both counts.
        inside = label_map == label
        n_pixels = len(label_map[inside])
        n_hits = float(np.count_nonzero(truth[inside] == cls))
        fractions[cls - 1] = n_hits / n_pixels if n_hits != 0.0 else 0.0
    return label, fractions
    
def _study_class(row):
    """Return the class index (1-4) of the first study column > 0, else 0."""
    columns = (
        "Negative for Pneumonia",
        "Typical Appearance",
        "Indeterminate Appearance",
        "Atypical Appearance",
    )
    for class_index, column in enumerate(columns, start=1):
        if row[column] > 0:
            return class_index
    return 0


def _parse_label_boxes(label):
    """Split a label string into ``[xmin, ymin, xmax, ymax]`` float lists.

    The string is read as space-separated groups of six tokens; the first two
    tokens of each group are skipped and the other four are converted to
    float. A trailing incomplete group is ignored.
    """
    tokens = label.split(' ')
    return [
        [float(token) for token in tokens[start + 2:start + 6]]
        for start in range(0, len(tokens) - 5, 6)
    ]


def _rasterize_boxes(boxes, shape, value):
    """Return a uint16 array of ``shape`` with ``value`` painted inside each box."""
    gt_arr = np.zeros(shape, dtype=np.uint16)
    for xmin, ymin, xmax, ymax in boxes:
        # The four numbers are two corners (per the competition data
        # description), so they are slice bounds, not an origin plus a size.
        # Clamp at 0 so a negative coordinate cannot wrap around as a
        # from-the-end index.
        x0, y0 = max(int(xmin), 0), max(int(ymin), 0)
        x1, y1 = max(int(xmax), 0), max(int(ymax), 0)
        gt_arr[y0:y1, x0:x1] = value
    return gt_arr


def _superpixel_fractions(spmap, gt_arr, c, labels):
    """Return ``(label, pcs)`` for every label, as ``process_sp`` would.

    ``pcs`` is a four-element list; only ``pcs[c - 1]`` can be non-zero, and
    only when ``c > 1``. All labels are counted in one pass over the arrays
    instead of one full-array scan (and one pickled copy of both arrays) per
    label.
    """
    if c <= 1:
        return [(la, [0.0, 0.0, 0.0, 0.0]) for la in labels]

    values, inverse = np.unique(spmap, return_inverse=True)
    inverse = np.ravel(inverse)
    totals = np.bincount(inverse, minlength=len(values))
    hits = np.bincount(inverse[np.ravel(gt_arr == c)], minlength=len(values))
    position = {int(value): i for i, value in enumerate(values)}

    results = []
    for la in labels:
        pcs = [0.0, 0.0, 0.0, 0.0]
        i = position.get(int(la))
        if i is not None and hits[i] > 0:
            pcs[c - 1] = float(hits[i]) / int(totals[i])
        results.append((la, pcs))
    return results


def make_superpixels_train(side=50, l_inf=None, l_sup=None, save_gt=False):
    """Build superpixel maps and a per-superpixel label table for training images.

    Steps:
      1. Read ``train_image_level.csv`` and ``train_study_level.csv``, join
         them on ``StudyInstanceUID``, attach the path of every ``*dcm`` file
         found under the train folder, and write ``train_combine.csv``.
      2. For each selected row: read the X-ray, compute superpixels with
         ``make_sp_from_array`` and save them as
         ``./superpixels/<side>/<id_image>.nrrd``; paint the boxes of the
         ``label`` column with the row's class index; and record, for every
         superpixel, the fraction of its pixels lying inside a box.
      3. Write all records to ``train_superpixels.csv``.

    Both CSV files have fixed names, so every call overwrites the output of
    the previous one.

    Args:
        side: forwarded to ``make_sp_from_array``; also names the output
            sub-folder.
        l_inf: first row (positional) to process.
        l_sup: row at which processing stops (exclusive). The slice is applied
            only when both ``l_inf`` and ``l_sup`` are given.
        save_gt: when true, the painted box mask is also written to
            ``./groundtruth/<id_image>.nrrd``.

    Returns:
        None. Results are written to disk.
    """
    path_gt = "./groundtruth/"
    path_sp = "./superpixels/"+str(side)
    os.makedirs(path_gt, exist_ok=True)
    os.makedirs(path_sp, exist_ok=True)

    # Combine the image-level and study-level training tables.
    df_image_level = pd.read_csv("../input/siim-covid19-detection/train_image_level.csv")
    df_study_level = pd.read_csv("../input/siim-covid19-detection/train_study_level.csv")

    df_image_level["image_id"] = df_image_level.id.map(lambda x: x.split('_')[0])
    df_study_level["StudyInstanceUID"] = df_study_level.id.map(lambda x: x.split('_')[0])

    df_image_level = df_image_level.rename(columns={"id": "id_image"})
    df_study_level = df_study_level.rename(columns={"id": "id_study"})

    df_com = pd.merge(df_image_level, df_study_level, how="left", on=["StudyInstanceUID"])

    # Attach the DICOM path of every image found on disk.
    dcm_path = glob.glob('/kaggle/input/siim-covid19-detection/train/**/*dcm', recursive=True)
    img_meta = pd.DataFrame({'dcm_path':dcm_path})
    img_meta['image_id'] = img_meta.dcm_path.map(lambda x: x.split('/')[-1].replace('.dcm', ''))
    img_meta['study_id'] = img_meta.dcm_path.map(lambda x: x.split('/')[-3].replace('.dcm', ''))

    df_com = pd.merge(df_com, img_meta, how="left", on=["image_id"])
    df_com.to_csv('train_combine.csv',index=False)

    if l_inf is not None and l_sup is not None:
        df_com = df_com.iloc[l_inf:l_sup,]

    columns = [
        'image_id',
        'image_study',
        'label_sp',
        'Negative_for_Pneumonia',
        'Typical_Appearance',
        'Indeterminate_Appearance',
        'Atypical_Appearance',
        'Negative_for_Pneumonia_percentage',
        'Typical_Appearance_percentage',
        'Indeterminate_Appearance_percentage',
        'Atypical_Appearance_percentage',
    ]
    records = []

    for ii, (_, row) in enumerate(df_com.iterrows()):
        tic = time.time()
        c = _study_class(row)

        # Make and save the superpixels.
        xray = read_xray(row['dcm_path'])
        rgb_img = np.stack((xray,)*3, axis=-1)
        spmap = make_sp_from_array(rgb_img, side=side)
        spimg = sitk.GetImageFromArray(spmap)
        sitk.WriteImage(spimg, "./superpixels/"+str(side)+"/"+row["id_image"]+".nrrd", True)

        # Paint the annotated boxes with the class index.
        boxes = _parse_label_boxes(row['label'])
        gt_arr = _rasterize_boxes(boxes, (spmap.shape[0], spmap.shape[1]), c)
        if save_gt:
            sitk.WriteImage(sitk.GetImageFromArray(gt_arr), "./groundtruth/"+row["id_image"]+".nrrd", True)

        # Enumerate the superpixel labels present in the map.
        lsif = sitk.LabelShapeStatisticsImageFilter()
        lsif.Execute(spimg)
        labels = lsif.GetLabels()

        for la, pcs in _superpixel_fractions(spmap, gt_arr, c, labels):
            records.append({
                'image_id': row['id_image'],
                'image_study': row['StudyInstanceUID'],
                'label_sp': la,
                'Negative_for_Pneumonia': row['Negative for Pneumonia'],
                'Typical_Appearance': row['Typical Appearance'],
                'Indeterminate_Appearance': row['Indeterminate Appearance'],
                'Atypical_Appearance': row['Atypical Appearance'],
                'Negative_for_Pneumonia_percentage': pcs[0],
                'Typical_Appearance_percentage': pcs[1],
                'Indeterminate_Appearance_percentage': pcs[2],
                'Atypical_Appearance_percentage': pcs[3],
            })

        print(ii, row['id_image'], time.time()-tic)

    # Save the per-superpixel table.
    df_superpixels = pd.DataFrame(records, columns=columns)
    df_superpixels.to_csv('train_superpixels.csv',index=False)

# Small run: side=100, restricted to rows 0-9 of the merged table, and the
# ground-truth masks are saved as well.
make_superpixels_train(side=100, l_inf=0, l_sup=10, save_gt=True)

# Disabled batch runs, kept for reference.
# NOTE(review): no batch covers rows 4000-5000, and every call writes the same
# output file names ('train_combine.csv', 'train_superpixels.csv').
# make_superpixels_train(side=50, l_inf=0, l_sup=1000, save_gt=True)
# make_superpixels_train(side=50, l_inf=1000, l_sup=2000, save_gt=True)
# make_superpixels_train(side=50, l_inf=2000, l_sup=3000, save_gt=True)
# make_superpixels_train(side=50, l_inf=3000, l_sup=4000, save_gt=True)
# make_superpixels_train(side=50, l_inf=5000, l_sup=6000, save_gt=True)
# make_superpixels_train(side=50, l_inf=6000, l_sup=7000, save_gt=True)
        

In [None]:
# imginputs = ["../input/imagesample/bear.jpg","../input/imagesample/bear.jpg"]
# i=0
# for imginput in imginputs:
#     imgoutput = "bear"+str(i)+".nrrd"
#     sp = makesp(imagefi=imginput, imagefo=imgoutput, side=30, comp=100, issave=True)
#     drawsp(imgoutput,"bearr"+str(i)+".png")
#     i+=1
#     #import urllib.request
#     # urllib.request.urlretrieve(
#     #     'https://criptoativo.com.br/wp-content/uploads/2021/01/Grayscale-1536x984.jpeg',
#     #     "gfg.jpeg")
#     # makesp("gfg.jpeg")

In [None]:
# from IPython.display import Image
# Image("bearr0.png")

In [None]:
# Image("bearr1.png")

In [None]:
#         argpass = []
#         for la in labels:
#             argpass.append({
#                 "c":c,
#                 "la":la,
#                 "sp":spmap,
#                 "gt":gt_arr
#             })
        
        
#         ncpus = 15
#         #ncpus = multiprocessing.cpu_count()-1
#         pool = Pool(processes=ncpus)
#         rr = pool.map(process_sp, argpass)
#         pool.close()

#         for rs in rr:
#             la = rs[0]
#             pcs = rs[1]
#             dfs_image_id.append(row['id_image'])
#             dfs_image_study.append(row['StudyInstanceUID'])
#             dfs_label_sp.append(la)
#             dfs_label_1.append(row['Negative for Pneumonia'])
#             dfs_label_2.append(row['Typical Appearance'])
#             dfs_label_3.append(row['Indeterminate Appearance'])
#             dfs_label_4.append(row['Atypical Appearance'])
#             dfs_label_1_p.append(pcs[0])
#             dfs_label_2_p.append(pcs[1])
#             dfs_label_3_p.append(pcs[2])
#             dfs_label_4_p.append(pcs[3])
