In [4]:
from __future__ import print_function, division

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.patches import Rectangle
import matplotlib.image as mpimg
from PIL import Image, ImageDraw
import PIL.ImageOps
import re
import os
from skimage import io, transform
import tensorflow as tf
import glob
import ast
import cv2
import json

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

plt.ion()   # interactive mode


In [84]:
# Build one table of annotated images: case metadata -> images -> coordinates.

cases = pd.read_csv("data/cases.csv")
images = pd.read_csv("data/images.csv")
coors = pd.read_csv("data/coors.csv")
case_df = cases[["id", "num", "tirads"]]

# Inner joins: only cases with images, and only images with coordinate rows,
# survive.
df = case_df.merge(images, how='inner', left_on='id', right_on='case_id')
df1 = df.merge(coors, how='inner', left_on='id_y', right_on='image_id')
# `id_x` / `id_y` are the merge-suffixed `id` columns of the first join;
# `id` presumably comes from coors -- verify against the CSV header.
df1 = df1.drop(labels=["id_x", "id_y", "id"], axis="columns")

# Create new columns.
# `type` is derived from the folder name that appears in the file path; the
# rules run in this order, so a later match overwrites an earlier one.
for folder, split_name in (("bening", "test"), ("maling", "val"), ("thyroid", "train")):
    df1.loc[df1["filename"].str.contains(folder), "type"] = split_name
df1["cat"] = "thyroid"

# benign & maling files are the same

# Work on an independent copy so `df1` stays available unmodified.
segments = df1.copy()

In [85]:
# Keep every row whose `type` is not "val", then renumber the rows from 0.
segments = segments[~(segments["type"] == "val")].reset_index(drop=True)
segments.shape

(716, 12)

# Coordinates x,y extraction for segmentation

In [13]:

def flatten_json(y):
    ''' flatten nested dicts / lists into one single-level dict

    Every leaf value is stored under a key made of the path leading to it,
    joined with "_": dict keys are used as they are, list items contribute
    their position.  Example: {"a": [{"x": 1}]} -> {"a_0_x": 1}.

    A bare scalar input ends up under the empty key "".
    Empty dicts / lists add no entries.
    Only exact `dict` and `list` objects are descended into.

    '''

    flat = {}

    def _walk(node, prefix):
        kind = type(node)
        if kind is dict:
            children = ((key, node[key]) for key in node)
        elif kind is list:
            children = ((str(pos), item) for pos, item in enumerate(node))
        else:
            # Leaf: drop the trailing "_" separator of the accumulated path.
            flat[prefix[:-1]] = node
            return
        for part, child in children:
            _walk(child, prefix + part + '_')

    _walk(y, '')
    return flat

In [6]:
# Break up Marks
#
# The text between a "points" key and the following "annotation" key of
# `mark` is captured; the first capture becomes `mark_1`, the second (when
# there is one) becomes `mark_2`, otherwise `mark_2` is "".

pattern = r'"points"(.*?)"annotation"'

# Remove empty rows FIRST: the parse below indexes the first regex match and
# would raise on an empty mark.  Missing values (NaN, e.g. after a CSV round
# trip) count as empty too.  The index is renumbered because the later
# per-row loops address rows as 0..n-1.
has_mark = segments["mark"].fillna("") != ""
segments = segments[has_mark].reset_index(drop=True)

# Run the regex once per row and reuse the matches for both columns.
found_marks = segments["mark"].apply(lambda text: re.findall(pattern, text))

# [2:-2] strips two characters from each end of the capture
# (presumably the leading ': ' and the trailing ', ').
segments["mark_1"] = found_marks.apply(lambda found: found[0][2:-2])
segments["mark_2"] = found_marks.apply(lambda found: found[1][2:-2] if len(found) > 1 else "")

In [20]:
segments.head()

Unnamed: 0,num,tirads,case_id,filename,mark,image_id,x,y,w,h,...,path,path_label,centre_x,centre_y,p_w,p_h,mark_1,mark_2,name,path_gt
0,545,5,1,cimalab/bening/545/1.jpg,"[{""points"": [{""x"": 413, ""y"": 91}, {""x"": 406, ""...",1,385,91,68,98,...,data/image/bening_545_1_image.jpg,data/label/bening_545_1_label.jpg,1.745833,0.583333,0.283333,0.408333,"[{""x"": 413, ""y"": 91}, {""x"": 406, ""y"": 93}, {""x...",,bening_545_1,data/gt/bening_545_1_gt.jpg
1,545,5,1,cimalab/bening/545/2.jpg,"[{""points"": [{""x"": 291, ""y"": 76}, {""x"": 281, ""...",2,271,72,100,143,...,data/image/bening_545_2_image.jpg,data/label/bening_545_2_label.jpg,1.3375,0.597917,0.416667,0.595833,"[{""x"": 291, ""y"": 76}, {""x"": 281, ""y"": 84}, {""x...",,bening_545_2,data/gt/bening_545_2_gt.jpg
2,549,4c,5,cimalab/bening/549/1.jpg,"[{""points"": [{""x"": 292, ""y"": 123}, {""x"": 293, ...",3,292,101,79,56,...,data/image/bening_549_1_image.jpg,data/label/bening_549_1_label.jpg,1.38125,0.5375,0.329167,0.233333,"[{""x"": 292, ""y"": 123}, {""x"": 293, ""y"": 129}, {...",,bening_549_1,data/gt/bening_549_1_gt.jpg
3,549,4c,5,cimalab/bening/549/2.jpg,"[{""points"": [{""x"": 259, ""y"": 142}, {""x"": 258, ...",4,258,95,69,53,...,data/image/bening_549_2_image.jpg,data/label/bening_549_2_label.jpg,1.21875,0.50625,0.2875,0.220833,"[{""x"": 259, ""y"": 142}, {""x"": 258, ""y"": 138}, {...",,bening_549_2,data/gt/bening_549_2_gt.jpg
4,550,5,6,cimalab/bening/550/1.jpg,"[{""points"": [{""x"": 236, ""y"": 110}, {""x"": 236, ...",5,235,71,104,138,...,data/image/bening_550_1_image.jpg,data/label/bening_550_1_label.jpg,1.195833,0.583333,0.433333,0.575,"[{""x"": 236, ""y"": 110}, {""x"": 236, ""y"": 114}, {...",,bening_550_1,data/gt/bening_550_1_gt.jpg


In [18]:
# Reload the segments table from its CSV cache.
# NOTE(review): data/segments2.csv is only written by the `segments.to_csv(...)`
# cells further down, so on a fresh kernel the file must already exist on disk.
segments = pd.read_csv("data/segments2.csv")
segments.shape

(716, 22)

In [19]:
# benign & maling files are the same
# (an earlier variant dropped every type=="val" row here instead)
# Exclude four specific images by file name and renumber the rows from 0.
segments = segments[
    ~segments["filename"].isin([
        "cimalab/bening/550/3.jpg",
        "cimalab/bening/571/2.jpg",
        "cimalab/thyroid/338_2.jpg",
        "cimalab/thyroid/337_3.jpg",
    ])
].reset_index(drop=True)
segments.shape

(710, 22)

In [21]:
# Persist the current `segments` table (row index omitted) to the CSV that the
# `pd.read_csv("data/segments2.csv")` cell reads back.
segments.to_csv('data/segments2.csv', index=False)

In [10]:
def img_rescale(data, size=256, pad_bottom=245):

    ''' pad every raw image at the bottom and write a resized square copy

    For each row the image data/<filename> is read, `pad_bottom` constant
    (zero-valued) rows are appended below it, the result is resized to
    size * size and written to that row's `path`.

    Defaults reproduce the original pipeline: raw image size = 560 * 315,
    so 245 extra rows give 560 * 560 before the resize to 256 * 256.

    arg:
    data = DataFrame with columns `filename` (relative to data/) and
           `path` (output file)
    size = edge length of the square output in pixels
    pad_bottom = number of rows added below the image before resizing

    '''

    # zip() walks both columns by position, so the function does not require
    # `data` to carry a fresh 0..n-1 index (label lookups such as
    # data.filename[i] raise KeyError on a filtered frame).
    for img_id, out_path in zip(data["filename"], data["path"]):
        # Close the source file as soon as its pixels are in memory.
        with Image.open('data/{}'.format(img_id)) as raw:
            im = np.array(raw, dtype=np.uint8)
        # NOTE(review): PIL arrays are RGB while cv2.imwrite assumes BGR;
        # harmless for grey-scale frames -- confirm the inputs are grey.
        img = cv2.copyMakeBorder(im, 0, pad_bottom, 0, 0, cv2.BORDER_CONSTANT)
        resize_im = cv2.resize(img, (size, size))
        #create rescaled images
        cv2.imwrite(out_path, resize_im)

# Write the rescaled copy of every image listed in `segments` to its `path`.
img_rescale(segments)

In [275]:
#segments['name'] = segments["name"].str.replace("/","_")

In [14]:
def _mark_polygon(mark_json):
    ''' JSON text of a point list -> flat [x0, y0, x1, y1, ...] list '''
    # Same extraction rule as before: keep only the plain-int leaf values of
    # the flattened JSON, in document order.
    coords = [v for v in flatten_json(json.loads(mark_json)).values() if type(v) == int]
    if len(coords) % 2:
        # Mirrors the failure the former reshape(-1, 2) raised on odd input.
        raise ValueError("odd number of coordinates in mark: {}".format(len(coords)))
    return coords


def label(data, size=(560, 560)):
    ''' black & White label based on the mark annotations

    For each row a grey-scale ('L') image filled with 255 is created, the
    polygon of `mark_1` -- and of `mark_2` when present -- is drawn on it
    with value 1 (outline and fill), and the result is saved once to the
    row's `path_label`.

    arg:
    data = DataFrame with columns `path_label`, `mark_1`, `mark_2`
           (marks are the JSON point lists)
    size = (width, height) of the label image, default 560 * 560

    '''

    for out_path, mark_1, mark_2 in zip(data["path_label"], data["mark_1"], data["mark_2"]):
        img = Image.new('L', size, 255)
        canvas = ImageDraw.Draw(img)
        canvas.polygon(_mark_polygon(mark_1), outline=1, fill=1)

        # mark_2 is "" when built in memory but NaN once the table has been
        # through to_csv / read_csv; both mean "no second segmentation".
        if not pd.isnull(mark_2) and mark_2 != "":
            # Draw on the same in-memory image instead of re-opening the
            # saved JPEG, which would compress the first polygon twice.
            canvas.polygon(_mark_polygon(mark_2), outline=1, fill=1)

        img.save(out_path)

# Draw and save the label image of every row in `segments`.
label(segments)   


In [15]:
def label_rescale(data,sizeby):

    ''' invert every label image and shrink it to fit sizeby * sizeby

    Each image at `path_label` is colour-inverted, reduced in place with
    thumbnail() and saved as data/label_/<name>_label.jpg.

    arg:
    data = DataFrame with columns `path_label` and `name`
    sizeby = edge length of the bounding square in pixels

    NOTE(review): the output goes to data/label_/ while `path_label` is read
    from the table unchanged -- confirm which directory later steps should
    use.
    '''

    # Loop-invariant target size.
    size = (sizeby, sizeby)

    # Positional iteration: no dependency on a 0..n-1 index.
    for label_path, name in zip(data["path_label"], data["name"]):
        # invert() returns a new image, so the source file can be closed.
        with Image.open(label_path) as img:
            #inversing label
            inverted_image = PIL.ImageOps.invert(img)

        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the filter it
        # was an alias for and exists in old and new releases alike.
        inverted_image.thumbnail(size, Image.LANCZOS)
        inverted_image.save("data/label_/{}_label.jpg".format(name))
        
# Invert the labels of `segments` and shrink them to fit 256 x 256.
label_rescale(segments,256)

In [16]:
def gt(data, size=(256, 256)):

    ''' ground truth : combine image and label

    For each row a black RGB canvas is created, the image at `path` is
    pasted at (0, 0) and the label at `path_label` is pasted on top using
    itself as the paste mask.  The result is saved as
    data/gt/<name>_gt.jpg and that file name is recorded in a `path_gt`
    column of `data` (added or overwritten in place).

    arg:
    data = DataFrame with columns `path_label`, `path` and `name`
    size = (width, height) of the canvas, default 256 * 256

    '''

    gt_paths = []
    # Positional iteration: no dependency on a 0..n-1 index.
    for label_path, image_path, name in zip(data["path_label"], data["path"], data["name"]):
        out_path = "data/gt/{}_gt.jpg".format(name)
        with Image.open(label_path) as img, Image.open(image_path) as bg:
            gt_img = Image.new('RGB', size, (0, 0, 0))
            gt_img.paste(bg, (0,0))
            gt_img.paste(img, (0,0), mask=img)
        gt_img.save(out_path)
        gt_paths.append(out_path)

    # One whole-column assignment replaces the former element-wise chained
    # assignment into an int-initialised column, which pandas may apply to a
    # temporary copy and which ignores writes under copy-on-write.
    data["path_gt"] = gt_paths
        
# Write the ground-truth overlays and fill `segments["path_gt"]`.
gt(segments)

In [124]:
segments.head()

Unnamed: 0,num,tirads,case_id,filename,mark,image_id,x,y,w,h,...,path,path_label,centre_x,centre_y,p_w,p_h,mark_1,mark_2,name,path_gt
0,545,5,1,cimalab/bening/545/1.jpg,"[{""points"": [{""x"": 413, ""y"": 91}, {""x"": 406, ""...",1,385,91,68,98,...,data/image/bening_545_1_image.jpg,data/label/bening_545_1_label.jpg,1.745833,0.583333,0.283333,0.408333,"[{""x"": 413, ""y"": 91}, {""x"": 406, ""y"": 93}, {""x...",,bening_545_1,data/gt/bening_545_1_gt.jpg
1,545,5,1,cimalab/bening/545/2.jpg,"[{""points"": [{""x"": 291, ""y"": 76}, {""x"": 281, ""...",2,271,72,100,143,...,data/image/bening_545_2_image.jpg,data/label/bening_545_2_label.jpg,1.3375,0.597917,0.416667,0.595833,"[{""x"": 291, ""y"": 76}, {""x"": 281, ""y"": 84}, {""x...",,bening_545_2,data/gt/bening_545_2_gt.jpg
2,549,4c,5,cimalab/bening/549/1.jpg,"[{""points"": [{""x"": 292, ""y"": 123}, {""x"": 293, ...",3,292,101,79,56,...,data/image/bening_549_1_image.jpg,data/label/bening_549_1_label.jpg,1.38125,0.5375,0.329167,0.233333,"[{""x"": 292, ""y"": 123}, {""x"": 293, ""y"": 129}, {...",,bening_549_1,data/gt/bening_549_1_gt.jpg
3,549,4c,5,cimalab/bening/549/2.jpg,"[{""points"": [{""x"": 259, ""y"": 142}, {""x"": 258, ...",4,258,95,69,53,...,data/image/bening_549_2_image.jpg,data/label/bening_549_2_label.jpg,1.21875,0.50625,0.2875,0.220833,"[{""x"": 259, ""y"": 142}, {""x"": 258, ""y"": 138}, {...",,bening_549_2,data/gt/bening_549_2_gt.jpg
4,550,5,6,cimalab/bening/550/1.jpg,"[{""points"": [{""x"": 236, ""y"": 110}, {""x"": 236, ...",5,235,71,104,138,...,data/image/bening_550_1_image.jpg,data/label/bening_550_1_label.jpg,1.195833,0.583333,0.433333,0.575,"[{""x"": 236, ""y"": 110}, {""x"": 236, ""y"": 114}, {...",,bening_550_1,data/gt/bening_550_1_gt.jpg


In [307]:
# Persist the current `segments` table (row index omitted) to data/segments2.csv.
segments.to_csv('data/segments2.csv', index=False)


Unnamed: 0,num,tirads,case_id,filename,mark,image_id,x,y,w,h,...,path_label,centre_x,centre_y,p_w,p_h,mark_1,mark_2,name,single_mask,path_gt
0,545,5,1,cimalab/bening/545/1.jpg,"[{""points"": [{""x"": 413, ""y"": 91}, {""x"": 406, ""...",1,385,91,68,98,...,data/label/bening_545_1_label.jpg,1.745833,0.583333,0.283333,0.408333,"[{""x"": 413, ""y"": 91}, {""x"": 406, ""y"": 93}, {""x...",,bening_545_1,0,data/gt/bening_545_1_gt.jpg
1,545,5,1,cimalab/bening/545/2.jpg,"[{""points"": [{""x"": 291, ""y"": 76}, {""x"": 281, ""...",2,271,72,100,143,...,data/label/bening_545_2_label.jpg,1.3375,0.597917,0.416667,0.595833,"[{""x"": 291, ""y"": 76}, {""x"": 281, ""y"": 84}, {""x...",,bening_545_2,0,data/gt/bening_545_2_gt.jpg
2,549,4c,5,cimalab/bening/549/1.jpg,"[{""points"": [{""x"": 292, ""y"": 123}, {""x"": 293, ...",3,292,101,79,56,...,data/label/bening_549_1_label.jpg,1.38125,0.5375,0.329167,0.233333,"[{""x"": 292, ""y"": 123}, {""x"": 293, ""y"": 129}, {...",,bening_549_1,0,data/gt/bening_549_1_gt.jpg
3,549,4c,5,cimalab/bening/549/2.jpg,"[{""points"": [{""x"": 259, ""y"": 142}, {""x"": 258, ...",4,258,95,69,53,...,data/label/bening_549_2_label.jpg,1.21875,0.50625,0.2875,0.220833,"[{""x"": 259, ""y"": 142}, {""x"": 258, ""y"": 138}, {...",,bening_549_2,0,data/gt/bening_549_2_gt.jpg
4,550,5,6,cimalab/bening/550/1.jpg,"[{""points"": [{""x"": 236, ""y"": 110}, {""x"": 236, ...",5,235,71,104,138,...,data/label/bening_550_1_label.jpg,1.195833,0.583333,0.433333,0.575,"[{""x"": 236, ""y"": 110}, {""x"": 236, ""y"": 114}, {...",,bening_550_1,0,data/gt/bening_550_1_gt.jpg


In [17]:
def enumerated_img(outroot, data):
    ''' re-save images under sequential, zero-padded names

    The k-th path of `data` is read with cv2 and written to
    <outroot>/<k as at least 3 digits>.jpg (000.jpg, 001.jpg, ...).

    arg:
    outroot = output directory, created when it does not exist
    data = sequence of image paths (list or pandas Series)

    raise:
    IOError when a source image cannot be read

    '''
    # cv2.imwrite does not create missing directories, so make sure the
    # target exists before the loop.
    if not os.path.isdir(outroot):
        os.makedirs(outroot)

    # enumerate() walks the values by position, so a Series with a
    # non-default index is handled as well.
    for position, src_path in enumerate(data):
        save_path = os.path.join(outroot, str(position).zfill(3) + '.jpg')
        img = cv2.imread(src_path)
        if img is None:
            # cv2.imread signals an unreadable / missing file by returning None.
            raise IOError("could not read image: {}".format(src_path))
        cv2.imwrite(save_path, img)
# Export labels, images and ground-truth overlays under matching sequential
# names; the three columns come from the same `segments` rows, so number k
# refers to the same row in every output folder.
enumerated_img("data/enumerate/label/", segments.path_label)   
enumerated_img("data/enumerate/image/", segments.path)
enumerated_img("data/enumerate/gt/", segments.path_gt)

In [15]:
# data_root="data/inverted/"
# def enumerated_img(outroot):
#     file_path = []           
#     file_path += glob.glob(os.path.join("data/inverted/", '*.jpg'))
#     for i in range(len(file_path)):
#         if len(str(i)) == 1:
#             name = '00'+str(i)
#         elif len(str(i)) == 2:
#             name = '0'+str(i)
#         else:
#             name = str(i)
#         save_path =os.path.join(outroot, name+'.jpg')
#         img = cv2.imread(file_path[i]) 
#         cv2.imwrite(save_path, img)
                           
# enumerated_img("data/inverted_240/")