In [38]:
import cv2
import numpy as np
import json
import csv
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import os
from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, Activation, MaxPooling2D, UpSampling2D, Dropout, Conv2DTranspose, concatenate
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
import tensorflow as tf



In [46]:
#Data augmentation adaptado e modificado de https://github.com/FengYen-Chang/Data-Augmentation
import random


# avg blur minimum filter size is 3
def avg_blur(img, max_filiter_size = 3) :
    """Blur ``img`` with a box filter of random size.

    The kernel size is drawn uniformly from ``[3, max_filiter_size]``; an even
    draw is bumped to the next odd value (so it may be ``max_filiter_size + 1``).

    Args:
        img: image array; it is converted to ``uint8`` before filtering.
        max_filiter_size: largest kernel size to draw. Values below 3 disable
            the blur.

    Returns:
        The blurred ``uint8`` image, or the unblurred ``uint8`` copy when
        ``max_filiter_size < 3``.
    """
    img = img.astype(np.uint8)
    if max_filiter_size < 3 :
        # Previously this path fell through to `return out` with `out` never
        # assigned (UnboundLocalError). Return the image untouched instead.
        return img
    filter_size = random.randint(3, max_filiter_size)
    if filter_size % 2 == 0 :
        filter_size += 1
    return cv2.blur(img, (filter_size, filter_size))

# Gaussian blur: the minimum filter size is 3.
# When sigma = 0, the Gaussian weights are computed by OpenCV from the kernel size.
# The larger the sigma, the stronger the blur effect.

def gaussain_blur(img, max_filiter_size = 3, sigma = 0) :
    """Blur ``img`` with a Gaussian kernel of random odd size.

    The kernel size is drawn uniformly from ``[3, max_filiter_size]``; an even
    draw is bumped to the next odd value because ``cv2.GaussianBlur`` needs an
    odd kernel (so the size may be ``max_filiter_size + 1``).

    Args:
        img: image array; it is converted to ``uint8`` before filtering.
        max_filiter_size: largest kernel size to draw. Values below 3 disable
            the blur.
        sigma: standard deviation passed to ``cv2.GaussianBlur``.

    Returns:
        The blurred ``uint8`` image, or the unblurred ``uint8`` copy when
        ``max_filiter_size < 3``.
    """
    img = img.astype(np.uint8)
    if max_filiter_size < 3 :
        # Previously this path fell through to `return out` with `out` never
        # assigned (UnboundLocalError). Return the image untouched instead.
        return img
    filter_size = random.randint(3, max_filiter_size)
    if filter_size % 2 == 0 :
        filter_size += 1
    return cv2.GaussianBlur(img, (filter_size, filter_size), sigma)

def gaussain_noise(img, mean = 0, var = 0.1) :
    """Add Gaussian noise to ``img`` and return a ``uint8`` image.

    Args:
        img: image array of any rank; it is converted to ``uint8`` first.
        mean: mean of the noise distribution.
        var: variance of the noise distribution (sigma is ``var ** 0.5``).

    Returns:
        A ``uint8`` array with the same shape as ``img``.
    """
    img = img.astype(np.uint8)
    sigma = var ** 0.5
    gauss = np.random.normal(mean, sigma, img.shape)
    # Add in floating point and saturate. The previous version cast the noise
    # to uint8 before adding, so negative noise and sums above 255 wrapped
    # around instead of clipping.
    noisy = np.rint(img.astype(np.float64) + gauss)
    return np.clip(noisy, 0, 255).astype(np.uint8)

# fill_pixel is 0(black) or 255(white)
def img_shift(img,mask, x_min_shift_piexl = -1, x_max_shift_piexl = 1, y_min_shift_piexl = -1, y_max_shift_piexl = 1, fill_pixel = 0):
  """Translate an image and its mask by the same random offset.

  The horizontal offset is drawn from [x_min_shift_piexl, x_max_shift_piexl]
  and the vertical one from [y_min_shift_piexl, y_max_shift_piexl]. Uncovered
  image pixels are 255 when fill_pixel == 255 and 0 otherwise; uncovered mask
  pixels are always 0.

  Returns a (shifted_image, shifted_mask) tuple of uint8 arrays.
  """
  def axis_slices(offset, length):
    # (destination slice, source slice) along one axis for a given offset.
    if offset >= 0:
      return slice(offset, None), slice(0, length - offset)
    return slice(0, length + offset), slice(-offset, None)

  source = img.astype(np.uint8)
  rows, cols, _ = source.shape

  if fill_pixel == 255:
    shifted = np.full(source.shape, 255, dtype=np.uint8)
  else:
    shifted = np.zeros(source.shape, dtype=np.uint8)
  shifted_mask = np.zeros(mask.shape, dtype=np.uint8)

  # Draw x first, then y, to keep the random stream order.
  dx = random.randint(x_min_shift_piexl, x_max_shift_piexl)
  dy = random.randint(y_min_shift_piexl, y_max_shift_piexl)

  dst_cols, src_cols = axis_slices(dx, cols)
  dst_rows, src_rows = axis_slices(dy, rows)

  shifted[dst_rows, dst_cols] = source[src_rows, src_cols]
  shifted_mask[dst_rows, dst_cols] = mask[src_rows, src_cols]

  return shifted, shifted_mask


# img_flip randomly flips the image (and its mask) according to a random flip factor:
#    1 -> horizontal flip
#    0 -> vertical flip
#   -1 -> horizontal + vertical flip
def img_flip(img,mask):
  """Flip an image and its mask with the same randomly chosen flip code.

  The flip code is drawn from {-1, 0, 1} and passed to cv2.flip for both
  arrays. The image is converted to uint8 first; the mask is used as given.

  Returns a (flipped_image, flipped_mask) tuple.
  """
  image_u8 = img.astype(np.uint8)
  flip_code = random.randint(-1, 1)
  flipped_image = cv2.flip(image_u8, flip_code)
  flipped_mask = cv2.flip(mask, flip_code)
  return flipped_image, flipped_mask


# change image contrast by hsv
def img_contrast(img, min_s, max_s, min_v, max_v) :
    """Randomly shift the saturation and value channels of a BGR image.

    Args:
        img: BGR image array; it is converted to ``uint8`` first.
        min_s, max_s: inclusive range of the random offset added to the
            saturation channel (may be negative).
        min_v, max_v: inclusive range of the random offset added to the
            value channel (may be negative).

    Returns:
        The adjusted image converted back to BGR.
    """
    img = img.astype(np.uint8)
    hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    _s = random.randint(min_s, max_s)
    _v = random.randint(min_v, max_v)

    # Work in a wider signed type so offsets saturate instead of wrapping
    # around the uint8 range.
    shifted = hsv_img.astype(np.int16)
    shifted[:, :, 1] += _s
    # A negative _v now really darkens the image; the previous code negated
    # _v and then added it again, so it always brightened.
    shifted[:, :, 2] += _v
    hsv_img = np.clip(shifted, 0, 255).astype(np.uint8)

    return cv2.cvtColor(hsv_img, cv2.COLOR_HSV2BGR)

#Edge enhance
def sharpen_img(img):
    """Sharpen ``img`` with a 5x5 edge-enhancement kernel.

    The kernel is symmetric and its coefficients sum to 8, so after the
    division by 8 it sums to 1 and flat regions keep their brightness.

    Returns the filtered image with the same depth as the input
    (``ddepth=-1``).
    """
    # The fourth row previously started with -2, which made the kernel
    # asymmetric and made it sum to 7/8, slightly darkening every output.
    kernel = np.array([[-1,-1,-1,-1,-1],
                       [-1, 2, 2, 2,-1],
                       [-1, 2, 8, 2,-1],
                       [-1, 2, 2, 2,-1],
                       [-1,-1,-1,-1,-1]])/8.0
    return cv2.filter2D(img,-1,kernel)


def create_augment_data(arrayimgs,arraymasks,generate_quantity=50):
  """Extend a dataset with randomly augmented copies of every sample.

  For each (image, mask) pair, ``generate_quantity`` augmented variants are
  produced. Every augmentation (average blur, Gaussian blur, Gaussian noise,
  shift, flip, contrast, sharpen) is applied independently with probability
  1/2, in that order. Shift and flip are applied to the mask as well so it
  stays aligned with the image.

  Args:
    arrayimgs: sequence of images.
    arraymasks: sequence of masks, aligned index-by-index with ``arrayimgs``.
    generate_quantity: number of augmented variants per input sample
      (defaults to 50, the value that used to be hard-coded).

  Returns:
    ``(images, masks)`` as two new lists: the original samples followed by
    the augmented ones. The input sequences are not modified.
  """
  # list() instead of .copy() so NumPy arrays are accepted as well as lists.
  auxarrayimgs = list(arrayimgs)
  auxarraymasks = list(arraymasks)

  # parameters for the data augmentation functions
  _max_filiter_size = 5      # for avg_blur and gaussain_blur
  _sigma = 0                 # for gaussain_blur

  _mean = 0                  # for gaussain_noise
  _var = 0.1                 # for gaussain_noise

  _x_min_shift_piexl = -20   # for img_shift
  _x_max_shift_piexl = 20    # for img_shift
  _y_min_shift_piexl = -20   # for img_shift
  _y_max_shift_piexl = 20    # for img_shift
  _fill_pixel = 255          # for img_shift

  _min_s = -10               # for img_contrast
  _max_s = 10                # for img_contrast
  _min_v = -10               # for img_contrast
  _max_v = 10                # for img_contrast

  for index in range(len(arrayimgs)):
    for _ in range(generate_quantity):
      img = arrayimgs[index]
      mask = arraymasks[index]

      if random.randint(0, 1) == 1:
        img = avg_blur(img, _max_filiter_size)

      if random.randint(0, 1) == 1:
        img = gaussain_blur(img, _max_filiter_size, _sigma)

      if random.randint(0, 1) == 1:
        img = gaussain_noise(img, _mean, _var)

      if random.randint(0, 1) == 1:
        img,mask = img_shift(img, mask, _x_min_shift_piexl, _x_max_shift_piexl, _y_min_shift_piexl, _y_max_shift_piexl, _fill_pixel)

      if random.randint(0, 1) == 1:
        img,mask = img_flip(img, mask)

      if random.randint(0, 1) == 1:
        img = img_contrast(img, _min_s, _max_s, _min_v, _max_v)

      if random.randint(0, 1) == 1:
        img = sharpen_img(img)

      auxarrayimgs.append(img)
      auxarraymasks.append(mask)

  return auxarrayimgs, auxarraymasks



In [48]:
#  Carregando os arquivos de anotação JSON e capturando as coordenadas do objeto de interesse.
#  Com tais pontos torna-se possível criar máscaras binárias para o posterior treino de um modelo de IA.
#  Como maneira de redimensionar a imagem, é mais eficaz realizar um corte: como as anotações estão
# em forma de coordenadas, o processo de redimensionar teria maior custo, levando em conta que as anotações são importantes
# para a geração das máscaras.
#  Tendo em vista que de início já temos as coordenadas da área de interesse, já é possível calcular o centroide
# por meio de momentos de imagem: https://en.wikipedia.org/wiki/Image_moment.
#  O centroide do objeto alvo é então usado como ponto de início do raio de corte sobre as imagens, sendo este definido manualmente.
#  Sabendo agora que o ponto de início é um centroide, para chegar ao tamanho desejado X do recorte
# é utilizado o valor escolhido dividido por 2 (um diâmetro é igual a duas vezes o raio); o valor X foi definido como 700
# por ser o menor lado de imagem dentre as que vi.
#  Esse tipo de recorte também traz outro desafio: objetos de interesse
# próximos às bordas da imagem. A solução é recortar apenas o possível do lado que está na borda e compensar do outro lado.
#  O modelo utilizado foi baseado em uma arquitetura U-Net simples, exemplificada em https://www.kaggle.com/keegil/keras-u-net-starter-lb-0-277
# e, como é de se esperar, a qualidade é baixa.
#  O data augmentation randomiza as imagens de entrada entre vários algoritmos e filtros, e estes, por fim, também randomizam os valores dos parâmetros.


def calculate_centroid(pts):
  """Return the integer (x, y) centroid of a polygon/contour.

  The centroid is computed from the image moments of ``pts``
  (``m10 / m00``, ``m01 / m00``).

  Degenerate polygons (zero area, e.g. collinear or repeated points) have
  ``m00 == 0``; the previous code raised ZeroDivisionError for them. In that
  case the mean of the vertices is returned instead.
  """
  M = cv2.moments(pts)
  if M["m00"] == 0:
    # Zero-area shape: the moment-based centroid is undefined, use the
    # average vertex position as the centre.
    mean_x, mean_y = np.asarray(pts).reshape(-1, 2).mean(axis=0)
    return (int(mean_x), int(mean_y))
  cX = int(M["m10"] / M["m00"])
  cY = int(M["m01"] / M["m00"])
  return (cX, cY)
  

def crop_ROI_img(img,crop_radius,centroidX,centroidY):
  """Crop a window of side ``crop_radius`` centred on a point.

  The window is centred on (centroidX, centroidY). When it would cross an
  image border it is slid back inside the image so the crop keeps its full
  size whenever the image is large enough.

  Args:
    img: 2-D (grayscale/mask) or 3-D (colour) image array.
    crop_radius: side length of the crop window in pixels (half of it is
      taken on each side of the centroid).
    centroidX, centroidY: centre of the window in pixel coordinates.

  Returns:
    The cropped view of ``img``. If the image is smaller than the window
    along an axis, the whole extent of that axis is returned.
  """
  def bounds(center, half, limit):
    # (start, stop) of the window along one axis, slid inside [0, limit].
    upper = center + half
    lower = center - half
    if upper > limit:
      # Window sticks out past the far edge: pin it there and extend back.
      stop = limit
      start = int(lower - (upper - limit))
    elif lower < 0:
      # Window sticks out before the origin: pin it at 0 and extend forward.
      start = 0
      stop = int(upper + abs(lower))
    else:
      start = int(lower)
      stop = int(upper)
    # A negative start would be read by NumPy as "count from the end" and
    # silently return a tiny (or wrong) crop, so clamp it to 0.
    return max(start, 0), stop

  crop = crop_radius/2
  height, width = img.shape[:2]

  left, right = bounds(centroidX, crop, width)
  bottom, top = bounds(centroidY, crop, height)

  newimage = img[bottom:top,left:right]
  return newimage


def create_mask(img,pts):
  """Build a binary mask for a polygon drawn over an image.

  Creates a single-channel uint8 array with the height and width of `img`
  (which must be 3-dimensional), filled with zeros, and draws the contour
  `pts` on it filled with 255.
  """
  rows, cols, _ = img.shape
  canvas = np.zeros((rows, cols), np.uint8)
  return cv2.drawContours(canvas, [pts], -1, 255, -1)

def resizeimgsforModel(img,size):
  """Resize every image in `img` to a `size` x `size` square.

  Each item is resized with cv2.resize using INTER_AREA interpolation.
  Returns a new list with the resized images, in the same order.
  """
  target = (size, size)
  return [cv2.resize(picture, target, interpolation = cv2.INTER_AREA) for picture in img]

def load_dataset(path):
  """Load image/annotation pairs from a directory and split them 80/20.

  The directory listing is sorted and consumed two entries at a time: the
  first entry of each pair is read as the image and the second as its JSON
  annotation (so the files are assumed to alternate image, annotation once
  sorted; a trailing unpaired file is ignored).

  For every pair, the polygon stored in ``data['shapes'][0]['points']`` is
  used to build a binary mask; image and mask are then cropped to a
  700-pixel window centred on the polygon centroid.

  Args:
    path: directory containing the images and their JSON annotations.

  Returns:
    ``(imgtrain, masktrain, imgtest, masktest)`` as four lists of arrays.

  Raises:
    ValueError: if an image file cannot be decoded.
  """
  crop_radius = 700

  def load_sample(image_path, annotation_path):
    # Return (cropped image, cropped mask), or None if the annotation has
    # no usable polygon.
    img = cv2.imread(image_path)
    if img is None:
      # cv2.imread signals failure by returning None; fail with a clear
      # message instead of an AttributeError further down.
      raise ValueError("Could not read image: " + image_path)

    with open(annotation_path) as f:
      data = json.load(f)

    polygon = data['shapes'][0].get('points') or []
    # Any polygon with at least 3 vertices can be filled. The previous code
    # only handled exactly 3 or 4 points and silently dropped the rest.
    if len(polygon) < 3:
      return None

    pts = np.array(polygon, np.int32).reshape((-1,1,2))
    (centroidX, centroidY) = calculate_centroid(pts)

    cropped_img = crop_ROI_img(img,crop_radius,centroidX,centroidY)
    cropped_mask = crop_ROI_img(create_mask(img,pts),crop_radius,centroidX,centroidY)
    return cropped_img, cropped_mask

  def load_split(image_paths, annotation_paths):
    # Load every pair of a split, skipping samples without a polygon.
    images = []
    masks = []
    for image_path, annotation_path in zip(image_paths, annotation_paths):
      sample = load_sample(image_path, annotation_path)
      if sample is not None:
        images.append(sample[0])
        masks.append(sample[1])
    return images, masks

  pathImages = sorted(os.listdir(path))
  imgfile = [os.path.join(path, name) for name in pathImages[0::2]]
  maskfile = [os.path.join(path, name) for name in pathImages[1::2]]
  # Drop a trailing image that has no annotation partner.
  imgfile = imgfile[:len(maskfile)]

  X_train, X_test, y_train, y_test = train_test_split(imgfile, maskfile, test_size=0.2, random_state=42)

  imgtrain, masktrain = load_split(X_train, y_train)
  imgtest, masktest = load_split(X_test, y_test)

  return (imgtrain,masktrain,imgtest,masktest)



def create_model(image_size):
  """Build a small U-Net style segmentation model.

  Encoder: three stages (16, 32, 64 filters) of two 3x3 ELU convolutions,
  each followed by batch normalization, then 2x2 max pooling.
  Bottleneck: two 3x3 ELU convolutions with 128 filters (no batch norm).
  Decoder: three stages (64, 32, 16 filters) of a stride-2 transposed
  convolution concatenated with the matching encoder output, followed by
  the same double convolution as the encoder.
  Output: 1x1 convolution with a sigmoid activation (one channel).

  `image_size` is the input shape passed to `Input`.
  """
  def double_conv(tensor, filters, batch_norm=True):
    # Two 3x3 ELU convolutions, each optionally followed by batch norm.
    for _ in range(2):
      tensor = Conv2D(filters, (3, 3), activation='elu', kernel_initializer='he_normal', padding='same')(tensor)
      if batch_norm:
        tensor = BatchNormalization()(tensor)
    return tensor

  def upsample_and_merge(tensor, skip, filters):
    # Upsample by 2 and concatenate with the encoder feature map.
    upsampled = Conv2DTranspose(filters, (2, 2), strides=(2, 2), padding='same')(tensor)
    return concatenate([upsampled, skip], axis=3)

  inputs = Input(image_size)

  # Contracting path; remember each stage output for the skip connections.
  skips = []
  features = inputs
  for filters in (16, 32, 64):
    features = double_conv(features, filters)
    skips.append(features)
    features = MaxPooling2D(pool_size=(2, 2))(features)

  features = double_conv(features, 128, batch_norm=False)

  # Expanding path; skip connections are consumed deepest-first.
  for filters in (64, 32, 16):
    merged = upsample_and_merge(features, skips.pop(), filters)
    features = double_conv(merged, filters)

  outputs = Conv2D(1, (1, 1), activation='sigmoid')(features)

  return Model(inputs=[inputs], outputs=[outputs])

In [49]:
def load_data(image_size):
    """Load, resize, augment and normalize the training and test data.

    Images and masks are read from the "TrainingSet" directory, resized to
    ``image_size`` x ``image_size``; only the training split is augmented.

    Args:
        image_size: side length, in pixels, of the square model input.

    Returns:
        ``(imgtrain, masktrain, imgtest, masktest)`` as float32 arrays.
        Images are scaled to [0, 1]; masks are binarized to {0, 1}.
    """
    imgtrain,masktrain,imgtest,masktest = load_dataset("TrainingSet")

    # Resizing images and masks for the model
    imgtrain = resizeimgsforModel(imgtrain,image_size)
    masktrain = resizeimgsforModel(masktrain,image_size)

    # Data augmentation (training split only)
    imgtrain,masktrain = create_augment_data(imgtrain,masktrain)

    imgtest = resizeimgsforModel(imgtest,image_size)
    masktest = resizeimgsforModel(masktest,image_size)

    imgtrain = np.array(imgtrain).astype(np.float32) / 255.0
    imgtest = np.array(imgtest).astype(np.float32) / 255.0

    # The masks are drawn with 255 for the object, and resizing leaves
    # intermediate values on the edges. The model ends in a sigmoid and is
    # trained with binary cross-entropy, which needs targets in {0, 1}.
    # Leaving them at 0/255 (as before) corrupts both loss and accuracy.
    masktrain = (np.array(masktrain) > 127).astype(np.float32)
    masktest = (np.array(masktest) > 127).astype(np.float32)

    return (imgtrain, masktrain, imgtest, masktest)



In [50]:
def train_model(model,batch_size,train_steps,validation_steps,epochs,imgtrain,masktrain,imgtest,masktest):
    """Fit ``model`` with checkpointing and early stopping on val_accuracy.

    Args:
        model: compiled Keras model.
        batch_size: number of samples per training batch.
        train_steps: number of batches per epoch.
        validation_steps: number of validation batches per epoch.
        epochs: maximum number of epochs.
        imgtrain, masktrain: training inputs and targets.
        imgtest, masktest: validation inputs and targets.

    Returns:
        The Keras ``History`` object returned by ``model.fit``.
    """
    callbacks = [
                # `save_best_only` is the actual ModelCheckpoint option; the
                # previous `save_best_model` keyword is not a recognized
                # argument, so a checkpoint was not restricted to the best epoch.
                ModelCheckpoint("pretreinedmodels/model.val_accuracy={val_accuracy:.5f}.h5", monitor='val_accuracy', verbose=1, save_best_only=True),
                EarlyStopping(monitor="val_accuracy", patience=5, verbose=1)
            ]

    # batch_size was accepted but never forwarded, so Keras used its default
    # of 32 and `train_steps` batches no longer covered the training set.
    # NOTE(review): validation uses the same batch size; if the caller passes
    # validation_steps larger than len(imgtest) / batch_size, Keras will run
    # out of validation data early - confirm the intended value.
    return model.fit(imgtrain,
                     masktrain,
                     batch_size=batch_size,
                     steps_per_epoch=train_steps,
                     validation_data=(imgtest,masktest),
                     validation_steps=validation_steps,
                     epochs=epochs,
                     verbose=1,
                     callbacks=callbacks
                     )
            


In [51]:
def Build_TrainModel():
  """Load the data, build and compile the model, print its summary and train it.

  Uses 256x256 RGB inputs, a batch size of 100, 10 epochs, the Adam
  optimizer and binary cross-entropy loss with the accuracy metric.
  """
  side = 256
  batch_size = 100
  epochs = 10

  imgtrain,masktrain,imgtest,masktest = load_data(side)

  steps_per_epoch = len(imgtrain) // batch_size
  steps_for_validation = len(imgtest)

  network = create_model((side, side, 3))
  network.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
  network.summary()

  train_model(network,batch_size,steps_per_epoch,steps_for_validation,epochs,imgtrain,masktrain,imgtest,masktest)


In [None]:
# Caso preferir treinar o modelo, abaixo está a chamada da função. Este processo tem bastante custo computacional nesse sentido não oriento utiliza-lo a não ser que seja em uma máquina com bastante recursos.

#Build_TrainModel()

In [2]:
# Nesta seção você pode testar o método utilizando o arquivo salvo do modelo treinado anteriormente.

import csv
import cv2
import numpy as np
import os
from tensorflow.keras.models import load_model

import matplotlib.pyplot as plt

def calculate_centroid(pts):
  """Return the integer (x, y) centroid of a contour from its moments.

  Returns (0, 0) when any of the m00, m10 or m01 moments is zero.
  """
  moments = cv2.moments(pts)
  area = moments["m00"]
  sum_x = moments["m10"]
  sum_y = moments["m01"]
  if area != 0 and sum_x != 0 and sum_y != 0:
    return (int(sum_x / area), int(sum_y / area))
  return (0,0)



def Segment_Find_On_AllImageDirectory(image_directory, model_path='pretreinedmodels/model.val_accuracy=0.80651.h5', results_dir="results"):
    """Segment every image in a directory and record the detected location.

    For each image the model predicts a 256x256 mask, which is thresholded at
    0.9, resized back to the image size and eroded. The filled contours are
    written as a mask image, and the centroid of the largest contour is
    written to a CSV file (``0,0`` when nothing is detected).

    Outputs:
        ``<results_dir>/wheres_wally_test.csv`` with one
        ``filename,cx,cy`` row per image, and
        ``<results_dir>/segmentedmasks/<name>_mask_<ext>`` for each image.

    Args:
        image_directory: directory containing the images to process.
        model_path: path of the trained Keras model to load.
        results_dir: directory where the CSV and the masks are written.
    """
    modelpredict = load_model(model_path)

    h_aux = 256
    w_aux = 256

    # Create the output directories up front; writing used to fail (open) or
    # silently do nothing (cv2.imwrite) when they were missing.
    masks_dir = os.path.join(results_dir, "segmentedmasks")
    os.makedirs(masks_dir, exist_ok=True)

    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))

    # mode='w' truncates an existing file, and the context manager closes the
    # handle even if an image fails halfway through.
    with open(os.path.join(results_dir, "wheres_wally_test.csv"), mode='w', encoding='utf-8', newline='') as csvfile:
        # csv.writer quotes file names that contain commas.
        writer = csv.writer(csvfile, lineterminator="\n")

        for filename in sorted(os.listdir(image_directory)):
            img = cv2.imread(os.path.join(image_directory, filename))
            if img is None:
                # Not a decodable image (e.g. a stray non-image file).
                print("Skipping unreadable file: " + filename)
                continue

            img = img.astype(np.float32) / 255.0
            h, w, c = img.shape

            resized = cv2.resize(img, (w_aux,h_aux), interpolation = cv2.INTER_AREA)

            result_mask = modelpredict.predict(resized.reshape(1,h_aux,w_aux, c))
            result_mask = (result_mask > 0.9).astype(np.uint8)

            # Back to the original resolution before extracting contours.
            result_mask = cv2.resize(result_mask.reshape(h_aux,w_aux), (w,h), interpolation = cv2.INTER_AREA)

            eroded = cv2.erode(result_mask,kernel,iterations = 5)

            cnts = cv2.findContours(eroded, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            # findContours returns 2 or 3 values depending on the OpenCV version.
            cnts = cnts[0] if len(cnts) == 2 else cnts[1]

            auximg = np.zeros(result_mask.shape, np.uint8)
            for cnt in cnts:
                auximg = cv2.drawContours(auximg ,[cnt], -1, 255, -1)

            # Report the largest region. Previously the centroid of whichever
            # contour came last in the list was kept, which is arbitrary and
            # could be a tiny spurious blob.
            cx = 0
            cy = 0
            if len(cnts) > 0:
                cx,cy = calculate_centroid(max(cnts, key=cv2.contourArea))

            writer.writerow([filename, cx, cy])

            name, ext = os.path.splitext(filename)
            cv2.imwrite(os.path.join(masks_dir, name+"_mask_"+ext), auximg)
    




Segment_Find_On_AllImageDirectory("TestSet")

# Jupyter tends to keep the GPU memory allocated even after the work is done, so a simple workaround is to kill the kernel process manually.
pid = os.getpid()
!kill -9 $pid
