# Using Image Masks

*https://docs.opendronemap.org/masks/*

Starting from ODM 2.0, people can supply image masks to tell the software to skip reconstruction over certain areas. This is useful when the sky was accidentally included in the input photos from oblique shots, or simply to limit the reconstruction to a single subject.

To add a mask, simply create a new black and white image of the same dimension as the target image you want to mask (you can use a program such as GIMP to do this). Color in black the areas to exclude from the reconstruction.

Name your file:

\<filename\>_mask.JPG

For example, DJI_0018.JPG can be given a mask by creating a DJI_0018_mask.JPG file and including it in the list of images.

You can use .JPG, .PNG, .BMP and .TIF formats for image masks.

In [2]:
import os
from glob import glob
from datetime import datetime
import numpy as np
import pickle
from tqdm import tqdm
import matplotlib.pyplot as plt 
%matplotlib inline 

from PIL import Image

import tensorflow as tf
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Dropout 
from tensorflow.keras.layers import Conv2DTranspose
from tensorflow.keras.layers import concatenate

# from tensorflow.keras.preprocessing.image import array_to_img


In [3]:
# Directory that is globbed for the input photos.
images_path = r"fotos"
# Output directory for the resized grayscale ("_mask_original") predictions.
mask_inference_path = r"masked_inference"
# Output directory for the final black-and-white masks.
mask_inference_resized_path = r"masked_inference_resized"
# Suffix placed between a file's stem and its extension to name its mask.
mask_suffix = "_mask"


In [4]:
def create_dir(dir_path):
    """Create ``dir_path`` (and any missing parent directories) if needed.

    Calling this for a directory that already exists is a no-op.

    Args:
        dir_path: Path of the directory to create.

    Raises:
        FileExistsError: If ``dir_path`` exists but is not a directory.
    """
    # exist_ok avoids the check-then-create race of a separate isdir() test.
    os.makedirs(dir_path, exist_ok=True)
       

def get_name_mask_inferenced(image_name):
    """Return the mask file path that corresponds to ``image_name``.

    The module-level ``mask_suffix`` is inserted between the file stem and its
    extension, e.g. ``DJI_0018.JPG`` -> ``DJI_0018_mask.JPG``. The directory
    part of the path is kept.

    Args:
        image_name: Path (or bare file name) of the source image.

    Returns:
        The path of the mask file, in the same directory as the image.
    """
    base_dir, base_name = os.path.split(image_name)
    # splitext only splits on the LAST dot, so names with extra dots
    # ("site.v2.tif") or with no extension at all are handled correctly.
    stem, extension = os.path.splitext(base_name)
    return os.path.join(base_dir, stem + mask_suffix + extension)


def get_fmtedDateTime():
    """Return the current local time as a compact ``YYYYMMDDHHMMSS`` string."""
    return f"{datetime.now():%Y%m%d%H%M%S}"


def process_path(image_path):
    """Read an image file and return it as a 3-channel float32 tensor.

    Args:
        image_path: Path of the image file to load.

    Returns:
        The decoded image converted to ``tf.float32``.
    """
    raw_bytes = tf.io.read_file(image_path)
    # NOTE(review): the notebook feeds .JPG files through decode_png; TensorFlow
    # documents this op as also accepting JPEG input -- confirm for your version.
    decoded = tf.image.decode_png(raw_bytes, channels=3)
    # convert_image_dtype rescales integer pixels to floats in [0, 1],
    # i.e. the same normalization as dividing by 255.
    return tf.image.convert_image_dtype(decoded, tf.float32)


def preprocess_image(image):
    """Resize ``image`` to 96x128 (height x width) with nearest-neighbour sampling.

    Args:
        image: Image tensor accepted by ``tf.image.resize``.

    Returns:
        The resized image tensor.
    """
    return tf.image.resize(image, (96, 128), method='nearest')


def inference(model, image):
    """Run ``model.predict`` on a single image.

    Args:
        model: Object exposing a ``predict(batch)`` method.
        image: One image without a batch axis.

    Returns:
        Whatever ``model.predict`` returns for the one-element batch.
    """
    # predict() works on batches, so prepend a batch axis of size 1.
    single_image_batch = np.expand_dims(image, 0)
    return model.predict(single_image_batch)


def create_mask(pred_mask):
    """Turn batched per-class scores into a class-index mask for the first item.

    Args:
        pred_mask: Score tensor whose last axis enumerates the classes and
            whose first axis is the batch.

    Returns:
        The argmax over the last axis for batch element 0, with a trailing
        axis of size 1 re-added.
    """
    class_ids = tf.argmax(pred_mask, axis=-1)
    # Re-add a trailing channel axis, then keep only the first batch element.
    return class_ids[..., tf.newaxis][0]


def reprocess_image(mask_inferenced):
    """Upscale the prediction to 3000x4000 and reduce it to a class-index mask.

    Args:
        mask_inferenced: Batched prediction tensor as accepted by
            ``tf.image.resize``.

    Returns:
        The result of ``create_mask`` applied to the nearest-neighbour
        upscaled prediction.
    """
    upscaled = tf.image.resize(mask_inferenced, (3000, 4000), method='nearest')
    return create_mask(upscaled)

def image_resize(image_tensor):
    """Bilinearly upscale a 96x128x23 score tensor to 3000x4000.

    Args:
        image_tensor: Tensor holding 96 * 128 * 23 values, e.g. a model
            output of shape (1, 96, 128, 23).

    Returns:
        A float tensor of shape (3000, 4000, 23).
    """
    # Drop the batch axis: (1, 96, 128, 23) -> (96, 128, 23).
    score_maps = tf.reshape(image_tensor, (96, 128, 23))

    # tf.image.resize accepts a 3-D (height, width, channels) tensor directly.
    # Appending an extra trailing axis would make it read the 96 rows as the
    # batch axis, after which squeezing axis 0 (size 96) cannot succeed.
    return tf.image.resize(
        score_maps, (3000, 4000), method=tf.image.ResizeMethod.BILINEAR
    )


def save_mask_inference(mask_resized):
    """Placeholder: saving is not implemented and the argument is ignored."""
    return None


def display(display_list):
    """Show the given images side by side in one matplotlib figure.

    Args:
        display_list: Images accepted by ``plt.imshow``. Only two titles are
            defined, so passing more than two images raises ``IndexError``.
    """
    titles = ['Input Image', 'Predicted Mask']
    n_panels = len(display_list)

    plt.figure(figsize=(20, 15))
    for position, picture in enumerate(display_list):
        plt.subplot(1, n_panels, position + 1)
        plt.title(titles[position])
        plt.imshow(picture)
        plt.axis('off')
    plt.show()


def create_binary_segmentation_problem(image_dataset, mask_dataset):
    """Reduce a colour-coded mask dataset to a binary "building" problem.

    A pixel is labelled 1 when its first three channels equal (60, 16, 152)
    (hex #3C1098), otherwise 0. Only samples in which more than 15% of the
    pixels are labelled 1 are kept.

    Args:
        image_dataset: Indexable collection of images, aligned with
            ``mask_dataset``.
        mask_dataset: Iterable of masks, each an array of shape
            (height, width, channels) with at least three channels.

    Returns:
        A tuple ``(images, binary_masks)`` of NumPy arrays holding the kept
        images and their (height, width, 1) integer masks.
    """
    x_reduced, y_reduced = [], []

    for j, mask in enumerate(tqdm(mask_dataset)):
        mask = np.asarray(mask)
        # Unpacking rejects anything that is not (height, width, channels).
        _img_height, _img_width, _img_channels = mask.shape

        # Whole-array comparison instead of a Python loop over every pixel.
        is_building = (
            (mask[..., 0] == 60) & (mask[..., 1] == 16) & (mask[..., 2] == 152)
        )
        binary_image = is_building[..., np.newaxis].astype(int)

        # Only keep images with a high percentage of building coverage.
        if np.count_nonzero(binary_image) > 0.15 * binary_image.size:
            x_reduced.append(image_dataset[j])
            y_reduced.append(binary_image)

    return np.array(x_reduced), np.array(y_reduced)


In [5]:
import os
from glob import glob
from PIL import Image
from enum import Enum

# Colour assigned to each segmentation class; used to tell terrain from non-terrain.
class MaskColorMap(Enum):
    """Maps each segmentation class name to its 3-component colour tuple.

    The tuples are compared against pixel values read from a PIL image in
    ``convert_maskcolor_to_bw`` (presumably RGB order -- confirm against the
    dataset that produced the colour masks).
    """
    unlabeled = (0, 0, 0)
    paved_area = (128, 64, 128)
    dirt = (130, 76, 0)
    grass = (0, 102, 0)
    gravel = (112, 103, 87)
    water = (28, 42, 168)
    rocks = (48, 41, 30)
    pool = (0, 50, 89)
    vegetation = (107, 142, 35)
    roof = (70, 70, 70)
    wall = (102, 102, 156)
    window = (254, 228, 12)
    door = (254, 148, 12)
    fence = (190, 153, 153)
    fence_pole = (153, 153, 153)
    person = (255, 22, 96)
    dog = (102, 51, 0)
    car = (9, 143, 150)
    bicycle = (119, 11, 32)
    tree = (51, 51, 0)
    bald_tree = (190, 250, 190)
    ar_marker = (112, 150, 146)
    obstacle = (2, 135, 115)
    conflicting = (255, 0, 0)

def convert_maskcolor_to_bw(mask_image):
    """Convert a colour-coded segmentation mask to a 1-bit black/white mask.

    Pixels whose colour belongs to a non-terrain class are painted black;
    every other pixel is painted white.

    Args:
        mask_image: PIL image whose pixels are compared against the
            ``MaskColorMap`` tuples. It is modified in place.

    Returns:
        A new PIL image in mode ``'1'`` built from the repainted
        ``mask_image``.
    """
    # Built once as a frozenset so the per-pixel membership test is O(1).
    noterrain_colors = frozenset(
        label.value
        for label in (
            MaskColorMap.unlabeled,
            MaskColorMap.car,
            MaskColorMap.tree,
            MaskColorMap.pool,
            MaskColorMap.roof,
            MaskColorMap.fence_pole,
            MaskColorMap.wall,
            MaskColorMap.door,
            MaskColorMap.window,
            MaskColorMap.obstacle,
            MaskColorMap.vegetation,
        )
    )

    black = (0, 0, 0)
    white = (255, 255, 255)

    width, height = mask_image.size
    pix = mask_image.load()
    for x in range(width):
        for y in range(height):
            new_color = black if pix[x, y] in noterrain_colors else white
            mask_image.putpixel((x, y), new_color)

    return mask_image.convert('1')


In [65]:
import os
from glob import glob
from PIL import Image
from enum import Enum

# Gray-scale levels that are classified as terrain.
class MaskGrayMap(Enum):
    """Gray levels treated as terrain by ``convert_maskgray_to_bw``.

    Pixels holding one of these values are painted white there; all other
    values are painted black.
    """
    terrain_00  = 0
    terrain_01  = 11
    terrain_02  = 12
    terrain_03  = 24
    terrain_04  = 36
    terrain_05  = 48
    terrain_06  = 85
    terrain_07  = 97
    terrain_08  = 109
    terrain_09  = 133
    terrain_10  = 145
    terrain_11  = 218
    terrain_12 = 228
    terrain_13 = 230
    terrain_14 = 240
    terrain_15 = 252



def convert_maskgray_to_bw(mask_image):
    """Convert a gray-scale class mask to a 1-bit black/white mask.

    Pixels whose gray level is listed in ``MaskGrayMap`` are painted white
    (255); every other pixel is painted black (0).

    Args:
        mask_image: PIL image whose pixel values are compared against the
            ``MaskGrayMap`` levels. It is modified in place.

    Returns:
        A new PIL image in mode ``'1'`` built from the repainted
        ``mask_image``.
    """
    # Built once as a frozenset so the per-pixel membership test is O(1).
    terrain_gray = frozenset(level.value for level in MaskGrayMap)

    black = 0
    white = 255
    width, height = mask_image.size

    pix = mask_image.load()
    for x in range(width):
        for y in range(height):
            # One write per pixel instead of painting white and then maybe black.
            new_level = white if pix[x, y] in terrain_gray else black
            mask_image.putpixel((x, y), new_level)

    return mask_image.convert('1')

def save_image_crivo(image: "Image.Image", percent: float = 15) -> bool:
    """Decide whether a mask passes the minimum masked-area threshold.

    The share of zero-valued (black) pixels is compared against ``percent``,
    and the counts and the decision are printed.

    Args:
        image: Mask image; must expose ``size`` as ``(width, height)`` and be
            convertible with ``np.array``.
        percent: Minimum percentage (0-100, fractions allowed) of zero-valued
            pixels required for the mask to be kept.

    Returns:
        ``True`` when the zero-valued share is at least ``percent``.
    """
    mask_px_count = np.count_nonzero(np.array(image) == 0)
    image_px_total = image.size[0] * image.size[1]
    # A zero-area image has no masked share; avoid dividing by zero.
    if image_px_total:
        mask_percent = (mask_px_count / image_px_total) * 100
    else:
        mask_percent = 0.0
    save = bool(mask_percent >= percent)

    print(f"Mask pixel count: {mask_px_count} | Image pixel total: {image_px_total}")
    print(f"{save}: crivo={percent}%, calculado:{mask_percent}%")
    return save

In [14]:
# Make sure both mask output directories exist before anything is saved.
create_dir(mask_inference_path)
create_dir(mask_inference_resized_path)

In [15]:
def conv_block(inputs=None, n_filters=32, dropout_prob=0, max_pooling=True):
    """Build one contracting-path block of the U-Net.

    Two 3x3 same-padded ReLU convolutions (He-normal initialised), optional
    dropout, and optional 2x2 max pooling.

    Args:
        inputs: Input tensor of the block.
        n_filters: Number of filters of both convolutions.
        dropout_prob: Dropout rate; dropout is added only when > 0.
        max_pooling: When truthy, the first returned tensor is max-pooled.

    Returns:
        A tuple ``(next_layer, skip_connection)``: the tensor fed to the next
        block and the un-pooled tensor used for the skip connection.
    """
    features = inputs
    for _ in range(2):
        features = Conv2D(
            n_filters,
            kernel_size=3,
            activation='relu',
            padding='same',
            kernel_initializer=tf.keras.initializers.HeNormal(),
        )(features)

    if dropout_prob > 0:
        features = Dropout(dropout_prob)(features)

    if not max_pooling:
        return features, features

    return MaxPooling2D(pool_size=(2, 2))(features), features


def upsampling_block(expansive_input, contractive_input, n_filters=32):
    """Build one expanding-path block of the U-Net.

    The input is upsampled with a stride-2 transposed convolution,
    concatenated with the skip connection along axis 3, and passed through two
    3x3 same-padded ReLU convolutions (He-normal initialised).

    Args:
        expansive_input: Tensor coming from the previous (deeper) block.
        contractive_input: Skip-connection tensor from the contracting path.
        n_filters: Number of filters of every convolution in the block.

    Returns:
        The output tensor of the second convolution.
    """
    upsampled = Conv2DTranspose(
        n_filters,
        kernel_size=3,
        strides=(2, 2),
        padding='same',
    )(expansive_input)

    features = concatenate([upsampled, contractive_input], axis=3)

    for _ in range(2):
        features = Conv2D(
            n_filters,
            kernel_size=3,
            activation='relu',
            padding='same',
            kernel_initializer=tf.keras.initializers.HeNormal(),
        )(features)
    return features


def build_unet_model(input_size=(96, 128, 3), n_filters=32, n_classes=23):
    """Assemble the U-Net segmentation model.

    Args:
        input_size: Shape of one input image, without the batch axis.
        n_filters: Filter count of the first block; deeper blocks use
            multiples of it (2x, 4x, 8x, 16x).
        n_classes: Number of output channels of the final 1x1 convolution.

    Returns:
        A ``tf.keras.Model`` named ``"U-Net"`` whose output is the result of
        the final 1x1 convolution (no activation is applied to it).
    """
    inputs = Input(input_size)

    # Contracting path: each block yields (tensor for next block, skip tensor).
    down1, skip1 = conv_block(inputs, n_filters, dropout_prob=0.4)
    down2, skip2 = conv_block(down1, 2 * n_filters, dropout_prob=0.4)
    down3, skip3 = conv_block(down2, 4 * n_filters, dropout_prob=0.4)
    down4, skip4 = conv_block(down3, 8 * n_filters, dropout_prob=0.45)
    # The deepest block is not pooled.
    bottleneck, _ = conv_block(down4, 16 * n_filters, dropout_prob=0.4, max_pooling=None)

    # Expanding path: pair each upsampling step with the matching skip tensor.
    up6 = upsampling_block(bottleneck, skip4, 8 * n_filters)
    up7 = upsampling_block(up6, skip3, 4 * n_filters)
    up8 = upsampling_block(up7, skip2, 2 * n_filters)
    up9 = upsampling_block(up8, skip1, n_filters)

    head = Conv2D(
        n_filters,
        3,
        activation='relu',
        padding='same',
        kernel_initializer='he_normal',
    )(up9)

    class_scores = Conv2D(n_classes, kernel_size=1, padding='same')(head)
    return tf.keras.Model(inputs=inputs, outputs=class_scores, name="U-Net")


In [16]:
# Network input geometry: 96x128 pixels with 3 channels.
img_height = 96
img_width = 128
num_channels = 3

# Build the U-Net with the default filter and class counts.
unet_model = build_unet_model((img_height, img_width, num_channels))

In [17]:
# The most recent model can be downloaded from the Kaggle notebook (model-creation output):
# https://www.kaggle.com/code/marciodelrei/aerial-semantic-segmentation-puc-rio-bi-master-tcc?kernelSessionId=134697143
# os.path.join keeps the path valid on non-Windows systems too, where a
# literal backslash would be read as part of the file name.
unet_model.load_weights(os.path.join("modelos", "20230613141845_bestModelWeigths.h5"))
unet_model.summary()

Model: "U-Net"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 96, 128, 3)  0           []                               
                                ]                                                                 
                                                                                                  
 conv2d_20 (Conv2D)             (None, 96, 128, 32)  896         ['input_2[0][0]']                
                                                                                                  
 conv2d_21 (Conv2D)             (None, 96, 128, 32)  9248        ['conv2d_20[0][0]']              
                                                                                                  
 dropout_5 (Dropout)            (None, 96, 128, 32)  0           ['conv2d_21[0][0]']          

In [19]:
# Quick check: list everything inside the input folder and print the second entry.
images = glob(f"{images_path}/*")
image = images[1]
print(image)


fotos\DJI_0091.JPG


In [66]:
# Predict a terrain mask for every photo and save the masks that pass the threshold.
images = glob(f"{images_path}/*")
entra = False
total_colors = []
cont_mask = 0
for idx, image in enumerate(images):
    # The "*" pattern also matches sub-directories; only files can be decoded.
    if not os.path.isfile(image):
        continue

    print(f"Nome imagem: {image}")
    base_name = os.path.basename(image)
    # splitext keeps the extension out of the stem, so the mask is named
    # "<stem>_mask<ext>" (e.g. DJI_0091_mask.JPG), the form ODM looks for.
    # It also copes with extra dots or a missing extension in the file name.
    stem_name, ext_name = os.path.splitext(base_name)

    image_original = process_path(image)
    input_image_preprocessed = preprocess_image(image_original)

    output_mask = inference(unet_model, input_image_preprocessed)
    mask_predicted = create_mask(output_mask)
    numpy_array = mask_predicted.numpy()

    # Min-max stretch the class ids to 0..255. A uniform prediction has a zero
    # range, so map it to all zeros instead of dividing by zero.
    lowest = np.min(numpy_array)
    value_range = np.max(numpy_array) - lowest
    if value_range > 0:
        normalized_array = (numpy_array - lowest) / value_range * 255
    else:
        normalized_array = np.zeros(numpy_array.shape)

    mask_normalized = normalized_array.astype(np.uint8)
    mask_array = np.reshape(mask_normalized, (96, 128))

    # Save the gray-scale prediction, upscaled to the photo resolution.
    mask_data = Image.fromarray(mask_array)
    mask_data_resized = mask_data.resize((4000, 3000), resample=Image.Resampling.BOX)
    mask_filename = os.path.join(mask_inference_path, f"{stem_name}_mask_original{ext_name}")
    mask_data_resized.save(mask_filename)

    # Save the black/white mask only when enough of the image is masked out.
    mask_data_final = convert_maskgray_to_bw(mask_data)
    if save_image_crivo(mask_data_final, 0.5):
        mask_data_final_resized = mask_data_final.resize((4000, 3000), resample=Image.Resampling.BOX)
        mask_filename = os.path.join(mask_inference_resized_path, f"{stem_name}{mask_suffix}{ext_name}")
        mask_data_final_resized.save(mask_filename)
        cont_mask += 1


Nome imagem: fotos\DJI_0090.JPG
Mask pixel count: 9 | Image pixel total: 12288
False: crivo=0.5%, calculado:0.0732421875%
Nome imagem: fotos\DJI_0091.JPG
Mask pixel count: 108 | Image pixel total: 12288
True: crivo=0.5%, calculado:0.87890625%
Nome imagem: fotos\DJI_0092.JPG
Mask pixel count: 119 | Image pixel total: 12288
True: crivo=0.5%, calculado:0.9684244791666666%
Nome imagem: fotos\DJI_0093.JPG
Mask pixel count: 162 | Image pixel total: 12288
True: crivo=0.5%, calculado:1.318359375%
Nome imagem: fotos\DJI_0094.JPG
Mask pixel count: 81 | Image pixel total: 12288
True: crivo=0.5%, calculado:0.6591796875%
Nome imagem: fotos\DJI_0095.JPG
Mask pixel count: 36 | Image pixel total: 12288
False: crivo=0.5%, calculado:0.29296875%
Nome imagem: fotos\DJI_0096.JPG
Mask pixel count: 63 | Image pixel total: 12288
True: crivo=0.5%, calculado:0.5126953125%
Nome imagem: fotos\DJI_0097.JPG
Mask pixel count: 57 | Image pixel total: 12288
False: crivo=0.5%, calculado:0.4638671875%
Nome imagem: fotos