# Machine vision methods applied to image pre processing

Purpose: Create a data set containing only insect pictures collected from the experiment of Ong, 2022

Author: Gabriel R. Palma

## Loading packages

In [39]:
import cv2 as cv
import os
import matplotlib.pyplot as plt
import numpy as np
import math

In [40]:
from tensorflow import keras
import matplotlib.pyplot as plt
from tensorflow.keras import layers

# Importing section
from tensorflow.keras import models
from keras import layers
from tensorflow.keras import optimizers
from keras.applications.vgg16 import VGG16
from keras.preprocessing import image
import numpy as np
from keras import backend as K
import os
from keras.preprocessing.image import ImageDataGenerator
import pandas as pd
# Paper Correction
from keras import metrics
# End importing section
import cv2
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score
from scipy.io import wavfile
import pylab
from tensorflow.keras.utils import to_categorical
datagen = ImageDataGenerator(rescale=1)
batch_size = 32
from keras.layers import LeakyReLU, Conv2D, Input, BatchNormalization, Activation, Dense, Dropout, Conv2DTranspose, concatenate

## Functions used in this project

In [26]:
def get_images_labels_features(directory, sample_count, new_image_shape = (244, 244)):
    '''Loads and crop the images according the appropriate conditions. 
       Also, it returns the feature and labels of the VGG16 predictions'''
        
    images = np.zeros(shape=(sample_count, new_image_shape[0], new_image_shape[1], 3))    
    labels = np.zeros(shape=(sample_count))
    generator = datagen.flow_from_directory(directory,
                                            target_size = new_image_shape,
                                            batch_size = batch_size,
                                            class_mode = 'binary',
                                            shuffle = True,
                                            seed=0)
    i = 0

    for inputs_batch, labels_batch in generator:
                            
        images[i * batch_size : (i + 1) * batch_size] = inputs_batch        
        labels[i * batch_size : (i + 1) * batch_size] = labels_batch
        i += 1

        if i * batch_size >= sample_count:

             break
    return(images, labels)

def get_train_test_data(base_dir = 'input_data/DipteraDataset',
                        train_sample_size = 100, test_sample_size = 100,  
                        new_image_shape = (244, 244), 
                        rescale = 1):
    '''This function imports the dataset and split into train and test data return the features and labels of the images'''
        
    
    # Importing images data
    train_dir = os.path.join(base_dir, 'train')
    test_dir = os.path.join(base_dir, 'test')
    datagen = ImageDataGenerator(rescale=rescale)
    batch_size = 20    

    # Getting data features and labels    
    train_information = get_images_labels_features(directory = train_dir, 
                                                   sample_count = train_sample_size, 
                                                   new_image_shape = new_image_shape)    
    train_images = train_information[0]
    train_labels = train_information[1]

    test_information = get_images_labels_features(directory = test_dir, 
                                                  sample_count = test_sample_size, 
                                                  new_image_shape = new_image_shape)    
    test_images = test_information[0]
    test_labels = test_information[1]

    # Preparing data features    
    #train_labels = to_categorical(train_labels)
    #test_labels = to_categorical(test_labels)        
        
    return(train_images, train_labels, test_images, test_labels)

In [18]:
def get_vertical_line(img):
    ''' This function return the x and y values related to the needle of the image '''
    img = img.astype(np.uint8())
    gray = cv.cvtColor(img,cv.COLOR_BGR2GRAY)
    thrsholded_img = cv.adaptiveThreshold(gray,255,cv.ADAPTIVE_THRESH_GAUSSIAN_C, cv.THRESH_BINARY_INV,11,2)
    lines = cv.HoughLinesP(thrsholded_img,1,np.pi/360,100,minLineLength=80,maxLineGap=50)
    
    # Obtaining hough lines axis
    for line in lines:
        x1,y1,x2,y2 = line[0]
        break
        
    # Obtaning the m value for interporlation
    if x1 == x2:
        x1 = x1 + 1   
    m = (y2 - y1)/ (x2 - x1)        
    
    return(img, thrsholded_img, x1, y1, x2, y2, m)

def create_blob_detector(minThreshold = 10, 
                         maxThreshold = 200, 
                         filterByArea = True,
                         minArea = 150,
                         filterByCircularity = False,
                         minCircularity = 0.5, 
                         filterByConvexity = False, 
                         minConvexity = 0.87, 
                         filterByInertia = False, 
                         minInertiaRatio = 0.01):
    '''This function creates a blob detector given a number of parameters'''
    # Setup SimpleBlobDetector parameters.
    params = cv.SimpleBlobDetector_Params()

    # Change thresholds
    params.minThreshold = minThreshold;
    params.maxThreshold = maxThreshold;

    # Filter by Area.
    params.filterByArea = filterByArea
    params.minArea = minArea

    # Filter by Circularity
    params.filterByCircularity = filterByCircularity
    params.minCircularity = minCircularity

    # Filter by Convexity
    params.filterByConvexity = filterByConvexity
    params.minConvexity = minConvexity

    # Filter by Inertia
    params.filterByInertia = filterByInertia
    params.minInertiaRatio = minInertiaRatio
    
    detector = cv.SimpleBlobDetector_create(params)
    
    return(detector)

def dilate_and_erode_img(thrsholded_img, kernel_size, 
                         dilate_iterations,
                         erode_iterations):
    '''This function dilates and erode the thresholded image'''
    
    kernel = np.ones((kernel_size, kernel_size), np.uint8)
    dilated_img = cv.dilate(thrsholded_img, kernel, iterations = dilate_iterations)
    eroded_img = cv.erode(dilated_img, kernel, iterations = erode_iterations)
    
    return(eroded_img)

def get_masks(eroded_img, 
              x1, y1, x2, 
              y2):
    '''This function finds the masks based on contours, centers of gravity and moments from an eroded image'''
    mask_insect = np.ones(224*224).reshape(224, 224)
    mask_needle = np.zeros(224*224).reshape(224, 224)
    
    # Obtain the countour of the eroded image
    contours, _ = cv.findContours(eroded_img, cv.RETR_LIST, cv.CHAIN_APPROX_NONE) 
    cog_n = 0
    max_area = 0
    for i, c in enumerate(contours):
        area = cv.contourArea(c) 
        if area > max_area:
            max_area = area
            index = i
            ic = c    
    # Find moments of the countour
    M = cv.moments(ic)
    if M['m00'] != 0:
        cX = int(M["m10"] / M["m00"])
        cY = int(M["m01"] / M["m00"])      
    # Finding the box of the contour
    countour_box = cv.boundingRect(ic)    
    # Finding the contour area
    contour_area = cv.contourArea(ic)  
    # Finding aspect ratio
    x,y,w,h = countour_box
    aspect_ratio = float(w)/h
        
    # Find the new center based on the center of gravity
    dx = 112 - cX
    dy = 112 - cY        
    # Create the masks
    cv.fillPoly(mask_insect, pts = [ic], color = 0)
    cv.line(mask_needle,(int(x1), y1), (int(x2), y2), 1, 5)    
    
    return(mask_insect, mask_needle, cX, cY, dx, dy, countour_box, contour_area, aspect_ratio)

def remove_needle(mask_insect, mask_needle, img):
    '''This function use the created masks to remove the needles and interpolate to create an image including only the insect'''
    new_img = img
    mask = np.logical_and(mask_needle, mask_insect)
    for y in np.arange(0, 224, 1):    
        start = 0
        end = 0
        fs, fe = (True, True)

        for x in np.arange(1, 223):
            if mask[y, x] == True and mask[y, x - 1] == False and fs:
                start = x
                fs = False
            if mask[y, x] == False and mask[y, x - 1] == True and fe and fs == False:
                end = x
                fe = False


        if start != end:
            for point in np.arange(start, end, 1):            
                pl = new_img[y, start - 5, :]
                pr = new_img[y, end + 5, :]            
                i = point - start  
                n = end - start
                p = (pl * ((n - i)/n)) + (pr * (i/n))
                new_img[y, point, :] = p
    return(new_img)

def remove_background(croped_img, threshold = 180):
    '''This function removes the background of a given image'''
    results_img = croped_img.copy()
    img_x, img_y = results_img.shape[0:2]
    
    for x in np.arange(0, img_x):
        for y in np.arange(0, img_y):            
            if np.any(results_img[x, y, :] > threshold):

                results_img[x, y, :] = 0
    return(results_img)

In [19]:
def get_image_without_needle(image):
    '''This function apply all the preprocessing techniques to obtain an image without needle'''    
    img, thrsholded_img, x1, y1, x2, y2, m = get_vertical_line(image)
    detector = create_blob_detector()
    eroded_img = dilate_and_erode_img(thrsholded_img = thrsholded_img, kernel_size = 3, 
                                      dilate_iterations = 4,
                                      erode_iterations = 7)
    mask_insect, mask_needle, cX, cY, dx, dy, countour_box, countour_area, aspect_ratio = get_masks(eroded_img, x1, y1, x2, y2)
    new_img = remove_needle(mask_insect, mask_needle, img)
    return(new_img, countour_box, cX, cY, countour_area, aspect_ratio)

def extract_image_features(image):
    '''This function creates the new dataset without the needles'''
        
    new_img, countour_box, cX, cY, countour_area, aspect_ratio = get_image_without_needle(image = image)
    x, y, w, h = countour_box
    croped_img = new_img[y:(y+h), x:(x+w)]        
    nb_img = remove_background(croped_img, threshold = 180)  
    r_average = np.mean(new_img[:, :, 0])
    b_average = np.mean(new_img[:, :, 1])
    g_average = np.mean(new_img[:, :, 2])
    
    r_025quantile = np.quantile(new_img[:, :, 0], 0.025)
    b_025quantile = np.quantile(new_img[:, :, 1], 0.025)
    g_025quantile = np.quantile(new_img[:, :, 2], 0.025)
    
    r_975quantile = np.quantile(new_img[:, :, 0], 0.975)
    b_975quantile = np.quantile(new_img[:, :, 1], 0.975)
    g_975quantile = np.quantile(new_img[:, :, 2], 0.975)
    
    return(aspect_ratio, countour_area, r_average, b_average, 
           g_average, r_025quantile, g_025quantile, b_025quantile,
           r_975quantile, r_975quantile, g_975quantile, b_975quantile)

    

## Loading dataset

In [24]:
train_images, train_labels, test_images, test_labels = get_train_test_data(base_dir = '../../input_data/DipteraDataset/',
                                                                               train_sample_size = 2026, test_sample_size = 848, 
                                                                               new_image_shape = (224, 224))


Found 2026 images belonging to 5 classes.
I am in test
Found 848 images belonging to 5 classes.


In [41]:
train_labels

array([2., 1., 2., ..., 3., 1., 1.])

## Obtaining variables from each image

In [8]:
train_parameters = []
label = 0
for image in train_images:    
    aspect_ratio, countour_area, r_average, b_average, g_average, r_025quantile, g_025quantile, b_025quantile, r_975quantile, r_975quantile, g_975quantile, b_975quantile = extract_image_features(image = image)
    train_parameters.append([aspect_ratio, countour_area, r_average, g_average, 
                             b_average, r_025quantile, g_025quantile, b_025quantile,
                             r_975quantile, r_975quantile, g_975quantile, 
                             train_labels[label]])
    label = label + 1

In [9]:
test_parameters = []
label = 0
for image in test_images:
    aspect_ratio, countour_area, r_average, b_average, g_average, r_025quantile, g_025quantile, b_025quantile, r_975quantile, r_975quantile, g_975quantile, b_975quantile = extract_image_features(image = image)
    test_parameters.append([aspect_ratio, countour_area, r_average, g_average, 
                             b_average, r_025quantile, g_025quantile, b_025quantile,
                             r_975quantile, r_975quantile, g_975quantile, 
                             test_labels[label]])
    label = label + 1

In [10]:
train_dataset_dnn = pd.DataFrame(train_parameters, columns=['Aspect ratio', 'Countour area', 'R average', 'G average', 'B average', 
                                                            'R 0.25 % quantile', 'G 0.25 % quantile', 'B 0.25 % quantile', 
                                                            'R 97.5 % quantile', 'G 97.5 % quantile', 'B 97.5 % quantile', 'Class'])
test_dataset_dnn = pd.DataFrame(test_parameters, columns=['Aspect ratio', 'Countour area', 'R average', 'G average', 'B average', 
                                                            'R 0.25 % quantile', 'G 0.25 % quantile', 'B 0.25 % quantile', 
                                                            'R 97.5 % quantile', 'G 97.5 % quantile', 'B 97.5 % quantile', 'Class'])

In [25]:
#train_dataset_dnn.to_csv('../../output_data/Diptera_parameters_train.csv')
#test_dataset_dnn.to_csv('../../output_data/Diptera_parameters_test.csv')

In [11]:
test_dataset_dnn # Replace the Center x for 25% percentile and y for 75% percentile + get excentricity | Hue value from a RGB | Get Charles code

Unnamed: 0,Aspect ratio,Countour area,R average,G average,B average,R 0.25 % quantile,G 0.25 % quantile,B 0.25 % quantile,R 97.5 % quantile,G 97.5 % quantile,B 97.5 % quantile,Class
0,0.948529,5748.0,220.702009,219.789820,221.338010,51.0,56.0,62.0,238.0,238.0,239.0,1.0
1,1.168224,7045.5,208.658363,206.978755,208.423489,52.0,44.0,51.0,226.0,226.0,227.0,3.0
2,0.969925,6747.5,231.006736,230.377890,230.878388,60.0,61.0,62.0,249.0,249.0,249.0,1.0
3,0.863946,6956.5,213.174027,211.627870,212.719946,43.0,37.0,40.0,233.0,233.0,233.0,0.0
4,1.269565,7164.5,198.287428,194.817323,197.512357,48.0,36.0,47.0,215.0,215.0,216.0,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...
843,0.734513,4162.0,196.237643,194.549705,195.608119,59.0,35.0,46.0,208.0,208.0,208.0,4.0
844,0.993377,8976.0,183.377332,182.305385,183.374402,21.0,20.0,21.0,209.0,209.0,209.0,0.0
845,0.734807,10114.5,208.519272,206.502790,207.808873,51.0,43.0,48.0,229.0,229.0,229.0,3.0
846,0.750000,6993.0,222.760583,220.487524,222.041992,67.0,58.0,64.0,242.0,242.0,242.0,3.0
