# BerryBox

This notebook prepares images as training sets for an FCN workflow that identifies berries in images. Specifically, it will:  
+ Rotate images (if necessary)
+ Perform color correction
+ Create masks for training


## User input

Edit the following:

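**project_directory**: full path to the BerryBox project folder.  
**input_image_directory**: full path to the folder that contains the berry box images.  
**output_image_directory**: folder where the corrected images and berry masks are written.  
**color_correction_standard**: image of the color checker card used as the color correction reference.  
**bayes_classifier_pdf**: probability density function (PDF) file used by the naive Bayes classifier.  
**ext**: file extension of the input images.  
**run_color_correction**: set to `True` to color-correct each image before segmentation.  
**rerun_all_images**: set to `True` to reprocess images whose corrected output already exists.  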


In [113]:
# Edit the following inputs

# Root folder of the project. Every other path below is derived from it, so this
# is the only path that has to be edited when the project moves.
project_directory = "C:/Users/jeffrey.neyhart/OneDrive - USDA/Documents/CranberryLab/Phenomics/BerryBox/"

# Same folder without a trailing separator, so the derived paths do not contain "//".
# (project_directory itself is left exactly as entered.)
_project_root = project_directory.rstrip("/\\")

# Folder with the berry box photos to process
input_image_directory = _project_root + "/testPhotos"
# Folder where corrected images and masks are written
output_image_directory = _project_root + "/fcn_model_building/imagesToAnnotate"
# Photo of the color checker card used as the color correction reference
color_correction_standard = _project_root + "/resources/color_checker_standard1.JPG"
# PDF file passed to the naive Bayes classifier
bayes_classifier_pdf = _project_root + "/resources/bayes_classifier/bayes_classified_pdf.out"

# Image extension
ext = ".JPG"

# Run color correction?
run_color_correction = False

# Rerun all images?
rerun_all_images = True

## Pipeline setup

Load packages, set directories, etc.

**Do not alter this or any code below**

In [114]:
# Load packages
import cv2 as cv
import imageio
import numpy as np
from matplotlib import pyplot as plt
import matplotlib.patches as mpatches
import os
from plantcv import plantcv as pcv
from skimage.filters import threshold_otsu
from skimage.segmentation import clear_border, watershed
from skimage.measure import label, regionprops_table, regionprops
from skimage.morphology import closing, square
from skimage.util import map_array
from skimage.feature import peak_local_max
from skimage.color import label2rgb
from scipy import ndimage as ndi


# Upper-case the extension so that extension matching below is case-insensitive
ext = ext.upper()

# List all input images (full paths).
# Only names that END with the extension are kept; a substring test would also
# accept names such as "photo.JPG.bak". The list is sorted because os.listdir()
# returns entries in arbitrary order and later cells index into this list.
all_images = sorted(
    os.path.join(input_image_directory, x)
    for x in os.listdir(input_image_directory)
    if x.upper().endswith(ext)
)

# Create the output directory (including any missing parent folders) if needed
os.makedirs(output_image_directory, exist_ok = True)

    
#####
# Build the reference ("target") color matrix from the color checker standard
#####

# Load the standard photo (the color chart with no berries) and record its size
cc_img = imageio.imread(color_correction_standard)
h, w, d = cc_img.shape

# The color card search is run on a downsized copy of the standard photo
scale_percent = 15 # percent of original size
new_h = int(h * scale_percent / 100)
new_w = int(w * scale_percent / 100)
cc_img_small = cv.resize(cc_img, (new_w, new_h))

df1, start, space = pcv.transform.find_color_card(rgb_img = cc_img_small)

# start / space were measured on the downsized copy; divide by the downsizing
# fraction to express them in pixels of the full-size photo
shrink = scale_percent / 100

start_full = [int(coord / shrink) for coord in start]
start_use = (start_full[0], start_full[1])

space_full = [int(coord / shrink) for coord in space]
space_use = (space_full[0], space_full[1])

# Labeled color card mask on the full-size standard photo
target_mask = pcv.transform.create_color_card_mask(
    rgb_img = cc_img,
    radius = 50,
    start_coord = start_use,
    spacing = space_use,
    ncols = 4,
    nrows = 6,
)

# Color matrix of the target, reused for every image that is color-corrected
target_headers, target_matrix = pcv.transform.get_color_matrix(cc_img, target_mask)


# Create an object of class QRCodeDetector.
# It is used by the "Step 1: QR code reader" component cell further down; the
# QR code step inside the main pipeline loop is currently commented out.
qrCodeDetector = cv.QRCodeDetector()

# Disabled: tab-delimited text file linking each photo name to its collection id.
# NOTE(review): `output_directory` and `session_name` are not defined anywhere in
# the visible notebook, so these lines need updating before being re-enabled.
# # Open up a text file to store photo name and collection id
# image_collection_filename = os.path.join(output_directory, session_name + "_image_collectionID_link.txt")
# handle = open(image_collection_filename, "w")
# # Add a header
# handle.write("\t".join(["image_filename", "collection_id"]) + "\n")


## Run the pipeline



In [126]:
###
# This code block runs the entire pipeline for images
#
# For every input image it:
#   1. optionally color-corrects the image against the color checker standard
#      and saves the result as "<name>-corrected.PNG"
#   2. segments the berries (naive Bayes pixel classifier + watershed) and saves
#      the labeled segmentation as "<name>-berry_mask_watershed.PNG"
#   3. filters the segments by area and saves a 0/255 mask as "<name>-berry_mask.PNG"
###

for img_file in all_images:

    # Image name without its extension.
    # splitext() is used instead of str.replace(ext, "") because `ext` is upper-cased:
    # a file such as "photo.jpg" passes the case-insensitive listing but would keep
    # ".jpg" in every output name.
    img_stem = os.path.splitext(os.path.basename(img_file))[0]
    corrected_image_filename = output_image_directory + "/" + img_stem + "-corrected.PNG"

    # Skip this file if its corrected image already exists and a full rerun was not requested
    if os.path.exists(corrected_image_filename) and not rerun_all_images:
        continue

    # Read in the image
    img = imageio.imread(img_file)


    ###
    # Step 1: QR code reader
    ###

    # QR-code-based rotation is currently disabled in the pipeline, so the image is
    # used as-is. A standalone version of this step lives in the
    # "Step 1: QR code reader" component cell further down the notebook.
    img1 = img


    ###
    # Step 2: Color calibration
    ###

    # Only run this if run_color_correction = True
    if run_color_correction:

        # Use the plant CV Color Correction Workflow
        # located here: https://plantcv.readthedocs.io/en/stable/transform_correct_color/

        # Find the color checker in a downsized copy of the source image
        h, w, d = img1.shape
        scale_percent = 25 # Keep this at 25 to ensure the color card is identified
        new_h = int(h * scale_percent / 100)
        new_w = int(w * scale_percent / 100)

        df1, start, space = pcv.transform.find_color_card(rgb_img = cv.resize(img1, (new_w, new_h)))

        # start / space refer to the downsized copy; rescale them to full-size pixels
        start_use = [int(x / (scale_percent / 100)) for x in start]
        start_use = (start_use[0], start_use[1])

        space_use = [int(x / (scale_percent / 100)) for x in space]
        space_use = (space_use[0], space_use[1])

        # Use these outputs to create a labeled color card mask
        source_mask = pcv.transform.create_color_card_mask(rgb_img = img1, radius = 50, start_coord = start_use,
                                                           spacing = space_use, ncols = 4, nrows = 6)

        # Get the source matrix
        source_headers, source_matrix = pcv.transform.get_color_matrix(img1, source_mask)

        ## Run color correction ##

        # Per the PlantCV documentation: matrix_a is built from the color chip values of the
        # target image, matrix_m is a Moore-Penrose inverse matrix, and matrix_b is built
        # from the color chip values of the source image
        matrix_a, matrix_m, matrix_b = pcv.transform.get_matrix_m(target_matrix = target_matrix, source_matrix = source_matrix)

        # deviance is the measure of how greatly the source image deviates from the target image's color space.
        # Two images of the same color space should have a deviance of ~0.
        # transformation_matrix is a 9x9 matrix of transformation coefficients
        deviance, transformation_matrix = pcv.transform.calc_transformation_matrix(matrix_m, matrix_b)

        img2 = pcv.transform.apply_transformation_matrix(source_img = img1, target_img = cc_img,
                                                         transformation_matrix = transformation_matrix)

    else:
        img2 = img1

    # Save the (corrected) image
    imageio.imwrite(corrected_image_filename, img2)


    ###
    # Step 3: Berry segmentation
    ###

    # Run the naive bayes classifier
    # Information can be found here: https://plantcv.readthedocs.io/en/stable/tutorials/machine_learning_tutorial/

    # Classify a half-size copy to speed things up; h and w keep the full size so
    # the final mask can be scaled back up
    h, w, d = img2.shape
    scale_percent = 50
    new_h = int(h * scale_percent / 100)
    new_w = int(w * scale_percent / 100)

    img3 = cv.resize(img2, (new_w, new_h))

    # Run the classifier
    bayes_class_mask = pcv.naive_bayes_classifier(rgb_img = img3, pdf_file = bayes_classifier_pdf)
    # Get the berry mask
    berry_mask = bayes_class_mask["berry"]

    # Run the watershed segmentation: one marker per local maximum of the distance
    # transform (searched within a 50 x 50 pixel footprint)
    distance = ndi.distance_transform_edt(berry_mask)

    local_max_coords = peak_local_max(distance, footprint=np.ones((50, 50)))
    local_max_mask = np.zeros(distance.shape, dtype=bool)
    local_max_mask[tuple(local_max_coords.T)] = True
    markers = label(local_max_mask)

    berry_seg = watershed(-distance, markers, mask = berry_mask)
    berry_seg = np.array(berry_seg)

    # Save the labeled segmentation
    # NOTE(review): berry_seg holds integer labels; how imageio converts these when
    # writing a PNG may depend on the imageio version - confirm the saved file is as expected.
    berry_mask_seg_filename = output_image_directory + "/" + img_stem + "-berry_mask_watershed.PNG"
    imageio.imwrite(berry_mask_seg_filename, berry_seg)


    ## Find and filter regions ##
    # Label mat
    label_mat = label(berry_seg, background = 0)
    # If nothing was segmented, fall back to a single region covering the whole image
    # (presumably so that regionprops_table always has at least one region to measure)
    if np.sum(label_mat) < 1:
        label_mat = np.ones_like(label_mat)

    # Get regions
    berry_regions = regionprops_table(label_mat, properties = ["label", "area", "eccentricity"])

    # Filter for berry regions by area (in pixels of the half-size image)
    condition = (berry_regions['area'] > 1000) & (berry_regions['area'] < 100000)
    # zero out labels not meeting condition
    input_labels = berry_regions['label']
    output_labels = input_labels * condition
    # Create a new label image from the subset of labels.
    # The labels are deliberately NOT cast to uint8 here: doing so wraps label ids
    # modulo 256, so a region labeled 256, 512, ... would become 0 and vanish from the mask.
    filtered_lab_image = map_array(label_mat, input_labels, output_labels)

    # Binarize first, then close and cast to uint8, and scale back to the full image size
    berry_mask1 = closing(filtered_lab_image > 0).astype("uint8")
    berry_mask2 = cv.resize(berry_mask1, (w, h))

    # Save the mask
    berry_mask_filename = output_image_directory + "/" + img_stem + "-berry_mask.PNG"
    imageio.imwrite(berry_mask_filename, berry_mask2 * 255) # Need to rescale to 0-255 to save




# COMPONENT CODE

In [None]:
# Example image: the first image found in the input directory.
# Fail with an explicit message (same exception type as before) when no images were found.
if len(all_images) == 0:
    raise IndexError("No images with extension " + ext + " were found in " + input_image_directory)
img_file = all_images[0]

# Read in the image
img = imageio.imread(img_file)

# Show the raw image, titled with its file name so the figure stands on its own
plt.figure()
plt.imshow(img)
plt.title(os.path.basename(img_file))
plt.show()

### Step 1: QR code reader

This step finds and decodes the QR code in the images. It also determines if the image needs to be rotated.

In [None]:

# Downsize the image to speed up QR code detection

# Get the dimensions
h, w, d = img.shape
scale_percent = 50
new_h = int(h * scale_percent / 100)
new_w = int(w * scale_percent / 100)

img2_crop = cv.resize(img, (new_w, new_h))

# Detect and decode the QR code
collection_id, points, _ = qrCodeDetector.detectAndDecode(img2_crop)

# When a QR code is found, points[0] holds its four (x, y) corners in the
# coordinates of the downsized image. Only the smallest y value is used below,
# so the order of the corners does not matter here.

## Determine if the image should be rotated
##
## First determine if the image is portrait or landscape
if h > w:

    img1 = np.asarray(img)

    # detectAndDecode returns no points when no QR code is found; without the QR
    # code the rotation direction cannot be decided, so leave the image as it is
    # instead of failing on points[0]
    if points is None or len(points) == 0:
        print("Warning: no QR code detected in " + img_file + "; image left unrotated")

    else:
        # Next determine if the QR code is in the upper half (rotate clockwise) or lower half (rotate
        # counter-clockwise)
        y_min = min([x[1] for x in points[0]])

        # If this is true, the qr code is in the upper half
        # (h is the full-size height, so scale it to the downsized image first)
        if y_min < ((h * (scale_percent / 100)) / 2):
            img1 = cv.rotate(img1, cv.ROTATE_90_CLOCKWISE)
        else:
            img1 = cv.rotate(img1, cv.ROTATE_90_COUNTERCLOCKWISE)

# Else just copy the image
else:
    img1 = img

plt.imshow(img1)
plt.show()
        

### Step 2: Color Calibration

This step performs color calibration of the images


In [None]:
# Use the plant CV Color Correction Workflow
# located here: https://plantcv.readthedocs.io/en/stable/transform_correct_color/

# Find the color checker in a downsized copy of the source image
h, w, d = img1.shape
# Same value as the main pipeline loop, which notes that 25 is needed for the
# color card to be identified
scale_percent = 25
new_h = int(h * scale_percent / 100)
new_w = int(w * scale_percent / 100)

df1, start, space = pcv.transform.find_color_card(rgb_img = cv.resize(img1, (new_w, new_h)))

# start / space refer to the downsized copy; rescale them to full-size pixels
start_use = [int(x / (scale_percent / 100)) for x in start]
start_use = (start_use[0], start_use[1])

space_use = [int(x / (scale_percent / 100)) for x in space]
space_use = (space_use[0], space_use[1])

# Create a mask
# Use these outputs to create a labeled color card mask
source_mask = pcv.transform.create_color_card_mask(rgb_img = img1, radius = 50, start_coord = start_use,
                                                   spacing = space_use, ncols = 4, nrows = 6)

# Get the source matrix
source_headers, source_matrix = pcv.transform.get_color_matrix(img1, source_mask)

## Run color correction ##

# Per the PlantCV documentation: matrix_a is built from the color chip values of the
# target image, matrix_m is a Moore-Penrose inverse matrix, and matrix_b is built
# from the color chip values of the source image
matrix_a, matrix_m, matrix_b = pcv.transform.get_matrix_m(target_matrix = target_matrix, source_matrix = source_matrix)

# deviance is the measure of how greatly the source image deviates from the target image's color space.
# Two images of the same color space should have a deviance of ~0.
# transformation_matrix is a 9x9 matrix of transformation coefficients
deviance, transformation_matrix = pcv.transform.calc_transformation_matrix(matrix_m, matrix_b)

img2 = pcv.transform.apply_transformation_matrix(source_img = img1, target_img = cc_img,
                                                 transformation_matrix = transformation_matrix)

# Alternative (disabled): the single-call PlantCV color correction
# target_matrix, source_matrix, transformation_matrix, img2 = pcv.transform.correct_color(target_img=cc_img,
#                                                                                         target_mask=target_mask,
#                                                                                         source_img=img1,
#                                                                                         source_mask=source_mask,
#                                                                                         output_directory=".")

# Save the corrected image.
# splitext() is used instead of str.replace(ext, "") because `ext` is upper-cased, so a
# lowercase extension such as ".jpg" would otherwise stay in the output name.
img_stem = os.path.splitext(os.path.basename(img_file))[0]
corrected_image_filename = output_image_directory + "/" + img_stem + "-corrected.PNG"
imageio.imwrite(corrected_image_filename, img2)



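Timing note: color correction with the step-by-step matrix transformation (`get_matrix_m`, `calc_transformation_matrix`, `apply_transformation_matrix`) took 13.01 seconds.  
Color correction with the single-call `pcv.transform.correct_color` function took 10.53 seconds.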


### Step 3: Berry Segmentation

This step identifies the berries in the image.


In [None]:
# Classify every pixel with the naive bayes classifier
# (background: https://plantcv.readthedocs.io/en/stable/tutorials/machine_learning_tutorial/)

# The classifier is fed a rescaled copy of the corrected image; at 100 percent
# the copy has the same dimensions as img2
h, w, d = img2.shape
scale_percent = 100
new_h = int(h * scale_percent / 100)
new_w = int(w * scale_percent / 100)
target_size = (new_w, new_h)

img3 = cv.resize(img2, target_size)

# One mask per class is returned; keep the one for the "berry" class
bayes_class_mask = pcv.naive_bayes_classifier(rgb_img = img3, pdf_file = bayes_classifier_pdf)
berry_mask = bayes_class_mask["berry"]

# Display the berry mask
plt.imshow(berry_mask, cmap = "gray")
plt.show()

In [None]:

# Run the watershed segmentation.
# peak_local_max, label and watershed are called by the names imported at the top
# of the notebook; the `feature`, `measure` and `segmentation` modules themselves
# are never imported, so the module-qualified calls raise NameError on a fresh kernel.
distance = ndi.distance_transform_edt(berry_mask)

# One marker per local maximum of the distance transform (250 x 250 pixel footprint)
local_max_coords = peak_local_max(distance, footprint=np.ones((250, 250)))
local_max_mask = np.zeros(distance.shape, dtype=bool)
local_max_mask[tuple(local_max_coords.T)] = True
markers = label(local_max_mask)

berry_seg = watershed(-distance, markers, mask = berry_mask)
berry_seg = np.array(berry_seg)

plt.imshow(berry_seg, cmap = "gray")
plt.show()

# Save.
# splitext() is used instead of str.replace(ext, "") because `ext` is upper-cased, so a
# lowercase extension such as ".jpg" would otherwise stay in the output name.
img_stem = os.path.splitext(os.path.basename(img_file))[0]
berry_mask_seg_filename = output_image_directory + "/" + img_stem + "-berry_mask_watershed.PNG"
imageio.imwrite(berry_mask_seg_filename, berry_seg)

In [None]:
# Label mat
# (label, regionprops_table and map_array are called by the names imported at the
# top of the notebook; the `measure` and `util` modules themselves are never imported)
label_mat = label(berry_seg, background = 0)
# If nothing was segmented, fall back to a single region covering the whole image
# (presumably so that regionprops_table always has at least one region to measure)
if np.sum(label_mat) < 1:
    label_mat = np.ones_like(label_mat)

# Get regions
berry_regions = regionprops_table(label_mat, properties = ["label", "area", "eccentricity"])
# Filter for berry regions by area (pixels) and eccentricity
condition = (berry_regions['area'] > 10000) & (berry_regions['area'] < 90000) & (berry_regions['eccentricity'] > 0.3)

# zero out labels not meeting condition
input_labels = berry_regions['label']
output_labels = input_labels * condition

# Create a new label image from the subset of labels
filtered_lab_image = map_array(label_mat, input_labels, output_labels)

# Threshold: every retained label becomes 255, everything else 0.
# The comparison is done on the original integer labels. Casting the labels to uint8
# and multiplying by 255 wraps modulo 256, which drops label 255 and every label
# that is a multiple of 256.
berry_mask_bw = (filtered_lab_image > 0).astype("uint8") * 255

plt.imshow(berry_mask_bw, cmap = "gray")
plt.show()



In [None]:
# Scale the 0/255 mask back to the size of the corrected image.
# Nearest-neighbour interpolation keeps the mask strictly binary; the default
# (bilinear) interpolation can introduce intermediate gray values along berry edges
# whenever the mask was computed at a different scale.
berry_mask1 = cv.resize(berry_mask_bw, (w, h), interpolation = cv.INTER_NEAREST)

# Save the mask.
# splitext() is used instead of str.replace(ext, "") because `ext` is upper-cased, so a
# lowercase extension such as ".jpg" would otherwise stay in the output name.
img_stem = os.path.splitext(os.path.basename(img_file))[0]
berry_mask_filename = output_image_directory + "/" + img_stem + "-berry_mask.PNG"
imageio.imwrite(berry_mask_filename, berry_mask1)
