# berryFieldImageSegmentation

This notebook provides code to take full-sized annotated images and cut them up to develop a training/test set for evaluating the FCN model for identifying berries.


## Set user parameters

Set specific parameters for this session

In [1]:
#############################
#### Set user parameters ####
#############################

# Dictionary of training sets to build. Each key is also used as the name of the
# output directory for that set. Each value is a list of
#   [tile_size, (rows, cols) of the tile grid, output_image_ext]
# where tile_size is the side length (in pixels) of each square output tile. Every
# full-size image is center-cropped to (rows * tile_size) x (cols * tile_size)
# pixels and then cut into rows * cols tiles saved with output_image_ext.
training_sets_dict = {
#     "train_use": [500, (3, 3), "jpg"],
    "train0500": [500, (3, 3), "jpg"],
    "train0750": [750, (2, 2), "jpg"],
    "train1500": [1500, (1, 1), "jpg"],
    "train0500_png": [500, (3, 3), "png"],
    "train0750_png": [750, (2, 2), "png"],
    "train1500_png": [1500, (1, 1), "png"]
}


# Proportion of the full-size images (not tiles) to allocate to testing;
# the resulting count is rounded down
pTest = 0.2

# Extension (without the leading dot) of the full-size input images and masks.
# It is used both to filter the directory listings and to strip the extension
# from file names when pairing images with masks.
input_image_ext = "jpg"

# Project directory
# NOTE(review): proj_dir is not referenced by any code visible in this notebook;
# the output folders are created relative to the current working directory.
# Confirm that the notebook is meant to be run from the project directory.
proj_dir = "C:/Users/jeffrey.neyhart/OneDrive - USDA/Documents/CranberryLab/Phenomics/phenoCartImageAnalysis/berryImageSegmentation/FCN_Semantic_Segmentation_Model_Training"

# Directory containing the full-size images
full_image_dir = "C:/Users/jeffrey.neyhart/OneDrive - USDA/Documents/CranberryLab/Phenomics/phenoCartImageAnalysis/berryImageSegmentation/trainingSetConstruction_imageSegmentation/images"
# Directory containing the masks; a mask is paired with an image when its file
# name is "<image name>_berry.<input_image_ext>"
full_mask_dir = "C:/Users/jeffrey.neyhart/OneDrive - USDA/Documents/CranberryLab/Phenomics/phenoCartImageAnalysis/berryImageSegmentation/trainingSetConstruction_imageSegmentation/berryMasks"


# Flag to indicate whether independent testing should be performed
# If True, the testing images will not be included in the training images
# If False, the testing images will overlap with the training images
# NOTE(review): this flag is not read by any code visible in this notebook; the
# train and test image lists built below are always disjoint. Confirm whether
# the False case still needs to be supported.
perform_independent_testing = True



## Additional setup

Load packages and set other directory paths

In [2]:
###########################################
#### Load packages and set other paths ####
###########################################

# Load packages
import os
import random
import shutil
import numpy as np
import cv2 as cv
from PIL import Image

# From https://stackoverflow.com/questions/312443/how-do-you-split-a-list-into-evenly-sized-chunks
def chunks(lst, n):
    """Lazily split ``lst`` into consecutive slices of length ``n``.

    The last slice is shorter when ``len(lst)`` is not a multiple of ``n``.
    """
    slice_starts = range(0, len(lst), n)
    yield from (lst[start:start + n] for start in slice_starts)


# Train and test dirs: <training set>/<train|test>/<images|masks>
output_dirs = [
    os.path.join(set_name, split_name, content_name)
    for set_name in training_sets_dict
    for split_name in ("train", "test")
    for content_name in ("images", "masks")
]

for output_dir in output_dirs:
    # Remove anything left over from a previous run, then start from an empty folder
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)
            

## Sample and copy images ##

# Match on the real file extension (case-insensitively) rather than on a substring
# of the file name, so that files such as "jpg_notes.txt" are not picked up
input_ext_suffix = "." + input_image_ext.lower()

# List all of the images
# os.listdir() returns entries in arbitrary order; sorting keeps the listing (and
# the seeded train/test sample that is drawn from it) reproducible across machines
all_images = [
    os.path.join(full_image_dir, x)
    for x in sorted(os.listdir(full_image_dir))
    if x.lower().endswith(input_ext_suffix)
]
# List all of the masks
all_masks = [
    os.path.join(full_mask_dir, x)
    for x in sorted(os.listdir(full_mask_dir))
    if x.lower().endswith(input_ext_suffix)
]

# Fail early with a clear message instead of an opaque error from np.min() below
if not all_images:
    raise FileNotFoundError(f"No '{input_image_ext}' images found in {full_image_dir}")

## Make sure all of the images are of the same dimension
# Read in all images and store the dimensions; find the largest and smallest lengths/widths
# Each entry is [file name, width, height] (PIL reports size as (width, height))
image_dims = []
for img_file in all_images:
    # The context manager closes the file handle as soon as the size has been read
    with Image.open(img_file) as img:
        image_dims.append([os.path.basename(img_file)] + list(img.size))

# Calculate the minimum width and height
min_h = np.min([x[2] for x in image_dims])
min_w = np.min([x[1] for x in image_dims])



## Allocate images to training or testing

List the images that will go to training or testing


In [3]:
#######################################
#### Allocate images to train/test ####
#######################################

# List the basenames of the images
image_basenames = [os.path.basename(x) for x in all_images]
mask_basenames = [os.path.basename(x) for x in all_masks]

# An image "<name>.<ext>" is paired with the mask "<name>_berry.<ext>".
# Build the sets of names once so that each membership test is O(1) instead of
# rebuilding a list for every file.
image_names = {y.replace("." + input_image_ext, "") for y in image_basenames}
mask_names = {y.replace("_berry." + input_image_ext, "") for y in mask_basenames}

# Find only images with masks
mask_basenames1 = [x for x in mask_basenames if x.replace("_berry." + input_image_ext, "") in image_names]
image_basenames1 = [x for x in image_basenames if x.replace("." + input_image_ext, "") in mask_names]

# List all basenames
# Sorted so that the seeded sample below does not depend on the (platform-dependent)
# order in which the files were listed
all_basenames = sorted(x.replace("." + input_image_ext, "") for x in image_basenames1)

# Nothing can be allocated if no image could be paired with a mask
if not all_basenames:
    raise ValueError("No images with a matching '_berry' mask were found; check the image and mask directories")

# Set a random seed for reproducibility
random.seed(943)
# Sample basenames
# The number of test images is rounded down, so it is 0 for very small image sets
n_test = int(np.floor(pTest * len(all_basenames)))
test_basenames = random.sample(all_basenames, n_test)
# Everything that was not sampled for testing is used for training
test_basename_set = set(test_basenames)
train_basenames = [x for x in all_basenames if x not in test_basename_set]

## Image resizing and splitting

Center-crop each image to the full grid size `unsplit_image_size` (grid rows × tile size by grid columns × tile size).

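Then split each cropped image (and its mask) into square tiles of `input_image_size` × `input_image_size` pixels and save every tile to the train or test folder.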

In [4]:
######################################
#### Image resizing and splitting ####
######################################

# For every training set: center-crop each image/mask pair to the full grid size,
# cut the crop into square tiles, and write each tile to the train or test folder.

# Index the image and mask paths by basename once, using the same name-stripping
# rule that produced `all_basenames`. An exact lookup avoids the substring-matching
# pitfall where e.g. "1_cropped" also matches ".../11_cropped.jpg".
# setdefault() keeps the first path if two files reduce to the same basename.
image_path_by_basename = {}
for image_file in all_images:
    image_key = os.path.basename(image_file).replace("." + input_image_ext, "")
    image_path_by_basename.setdefault(image_key, image_file)

mask_path_by_basename = {}
for mask_path in all_masks:
    mask_key = os.path.basename(mask_path).replace("_berry." + input_image_ext, "")
    mask_path_by_basename.setdefault(mask_key, mask_path)

# Set lookup for the train/test decision made for every image
train_basename_set = set(train_basenames)

# Iterate over the training set parameters
for train_key in training_sets_dict:

    train_val = training_sets_dict[train_key]

    # Get the image grid size and input image size
    input_image_size = train_val[0]
    grid_rows, grid_cols = train_val[1]
    # [height, width] of the center crop that is later cut into tiles
    unsplit_image_size = [grid_rows * input_image_size, grid_cols * input_image_size]
    out_image_ext = train_val[2]

    # Set the train and test dirs
    train_dir = os.path.join(train_key, "train")
    test_dir = os.path.join(train_key, "test")

    # Set an image counter (running tile number within this training set)
    idx = 1

    # Iterate over all of the basenames
    for img_basename in all_basenames:

        # Find the image path and the mask path
        image_path = image_path_by_basename[img_basename]
        mask_file = mask_path_by_basename[img_basename]

        # Open both files in a context manager so the handles are released after
        # cropping; Image.crop() loads the pixel data (Pillow >= 3.4), so the
        # crops remain usable once the files are closed
        with Image.open(image_path) as img, Image.open(mask_file) as msk:

            ##### Crop the image #####

            # Crop to middle
            w, h = img.size
            to_crop_height = h - unsplit_image_size[0]
            to_crop_width = w - unsplit_image_size[1]

            # Flag images that are smaller than the requested crop instead of
            # processing them silently
            if to_crop_height < 0 or to_crop_width < 0:
                print(
                    f"WARNING: {img_basename} ({w}x{h}) is smaller than the "
                    f"{unsplit_image_size[1]}x{unsplit_image_size[0]} crop for {train_key}; "
                    "the crop box extends beyond the image bounds"
                )

            # Any odd pixel goes to the bottom / left side
            to_crop_top = int(to_crop_height / 2)
            to_crop_bottom = to_crop_height - to_crop_top
            to_crop_right = int(to_crop_width / 2)
            to_crop_left = to_crop_width - to_crop_right

            # Pixels to keep
            hstart = to_crop_top
            hend = h - to_crop_bottom
            wstart = to_crop_left
            wend = w - to_crop_right

            # Crop the image, and crop the mask with the same box
            img2 = img.crop((wstart, hstart, wend, hend))
            msk2 = msk.crop((wstart, hstart, wend, hend))

        ##### Split the image #####

        # Is it a train image or test image?
        is_train = img_basename in train_basename_set
        out_dir = train_dir if is_train else test_dir

        # List column (width) and rows (height)
        cols, rows = img2.size

        # Iterate over rows (top pixel offset of each tile)
        for top in range(0, rows, input_image_size):
            # Iterate over columns (left pixel offset of each tile)
            for left in range(0, cols, input_image_size):
                # Determine right, bottom
                right, bottom = left + input_image_size, top + input_image_size
                # Crop, convert to array
                img2_split_i = img2.crop((left, top, right, bottom))
                msk2_split_i = msk2.crop((left, top, right, bottom))

                # PIL arrays are RGB while cv.imwrite() expects BGR, so swap the channels
                img2_split_i_arr = cv.cvtColor(np.asarray(img2_split_i), cv.COLOR_RGB2BGR)
                msk2_split_i_arr = np.asarray(msk2_split_i)

                # If all elements of msk2_split_i_arr == 255, invert it
                # (an all-white mask tile is written as an all-zero tile)
                if (msk2_split_i_arr == 255).all():
                    msk2_split_i_arr = msk2_split_i_arr - 255

                # Save the image and the mask
                # Include the tile offsets and the image number
                tile_tag = f"{top}_{left}_image" + str(idx).zfill(4) + "." + out_image_ext
                if "cropped" in img_basename:
                    img_save_base = img_basename.replace("cropped", tile_tag)
                else:
                    # Without the "cropped" token the name would get neither a tile tag
                    # nor an extension, so every tile would target the same file
                    img_save_base = img_basename + "_" + tile_tag

                # The image tile and its mask tile share the same file name
                # NOTE(review): masks are written with the same extension as the images;
                # for the "jpg" sets this is lossy compression, so mask pixel values may
                # not be preserved exactly - confirm the downstream loader handles this
                img_filename = os.path.join(out_dir, "images", img_save_base)
                msk_filename = os.path.join(out_dir, "masks", img_save_base)

                # Save the image; cv.imwrite() reports failure by returning False
                if not cv.imwrite(img_filename, img2_split_i_arr):
                    raise IOError(f"Could not write image tile: {img_filename}")
                # Save the mask
                if not cv.imwrite(msk_filename, msk2_split_i_arr):
                    raise IOError(f"Could not write mask tile: {msk_filename}")

                # Increase the image counter
                idx += 1

