In [1]:
from datetime import datetime, date, timedelta
import glob
from os.path import join, isfile, isdir, basename, splitext, exists
from os import makedirs
import numpy as np
import pandas as pd
import numpy.matlib
import cv2
import shutil
import copy
from collections import defaultdict
from pathlib import PureWindowsPath
from skimage.measure import regionprops, regionprops_table, find_contours, label
from skimage.morphology import convex_hull_image
from scipy.spatial.distance import cdist
from scipy.ndimage import binary_fill_holes
from joblib import Parallel, delayed


In [2]:
# List all files from a given path
def get_all_files(path, only_filenames, sorted_list):
    """List the regular files directly inside ``path`` that match ``*.*``.

    Parameters
    ----------
    path : str
        Folder to scan (not recursive).
    only_filenames : bool
        If true, return bare file names instead of the paths produced by glob.
    sorted_list : bool
        If true, return the entries sorted.

    Returns
    -------
    list of str
    """
    file_list = [x for x in glob.glob(join(path, '*.*')) if isfile(x)]
    if only_filenames:
        # basename() uses the separator rules of the running platform; splitting on a
        # literal backslash only removed the directory part on Windows.
        file_list = [basename(x) for x in file_list]
    if sorted_list:
        return sorted(file_list)
    return file_list

# List all folders from a given path
def get_all_folders(path, only_foldernames, sorted_list):
    """List the sub-folders directly inside ``path``.

    Parameters
    ----------
    path : str
        Folder to scan (not recursive).
    only_foldernames : bool
        If true, return bare folder names instead of the paths produced by glob.
    sorted_list : bool
        If true, return the entries sorted.

    Returns
    -------
    list of str
    """
    folder_list = [x for x in glob.glob(join(path, '*')) if isdir(x)]
    if only_foldernames:
        # basename() uses the separator rules of the running platform; splitting on a
        # literal backslash only removed the parent part on Windows.
        folder_list = [basename(x) for x in folder_list]
    if sorted_list:
        return sorted(folder_list)
    return folder_list


In [3]:
# Read mask from file
def read_mask(mask_path):
    """Read a mask image from disk and return it as a 2-D array.

    The image is loaded with ``cv2.imread``. A 2-D result is returned unchanged;
    otherwise the channels are collapsed by taking the maximum along axis 2.

    Raises
    ------
    OSError
        If ``cv2.imread`` returns ``None`` (file missing or not decodable).
    """
    init_mask = cv2.imread(mask_path)
    if init_mask is None:
        # cv2.imread does not raise on failure, it silently returns None.
        raise OSError('Could not read mask image (missing or unreadable): "{}"'.format(mask_path))
    if len(init_mask.shape) == 2:
        return init_mask
    return np.max(init_mask, axis=2)

# Read leaf mask from file
def read_leaf_mask(base_leaves_path, leaf_id, filename, leaf_seq_in_path=True):
    """Read the hidden leaf mask ``filename`` of leaf number ``leaf_id``.

    The mask is looked up under ``<base_leaves_path>/Leaf_XXX/[leaf seq/]hidden leaf mask seq``;
    the ``leaf seq`` level is only part of the path when ``leaf_seq_in_path`` is true.
    """
    path_parts = [base_leaves_path, 'Leaf_{:03d}'.format(leaf_id)]
    if leaf_seq_in_path:
        path_parts.append('leaf seq')
    path_parts.extend(['hidden leaf mask seq', filename])
    return read_mask(join(*path_parts))

# Read stem mask from file
def read_stem_mask(base_leaves_path, leaf_id, filename):
    """Read the hidden stem mask ``filename`` of leaf number ``leaf_id``.

    The mask is looked up under ``<base_leaves_path>/Leaf_XXX/stem seq/hidden stem mask seq``.
    """
    leaf_folder = 'Leaf_{:03d}'.format(leaf_id)
    stem_mask_file = join(base_leaves_path, leaf_folder, 'stem seq', 'hidden stem mask seq', filename)
    return read_mask(stem_mask_file)


In [4]:
# Get all distinct filenames in a replicate
def get_all_filenames_in_replicate(rep_path, leaf_seq_in_path=True):
    """Collect the distinct leaf-mask file names found in a replicate folder.

    Every leaf folder inside ``rep_path`` is scanned in its ``hidden leaf mask seq``
    sub-folder (below ``leaf seq`` when ``leaf_seq_in_path`` is true).

    Returns
    -------
    (list of str, defaultdict)
        The sorted distinct file names, and a mapping from file name to the list of
        leaf folders that contain a file of that name.
    """
    if leaf_seq_in_path:
        mask_sub_dirs = ('leaf seq', 'hidden leaf mask seq')
    else:
        mask_sub_dirs = ('hidden leaf mask seq',)

    distinct_filenames = set()
    leaves_per_filename = defaultdict(list)
    for leaf_folder in get_all_folders(rep_path, only_foldernames=True, sorted_list=True):
        leaf_path = join(rep_path, leaf_folder, *mask_sub_dirs)
        for filename in get_all_files(leaf_path, only_filenames=True, sorted_list=False):
            distinct_filenames.add(filename)
            leaves_per_filename[filename].append(leaf_folder)
    return sorted(distinct_filenames), leaves_per_filename

In [5]:
# Extract date and time from filename
def extract_date_time_from_filename(filename):
    """Return ``(year, month, day, hour, minute)`` parsed from a mask file name.

    The name is split on underscores; the first six fields are converted to integers
    and fields 1 to 5 are returned (the leading field is parsed but not returned).
    """
    leading_fields = filename.split('_')[:6]
    numbers = list(map(int, leading_fields))
    return tuple(numbers[1:6])

In [6]:
# Extract leaf contour
def extract_leaf_contour(leaf_mask):
    """Return the first contour that ``find_contours`` reports for ``leaf_mask``."""
    contours = find_contours(leaf_mask)
    first_contour = contours[0]
    return first_contour



# Rotate the contour so that it starts at the base of the petiole
# "pt_towards_plant_center" is a point towards the center of the plant, not the center of the leaf.
def make_contour_start_at_petiole_base(leaf_contour, pt_towards_plant_center):
    """Cyclically shift ``leaf_contour`` so that it starts at the point closest to
    ``pt_towards_plant_center`` (a 2-D array of points, as required by ``cdist``)."""
    distances = cdist(pt_towards_plant_center, leaf_contour)
    shift = int(distances.argmin())
    return np.roll(leaf_contour, -shift, axis=0)



# Calculate the length of each contour segment and return the cumulative contour length 
def cumulative_contour_length(contour):
    """Return the running length along ``contour``.

    Element ``i`` of the result is the summed Euclidean length of the segments from
    point 0 up to point ``i``; element 0 is therefore 0.
    """
    deltas = contour[:-1, :] - contour[1:, :]
    segment_lengths = np.sqrt(np.square(deltas).sum(axis=1))
    running_length = np.zeros((len(contour),), dtype=float)
    running_length[1:] = segment_lengths.cumsum()
    return running_length



# Take the length of a contour, map it to [0...1] and pick a point on the contour at the given ratio
def point_at_contour_ratio(contour, length_ratio, desired_ratio):
    """Return the point located at ``desired_ratio`` of the way along ``contour``.

    Parameters
    ----------
    contour : ndarray, shape (n, 2)
        Ordered contour points.
    length_ratio : array-like, shape (n,)
        Non-decreasing relative position of every contour point (cumulative length
        divided by total length, so normally running from 0 to 1).
    desired_ratio : float
        Relative position to sample.

    Returns
    -------
    ndarray, shape (2,)
        An existing contour point when one lies exactly at ``desired_ratio``,
        otherwise the linear interpolation between the two neighbouring points.
        If no point reaches ``desired_ratio`` the last contour point is returned.
    """
    length_ratio = np.asarray(length_ratio, dtype=float)

    # First contour point at or after the desired ratio
    candidates = np.flatnonzero(length_ratio >= desired_ratio)
    if candidates.size == 0:
        # Ratio beyond the end (or undefined ratios of a degenerate contour): clamp
        return contour[-1, :]
    idx = int(candidates[0])
    if idx == 0 or length_ratio[idx] == desired_ratio:
        # There is a precise contour point located at the desired ratio
        return contour[idx, :]

    # Interpolate between points idx-1 and idx. The weight must be measured from the
    # PREVIOUS point; measuring it from point idx gives a negative weight and
    # extrapolates backwards past point idx-1.
    segment_ratio = length_ratio[idx] - length_ratio[idx - 1]
    weight = (desired_ratio - length_ratio[idx - 1]) / segment_ratio
    return (1 - weight) * contour[idx - 1, :] + weight * contour[idx, :]



# Extract leaf vein based on the leaf contour.
# "top_idx" represents the contour index of the top of the leaf
def extract_leaf_vein(leaf_contour, top_idx, n_pts=20):
    """Approximate the leaf vein as the mid-line between the two sides of the contour.

    Parameters
    ----------
    leaf_contour : ndarray, shape (n, 2)
        Closed leaf contour whose first point is the petiole end of the leaf.
    top_idx : int
        Contour index of the top of the leaf. It should be at least 1: with
        ``top_idx == 0`` the left side has zero length and its ratios are undefined.
    n_pts : int, optional
        Number of points sampled along the vein (default 20, the former fixed value).

    Returns
    -------
    ndarray, shape (n_pts, 2)
        Vein points running from the petiole end to the top of the leaf.

    Raises
    ------
    ValueError
        If ``n_pts`` is smaller than 2.
    """
    if n_pts < 2:
        raise ValueError('n_pts must be at least 2, got {}'.format(n_pts))

    # Left side: from the petiole point up to (and including) the top point.
    left_contour = leaf_contour[:(1 + top_idx), :]
    # Right side: from the top point to the end of the contour, closed with the
    # petiole point, then reversed so that it also starts at the petiole.
    right_contour = np.vstack((leaf_contour[top_idx:, :], leaf_contour[:1, :]))[::-1, :]

    # Relative position (0..1) of every point along its own side
    left_cum_length = cumulative_contour_length(left_contour)
    right_cum_length = cumulative_contour_length(right_contour)
    left_ratio = left_cum_length / left_cum_length[-1]
    right_ratio = right_cum_length / right_cum_length[-1]

    # Vein points are the halfway points between the points located at the same
    # relative ratio on the left and right sides
    leaf_vein = np.zeros((n_pts, 2), dtype=float)
    for i_pt in range(n_pts):
        curr_ratio = i_pt / (n_pts - 1)
        leaf_vein[i_pt, :] = (point_at_contour_ratio(left_contour, left_ratio, curr_ratio)
                              + point_at_contour_ratio(right_contour, right_ratio, curr_ratio)) / 2.0
    return leaf_vein



# Calculate a moving average along a given data vector "a"
# If radius is 2 the moving window has 1+2*radius size (1 for the middle element) 
def moving_average(a, radius):
    """Centered moving average of the 1-D data vector ``a``.

    The full window holds ``1 + 2 * radius`` elements. Close to either end the window
    is shrunk symmetrically so that it still fits inside the data; the first and last
    elements are therefore returned unchanged. Inputs shorter than the full window
    (including empty input) are handled by the same shrinking rule.

    Parameters
    ----------
    a : array-like, 1-D
    radius : int
        Number of neighbours taken on each side of the central element.

    Returns
    -------
    ndarray of float, same shape as ``a``
    """
    a = np.asarray(a)
    n = a.size
    avg_res = np.zeros_like(a, dtype=float)
    if n == 0:
        return avg_res

    # Cumulative sum with one artificial zero at the beginning, so that the sum of
    # a[i:j] is a_cumsum[j] - a_cumsum[i]
    a_cumsum = np.zeros((1 + n), dtype=float)
    a_cumsum[1:] = np.cumsum(a, dtype=float)

    # Per-element radius: the requested one, limited by the distance to the nearest end
    idx = np.arange(n)
    local_radius = np.minimum(max(int(radius), 0), np.minimum(idx, n - 1 - idx))
    avg_res[:] = (a_cumsum[idx + local_radius + 1] - a_cumsum[idx - local_radius]) / (1 + 2 * local_radius)

    # The end points have a window of one element: copy them to avoid round-off
    avg_res[0] = a[0]
    avg_res[-1] = a[-1]
    return avg_res


In [7]:
# Compute the intersection between an infinite line and a limited segment
# Points a and b define the line, points c and d define the segment
def line_segment_intersection(a, b, c, d):
    """Intersect the infinite line through ``a`` and ``b`` with the segment ``c``-``d``.

    Returns
    -------
    (bool, ndarray or None)
        ``(True, point)`` with ``point`` of shape (1, 2) when the line crosses the
        segment; ``(False, None)`` when the two are parallel or when the crossing
        falls outside the segment.
    """
    line_dx, line_dy = a[0] - b[0], a[1] - b[1]
    seg_dx, seg_dy = c[0] - d[0], c[1] - d[1]

    # Zero determinant: the segment is parallel to the line
    denominator = line_dx * seg_dy - line_dy * seg_dx
    if denominator == 0:
        return False, None

    # t parametrises the line (a -> b), u parametrises the segment (c -> d)
    offset_x, offset_y = a[0] - c[0], a[1] - c[1]
    t = (offset_x * seg_dy - offset_y * seg_dx) / denominator
    u = (offset_x * line_dy - offset_y * line_dx) / denominator

    # Only the segment is bounded, so only u has to lie in [0, 1]
    if not (0 <= u <= 1):
        return False, None

    crossing = np.array([a[0] + t * (b[0] - a[0]), a[1] + t * (b[1] - a[1])], dtype=float)
    return True, crossing.reshape((1, 2))



# Compute the intersection between an infinite line (defined by two points) and a contour (a list of at least two points)
def line_contour_intersection(line, contour):
    """Intersect an infinite line with an open polyline.

    ``line`` is a (2, 2) array holding two points of the line; ``contour`` is an
    (n, 2) array whose consecutive points form the segments that are tested.

    Returns the crossing (shape (1, 2)) closest to the first point of ``line``, or
    ``None`` when no segment is crossed.
    """
    closest_pt, closest_dist = None, np.inf
    reference_pt = line[:1, :]
    for seg_start, seg_end in zip(contour[:-1], contour[1:]):
        crossed, candidate_pt = line_segment_intersection(line[0, :], line[1, :], seg_start, seg_end)
        if not crossed:
            continue
        candidate_dist = cdist(reference_pt, candidate_pt)
        if candidate_dist < closest_dist:
            # Keep only the closest intersection
            closest_pt, closest_dist = candidate_pt, candidate_dist
    return closest_pt



# Compute the distance from a point to an infinite line by projecting it orthogonally to the line
def distance_from_pt_to_line(pt, line):
    """Orthogonal distance from the 2-D point ``pt`` to the infinite line ``line``.

    Parameters
    ----------
    pt : array-like with two elements (shape (2,) or (1, 2))
    line : array-like, shape (2, 2)
        Two distinct points defining the line.

    Returns
    -------
    float
    """
    line = np.asarray(line, dtype=float)
    direction = line[0, :] - line[1, :]
    offset = line[0, :] - np.ravel(np.asarray(pt, dtype=float))
    # z-component of the 2-D cross product, written out explicitly because np.cross
    # on 2-element vectors is deprecated since NumPy 2.0.
    cross_z = direction[0] * offset[1] - direction[1] * offset[0]
    return np.abs(cross_z) / np.hypot(direction[0], direction[1])


In [8]:
# Find plant centers based on stem overlaps at the center
def find_plant_centers___per_rep(stem_rep_path):
    """Locate the plant center of every image of a replicate from its stem masks.

    For each image file name, the stem masks of all leaves (scaled to 0..1) are
    summed; the plant center is the pixel where that sum is largest, i.e. where most
    stems overlap.

    Parameters
    ----------
    stem_rep_path : str
        Replicate folder containing one folder per leaf, each with a
        ``stem seq/hidden stem mask seq`` sub-folder.

    Returns
    -------
    (dict, dict)
        ``plant_centers`` maps file name -> (row, column) of the center, i.e. (Y, X)
        order; ``stem_sums`` maps file name -> summed stem mask (float array).
    """
    stem_sums = dict()
    for leaf_foldername in get_all_folders(stem_rep_path, only_foldernames=True, sorted_list=True):
        stem_masks_path = join(stem_rep_path, leaf_foldername, 'stem seq', 'hidden stem mask seq')
        for mask_path in get_all_files(stem_masks_path, only_filenames=False, sorted_list=True):
            # basename() works with the separators of any platform; the mask is read
            # from the path that was actually found instead of a re-assembled one.
            curr_mask_filename = basename(mask_path)
            curr_mask = read_mask(mask_path).astype(float) / 255.0

            curr_sum = stem_sums.get(curr_mask_filename)
            if curr_sum is None:
                stem_sums[curr_mask_filename] = curr_mask
                continue

            if np.any(np.array(curr_sum.shape) < np.array(curr_mask.shape)):
                # The new mask is larger in at least one dimension: grow the
                # accumulator (anchored at the top-left corner) before adding.
                enlarged_img = np.zeros((max(curr_sum.shape[0], curr_mask.shape[0]),
                                         max(curr_sum.shape[1], curr_mask.shape[1])),
                                        dtype=curr_sum.dtype)
                enlarged_img[:curr_sum.shape[0], :curr_sum.shape[1]] = curr_sum
                curr_sum = enlarged_img
                stem_sums[curr_mask_filename] = curr_sum
            curr_sum[:curr_mask.shape[0], :curr_mask.shape[1]] += curr_mask

    plant_centers = dict()
    for filename, stem_sum in stem_sums.items():
        plant_centers[filename] = np.unravel_index(np.argmax(stem_sum), stem_sum.shape)  # (Y, X) order

    return plant_centers, stem_sums

In [9]:
# Compute isotropies at replicate level
def _pad_mask_to_shape(mask, shape):
    """Return ``mask`` anchored at the top-left corner of a zero array of ``shape``."""
    if mask.shape == tuple(shape):
        return mask
    padded = np.zeros(shape, dtype=mask.dtype)
    n_rows = min(shape[0], mask.shape[0])
    n_cols = min(shape[1], mask.shape[1])
    padded[:n_rows, :n_cols] = mask[:n_rows, :n_cols]
    return padded


def compute_rep_isotropy(leaf_base_path, dataset, acc, rep, leaf_seq_in_path,
                         leaf_mask_profiles, plant_centers, parallelize):
    """Compute the isotropy of the plant for every image of a replicate.

    For each image the leaves are ordered by angle. For every pair of angular
    neighbours (the last leaf is paired with the first one) the convex hull of both
    leaf masks plus the plant-center pixel is computed; the union of these hulls is
    the plant shape. Isotropy is ``4 * pi * area / perimeter**2`` of that shape,
    rounded to 3 decimals.

    Parameters
    ----------
    leaf_base_path, dataset, acc, rep : str
        Path components of the replicate folder holding the leaf masks.
    leaf_seq_in_path : bool
        Whether the masks live below an extra ``leaf seq`` folder.
    leaf_mask_profiles : list of list
        Leaf trait rows; element 0 is the date, 1 the ``'HH:MM'`` time and 4 the
        integer leaf id. Element 15 is used as the leaf angle.
        NOTE(review): index 15 depends on the order of the leaf column list, which is
        not defined in this block -- confirm that it really is the angle column.
    plant_centers : dict
        File name -> plant center in (row, column) order.
    parallelize : bool
        Not used by this function.

    Returns
    -------
    list of tuple
        ``(date, 'HH:MM', dataset, acc, replicate number, isotropy)`` per image.
    """
    isotropies = []
    leaf_angles_per_date = defaultdict(list)
    for profile in leaf_mask_profiles:
        time_parts = profile[1].split(':')
        leaf_angles_per_date[profile[0].year, profile[0].month, profile[0].day,
                             int(time_parts[0]), int(time_parts[1])].append((profile[4], profile[15]))  # leaf, angle

    rep_path = join(leaf_base_path, dataset, acc, rep)
    if leaf_seq_in_path:
        mask_sub_dirs = ('leaf seq', 'hidden leaf mask seq')
    else:
        mask_sub_dirs = ('hidden leaf mask seq',)
    all_filenames, _ = get_all_filenames_in_replicate(rep_path, leaf_seq_in_path)

    for filename in all_filenames:
        # Plant centers are stored in (row, column) order
        center_row, center_col = (int(v) for v in plant_centers[filename])
        year, month, day, hour, minute = extract_date_time_from_filename(filename)
        curr_leaf_angles = sorted(leaf_angles_per_date[year, month, day, hour, minute],
                                  key=lambda x: x[1])
        n_items = len(curr_leaf_angles)
        if n_items == 0:
            print('ERROR: curr_special_convex_hull is None!')
            continue

        # Read the mask of every leaf once, each from its own leaf folder
        mask_cache = dict()
        for leaf_id, _ in curr_leaf_angles:
            if leaf_id not in mask_cache:
                mask_cache[leaf_id] = (read_mask(
                    join(rep_path, 'Leaf_{:03d}'.format(leaf_id), *mask_sub_dirs, filename)) > 0).astype(np.uint8)

        # Bring all masks to one canvas that is large enough for every mask and for
        # the plant center, so that nothing is cropped and the center is in bounds.
        canvas_shape = (max(center_row + 1, *(m.shape[0] for m in mask_cache.values())),
                        max(center_col + 1, *(m.shape[1] for m in mask_cache.values())))
        for leaf_id in list(mask_cache.keys()):
            mask_cache[leaf_id] = _pad_mask_to_shape(mask_cache[leaf_id], canvas_shape)

        curr_special_convex_hull = np.zeros(canvas_shape, dtype=bool)
        for i_leaf in range(n_items):
            leaf_id = curr_leaf_angles[i_leaf][0]
            # The angular neighbour is identified by its leaf id, not by its list index
            next_leaf_id = curr_leaf_angles[(i_leaf + 1) % n_items][0]
            combi_mask = np.maximum(mask_cache[leaf_id], mask_cache[next_leaf_id])
            combi_mask[center_row, center_col] = 1
            # Accumulate the union of the pairwise hulls
            curr_special_convex_hull |= convex_hull_image(combi_mask)

        curr_special_convex_hull = binary_fill_holes(curr_special_convex_hull).astype(int)
        pix_mask_props = regionprops(curr_special_convex_hull)[0]
        curr_date = date(year, month, day)
        curr_time = '{:02d}:{:02d}'.format(hour, minute)
        isotropies.append((curr_date, curr_time, dataset, acc, int(rep[-2:]),
                           np.round(4 * np.pi * pix_mask_props['area'] / (pix_mask_props['perimeter']
                                                                          * pix_mask_props['perimeter']),
                                    3)))
    return isotropies
            


In [12]:
# Compute length and width of leaf lobe
def compute_length_and_width_of_leaf_lobe(leaf_mask_contour_XY, plant_center_XY):
    """Measure the length and the maximal width of a leaf lobe, in pixels.

    The contour is rotated to start at the point closest to the plant center; the
    point farthest from the center is taken as the top of the leaf. The length is the
    length of the vein returned by ``extract_leaf_vein``. The width is the largest
    distance between the left and right contour measured along lines orthogonal to
    the vein at its interior points.

    Returns
    -------
    (float, float)
        ``(vein length, maximal width)``.
    """
    center_dists = compute_contour_distances(plant_center_XY, leaf_mask_contour_XY)
    base_idx = np.argmin(center_dists)
    contour_from_base = np.roll(leaf_mask_contour_XY, -base_idx, axis=0)

    # Index of the top point inside the rotated contour
    tip_idx = (np.argmax(center_dists) - base_idx) % center_dists.shape[0]
    vein = extract_leaf_vein(contour_from_base, tip_idx)

    left_side = contour_from_base[:(1 + tip_idx), :]
    right_side = contour_from_base[tip_idx:, :]

    widths = [0.0]
    # First and last points on the leaf vein do not count
    for i_vein in range(1, vein.shape[0] - 1):
        prev_pt = vein[i_vein - 1, :]
        curr_pt = vein[i_vein, :]
        vein_angle = np.arctan2(curr_pt[1] - prev_pt[1], curr_pt[0] - prev_pt[0])
        normal_angle = np.mod(np.pi/2.0 + vein_angle, np.pi)

        # Probe line through the current vein point, orthogonal to the vein direction
        probe = np.zeros((2, 2), dtype=float)
        probe[0, :] = curr_pt
        probe[1, 0] = probe[0, 0] + 10 * np.cos(normal_angle)
        probe[1, 1] = probe[0, 1] + 10 * np.sin(normal_angle)

        left_hit = line_contour_intersection(probe, left_side)
        right_hit = line_contour_intersection(probe, right_side)
        if left_hit is None or right_hit is None:
            continue
        widths.append(cdist(left_hit, right_hit)[0, 0])
    widths.append(0.0)

    vein_length = cumulative_contour_length(vein)[-1]
    return vein_length, max(widths)


In [13]:
# Compute ellipse based on 2nd central moment of a mask
def get_ellipse_based_on_2nd_central_moment(mask):
    """Describe a 2-D mask by the ellipse derived from its second central moments.

    Parameters
    ----------
    mask : 2-D array
        Non-zero entries are the pixels of the shape.

    Returns
    -------
    list
        ``[center X, center Y, major radius, minor radius, angle (deg), area]``.
        An empty mask yields ``[0.0, 0.0, -1.0, -1.0, -1.0, -1000.0]`` and a
        single-pixel mask yields a circle of radius 0.5 centered on that pixel.
    """
    mask_points = np.transpose(np.nonzero(mask))  # one (row, column) pair per pixel
    n_points = mask_points.shape[0]
    if n_points == 0:
        return [0.0,  # Center.X
                0.0,  # Center.Y
                -1.0,  # Major radius
                -1.0,  # Minor radius
                -1.0,  # Angle major radius
                -1000.0]  # Area
    # The number of pixels is the number of ROWS of mask_points; its .size is twice
    # that, so comparing .size with 1 could never detect a single pixel.
    if n_points == 1:
        return [mask_points[0, 1] + 0.5,  # Center.X
                mask_points[0, 0] + 0.5,  # Center.Y
                0.5,  # Major radius
                0.5,  # Minor radius
                0.0,  # Angle major radius
                1.0]  # Area

    ellipse_data = []

    # Raw moments, computed on whole columns instead of pixel by pixel
    rows = mask_points[:, 0].astype(float)
    cols = mask_points[:, 1].astype(float)
    M00 = float(n_points)
    M01 = np.sum(rows)
    M10 = np.sum(cols)
    M02 = np.sum(rows * rows)
    M20 = np.sum(cols * cols)
    M11 = np.sum(cols * rows)
    ellipse_data.append(M10 / M00)  # Center.X
    ellipse_data.append(M01 / M00)  # Center.Y

    # Central moments
    mu_20 = M20 / M00 - ellipse_data[0] * ellipse_data[0]
    mu_02 = M02 / M00 - ellipse_data[1] * ellipse_data[1]
    mu_11 = M11 / M00 - ellipse_data[0] * ellipse_data[1]

    # Eigenvalues of the covariance matrix
    delta = np.sqrt(4 * mu_11 * mu_11 + (mu_20 - mu_02) * (mu_20 - mu_02))
    lambda_1 = ((mu_20 + mu_02) + delta) / 2.0
    lambda_2 = ((mu_20 + mu_02) - delta) / 2.0

    ellipse_data.append(np.sqrt(2.0 * np.abs(lambda_1)))  # Major radius
    ellipse_data.append(np.sqrt(2.0 * np.abs(lambda_2)))  # Minor radius

    angle = 0.0
    if mu_20 != mu_02:
        # Scalar arctangent: handing np.arctan a two-element list made the angle a
        # two-element array instead of a number.
        # NOTE(review): the absolute value drops the sign of (mu_20 - mu_02) that the
        # textbook orientation formula 0.5 * atan2(2 * mu_11, mu_20 - mu_02) keeps --
        # confirm that this convention is intended.
        angle = 0.5 * np.arctan((2.0 * mu_11) / np.abs(mu_20 - mu_02)) * 180.0 / np.pi
    angle = np.mod(angle + 90.0, 180.0)
    ellipse_data.append(angle)  # Angle major radius

    ellipse_data.append(M00)  # Area

    return ellipse_data
    

# Compute distances from center to contour pixels
def compute_contour_distances(center, contour):
    """Return, as a flat array, the Euclidean distances between the points of
    ``center`` and the points of ``contour`` (both 2-D arrays of points)."""
    distance_matrix = cdist(center, contour)
    return distance_matrix.reshape(-1)


# Compute leaf mask profile (i.e. list of traits)
def compute_leaf_mask_profile(leaf_mask, leaf_column_list, plant_center, pot_area_calibrated, calibration_factor = 1):
    """Compute the list of traits of a single leaf mask.

    Parameters
    ----------
    leaf_mask : 2-D uint8 array
        Binary leaf mask (non-zero = leaf).
    leaf_column_list : list of str
        Names of the traits to return, in output order. ``'l_LAI'`` is always
        appended as the last returned value.
    plant_center : ndarray, shape (1, 2)
        Plant center in (Y, X) order.
    pot_area_calibrated : float
        Pot area used to normalise the leaf area into ``'l_LAI'``.
    calibration_factor : float, optional
        Factor that converts pixel lengths to mm (areas use its square).

    Returns
    -------
    list
        The requested trait values followed by ``'l_LAI'``, each rounded to 3 decimals.
    """
    leaf_mask_profile = dict()
    pix_mask_props = regionprops(leaf_mask)[0]
    leaf_mask_profile['l_area (mm^2)'] = pix_mask_props['area'] * calibration_factor * calibration_factor
    l_convex_hull_area_calibrated = pix_mask_props['area_convex'] * calibration_factor * calibration_factor
    leaf_mask_profile['l_perimeter (mm)'] = pix_mask_props['perimeter'] * calibration_factor
    leaf_mask_profile['l_eccentricity'] = pix_mask_props['eccentricity']

    leaf_mask_profile['l_roundness'] = \
        4.0 * np.pi * leaf_mask_profile['l_area (mm^2)'] / (leaf_mask_profile['l_perimeter (mm)']
                                                            * leaf_mask_profile['l_perimeter (mm)'])

    contour_results = cv2.findContours(leaf_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    # The contour list is the second-to-last element in both the OpenCV 3 and the
    # OpenCV 4 return layouts.
    contours = contour_results[-2]
    # A mask may hold more than one blob; squeezing the whole contour list only works
    # for exactly one contour, so the largest one is selected explicitly.
    largest_contour = max(contours, key=cv2.contourArea)
    leaf_mask_contour_XY = largest_contour.reshape(-1, 2)
    pix_bounding_circle_center, pix_bounding_circle_radius = cv2.minEnclosingCircle(leaf_mask_contour_XY)
    leaf_mask_profile['l_circumference (mm)'] = 2.0 * np.pi * pix_bounding_circle_radius * calibration_factor

    leaf_mask_profile['l_compactness'] = leaf_mask_profile['l_area (mm^2)'] / l_convex_hull_area_calibrated

    leaf_mask_profile['l_surface_coverage'] = \
        leaf_mask_profile['l_area (mm^2)'] / (np.pi * pix_bounding_circle_radius * pix_bounding_circle_radius
                                              * calibration_factor * calibration_factor)

    pix_rot_bbox_center, (pix_rot_bbox_width, pix_rot_bbox_height), rot_bbox_angle = cv2.minAreaRect(leaf_mask_contour_XY)
    l_bounding_box_area_calibrated = pix_rot_bbox_width * pix_rot_bbox_height * calibration_factor * calibration_factor

    leaf_mask_profile['l_extent'] = leaf_mask_profile['l_area (mm^2)'] / l_bounding_box_area_calibrated

    ellipse_2nd_central_moment = get_ellipse_based_on_2nd_central_moment(leaf_mask)
    leaf_mask_profile['l_RMA'] = 0.0
    if ellipse_2nd_central_moment[2] > 0:  # Major radius
        leaf_mask_profile['l_RMA'] = \
            2 * np.sqrt((ellipse_2nd_central_moment[2] / 2.0) ** 2
                        - (ellipse_2nd_central_moment[3] / 2.0) ** 2) / ellipse_2nd_central_moment[2]

    # The contour is in (X, Y) order, the plant center in (Y, X): flip the center.
    leaf_dists = compute_contour_distances(plant_center[:, ::-1], leaf_mask_contour_XY)
    idx_bottom_pt = np.argmin(leaf_dists)
    # Bottom = contour point closest to the plant center, top = farthest; both are
    # stored in (Y, X) order.
    leaf_bottom_pt = np.reshape(leaf_mask_contour_XY[idx_bottom_pt, ::-1], (1, 2))
    leaf_top_pt = np.reshape(leaf_mask_contour_XY[np.argmax(leaf_dists), ::-1], (1, 2))

    leaf_mask_profile['l_angle (deg)'] = np.arctan2(leaf_bottom_pt[0, 0] - leaf_top_pt[0, 0],
                                                    leaf_top_pt[0, 1] - leaf_bottom_pt[0, 1]) * 180.0 / np.pi

    # The lobe measurement receives the contour in reversed point order
    pix_lamina_length, pix_lamina_width = compute_length_and_width_of_leaf_lobe(leaf_mask_contour_XY[::-1, :],
                                                                                plant_center[:, ::-1])
    leaf_mask_profile['l_lamina_length (mm)'] = pix_lamina_length * calibration_factor
    leaf_mask_profile['l_width (mm)'] = pix_lamina_width * calibration_factor
    # Petiole length: distance from the plant center to the bottom point of the leaf
    leaf_mask_profile['l_petiole_length (mm)'] = \
        compute_contour_distances(plant_center, leaf_bottom_pt)[0] * calibration_factor
    leaf_mask_profile['l_length (mm)'] = \
        leaf_mask_profile['l_petiole_length (mm)'] + leaf_mask_profile['l_lamina_length (mm)']
    leaf_mask_profile['SOL'] = \
        leaf_mask_profile['l_lamina_length (mm)'] * leaf_mask_profile['l_lamina_length (mm)'] \
        / leaf_mask_profile['l_area (mm^2)']

    leaf_mask_profile['l_LAI'] = leaf_mask_profile['l_area (mm^2)'] / pot_area_calibrated

    leaf_key_list = leaf_column_list + ['l_LAI']
    return [np.round(leaf_mask_profile[k], 3) for k in leaf_key_list]


In [14]:
# Compute leaf traits, per leaf
def compute_leaf_traits___per_leaf(leaf_base_path, dataset, acc, rep, leaf, leaf_seq_in_path,
                                   leaf_column_list, plant_centers, pot_area_calibrated, calibration_factor):
    """Compute the trait rows of every mask image of one leaf.

    Returns
    -------
    (list, defaultdict)
        ``leaf_mask_profiles``: one row per image,
        ``[date, 'HH:MM', acc, replicate number, leaf number] + traits``.
        ``leaf_masks_per_datetime``: maps ``('YYYY-MM-DD', 'HH:MM')`` to a list of
        ``(leaf, binary mask)`` pairs.
    """
    if leaf_seq_in_path:
        mask_sub_dirs = ['leaf seq', 'hidden leaf mask seq']
    else:
        mask_sub_dirs = ['hidden leaf mask seq']
    leaf_path = join(leaf_base_path, dataset, acc, rep, leaf, *mask_sub_dirs)

    leaf_mask_profiles = []
    leaf_masks_per_datetime = defaultdict(list)
    for leaf_instance in get_all_files(leaf_path, only_filenames=True, sorted_list=True):
        # Binarise the mask: every non-zero pixel belongs to the leaf
        leaf_mask = (read_mask(join(leaf_path, leaf_instance)) > 0).astype(np.uint8)
        year, month, day, hour, minute = extract_date_time_from_filename(leaf_instance)
        curr_date = date(year, month, day)
        curr_time = '{:02d}:{:02d}'.format(hour, minute)

        row_identity = [curr_date, curr_time, acc, int(rep[-2:]), int(leaf[-3:])]
        plant_center = np.array(plant_centers[leaf_instance], ndmin=2)
        row_traits = compute_leaf_mask_profile(leaf_mask, leaf_column_list, plant_center,
                                               pot_area_calibrated, calibration_factor)
        leaf_mask_profiles.append(row_identity + row_traits)

        date_key = f'{curr_date.year:04d}-{curr_date.month:02d}-{curr_date.day:02d}'
        leaf_masks_per_datetime[date_key, curr_time].append((leaf, leaf_mask))
    return leaf_mask_profiles, leaf_masks_per_datetime

In [15]:
# Compute leaf traits, per replicate
def compute_leaf_traits___per_rep(leaf_base_path, dataset, acc, rep,
                                  stem_base_path, leaf_seq_in_path,
                                  leaf_column_list, pot_area_calibrated, calibration_factor, parallelize):
    """Compute leaf-level and plant-level traits of one replicate.

    Steps:
      1. locate the plant centers from the stem masks;
      2. compute the traits of every leaf image and accumulate, per date/time, the
         leaf masks into a "plant density" mask (number of leaves covering a pixel);
      3. append to every leaf row the percentage of the leaf overlapped by other leaves;
      4. compute plant isotropy and summed LAI per date/time;
      5. derive the day offsets of the replicate;
      6. compute the plant slenderness of leaves (SOL) and append it to the plant rows.

    Returns
    -------
    (list, list, dict, dict)
        ``leaf_mask_profiles``: leaf rows (LAI removed, overlap percentage appended);
        ``plant_traits``: rows ``[date, time, acc, rep, isotropy, summed LAI, SOL]``;
        ``leaf_day_diff_per_rep``: ``(dataset, acc, rep) -> timedelta``;
        ``leaf_day_das_per_rep``: ``(dataset, acc, rep, 'YYYY-MM-DD') -> timedelta``.
    """
    leaf_mask_profiles = []
    leaf_LAI_per_datetime = defaultdict(list)
    leaf_masks_per_datetime = defaultdict(list)
    plant_density_masks_per_datetime = dict()
    leaf_overlapping_percentage = dict()
    leaf_day_diff_per_rep = dict()
    leaf_day_das_per_rep = dict()
    rep_path = join(stem_base_path, dataset, acc, rep)
    plant_centers, stem_sums_per_filename = find_plant_centers___per_rep(rep_path)
    for leaf in get_all_folders(rep_path, only_foldernames=True, sorted_list=True):
        if not parallelize:
            print(f'\t\t\t>>>>> Leaf: "{leaf}"')
        leaf_instance_profiles, curr_leaf_masks_per_datetime = compute_leaf_traits___per_leaf(
            leaf_base_path, dataset, acc, rep, leaf, leaf_seq_in_path,
            leaf_column_list[:-1], plant_centers, pot_area_calibrated, calibration_factor)
        for leaf_instance_profile in leaf_instance_profiles:
            curr_date = leaf_instance_profile[0]
            curr_date_str = '{:04d}-{:02d}-{:02d}'.format(curr_date.year, curr_date.month, curr_date.day)
            curr_time = leaf_instance_profile[1]
            leaf_mask_profiles.append(leaf_instance_profile[:-1])  # Eliminate LAI from the common leaf traits
            leaf_LAI_per_datetime[curr_date_str, curr_time].append(leaf_instance_profile[-1])  # LAI added here
            if (curr_date_str, curr_time) not in leaf_masks_per_datetime:
                # First leaf seen at this date/time: its mask starts the density mask
                for curr_leaf_id, curr_leaf_mask in curr_leaf_masks_per_datetime[curr_date_str, curr_time]:
                    if curr_leaf_id == leaf:
                        leaf_masks_per_datetime[curr_date_str, curr_time].append((curr_leaf_id, curr_leaf_mask))
                        plant_density_masks_per_datetime[curr_date_str, curr_time] = curr_leaf_mask.copy()
            else:
                for curr_leaf_id, curr_leaf_mask in curr_leaf_masks_per_datetime[curr_date_str, curr_time]:
                    if curr_leaf_id == leaf:
                        if np.any(np.array(plant_density_masks_per_datetime[curr_date_str, curr_time].shape)
                                  != np.array(curr_leaf_mask.shape)):
                            # Fit the mask to the shape of the density mask (top-left anchored)
                            resized_curr_leaf_mask = np.zeros_like(np.array(
                                plant_density_masks_per_datetime[curr_date_str, curr_time]))
                            copy_ranges = (min(resized_curr_leaf_mask.shape[0], curr_leaf_mask.shape[0]),
                                           min(resized_curr_leaf_mask.shape[1], curr_leaf_mask.shape[1]))
                            resized_curr_leaf_mask[:copy_ranges[0], :copy_ranges[1]] = \
                                curr_leaf_mask[:copy_ranges[0], :copy_ranges[1]]
                            leaf_masks_per_datetime[curr_date_str, curr_time].append((curr_leaf_id, resized_curr_leaf_mask))
                            plant_density_masks_per_datetime[curr_date_str, curr_time] += resized_curr_leaf_mask
                        else:
                            leaf_masks_per_datetime[curr_date_str, curr_time].append((curr_leaf_id, curr_leaf_mask))
                            plant_density_masks_per_datetime[curr_date_str, curr_time] += curr_leaf_mask
    # Compute overlapping percentages
    for curr_date_str, curr_time in leaf_masks_per_datetime.keys():
        curr_plant_density_mask = plant_density_masks_per_datetime[curr_date_str, curr_time]
        for curr_leaf_id, curr_leaf_mask in leaf_masks_per_datetime[curr_date_str, curr_time]:
            # Density restricted to this leaf; pixels where it exceeds the leaf's own
            # contribution are covered by at least one other leaf
            leafs_plant_density_mask = curr_plant_density_mask.copy()
            leafs_plant_density_mask[np.logical_not(curr_leaf_mask.astype(bool))] = 0
            leaf_overlapping_percentage[curr_date_str, curr_time, acc, rep, curr_leaf_id] = \
                np.round(100.0 * np.sum((leafs_plant_density_mask - curr_leaf_mask) > 0) / np.sum(curr_leaf_mask), 3)
    # Add overlapping percentages to the rest of the data
    for leaf_instance_profile in leaf_mask_profiles:
        curr_date = leaf_instance_profile[0]
        curr_date_str = '{:04d}-{:02d}-{:02d}'.format(curr_date.year, curr_date.month, curr_date.day)
        curr_time = leaf_instance_profile[1]
        leaf_instance_profile.append(
            leaf_overlapping_percentage[curr_date_str, curr_time, acc, rep, 'Leaf_{:03d}'.format(leaf_instance_profile[4])])

    rep_isotropies = compute_rep_isotropy(leaf_base_path, dataset, acc, rep, leaf_seq_in_path,
                                          leaf_mask_profiles, plant_centers, parallelize)
    plant_traits = []
    ds2_start_screening_date = date(2022, 8, 2)
    for curr_date, curr_time, curr_dataset, curr_acc, curr_rep, curr_isotropy in rep_isotropies:
        curr_date_str = curr_date.strftime('%Y-%m-%d')
        plant_traits.append([curr_date, curr_time, curr_acc, curr_rep,
                             curr_isotropy,
                             sum(leaf_LAI_per_datetime[curr_date_str, curr_time])])
        das_offset = 13 if curr_dataset == 'leaf_dataset1' else 11
        if (curr_dataset, curr_acc, curr_rep) not in leaf_day_diff_per_rep.keys():
            # We are at the first date from this combination, we can calculate the day difference
            init_date = curr_date
            day_delta = timedelta(0)
            if curr_dataset == 'leaf_dataset2':
                day_delta = timedelta(days=(curr_date - ds2_start_screening_date).days)
        diff_to_first_day = timedelta(days=(curr_date - init_date).days)
        leaf_day_diff_per_rep[curr_dataset, curr_acc, curr_rep] = day_delta
        leaf_day_das_per_rep[curr_dataset, curr_acc, curr_rep, curr_date_str] = diff_to_first_day + timedelta(days=das_offset)
    # Compute slenderness of leaves (SOL)
    # The replicate number is parsed exactly like in the leaf rows and the isotropy
    # rows (last two characters of the folder name), so the comparison and the
    # dictionary lookup below always use the same value.
    rep_id = int(rep[-2:])
    plant_SOL = dict()
    for filename, curr_stem_sum in stem_sums_per_filename.items():
        year, month, day, hour, minute = extract_date_time_from_filename(filename)
        curr_date_str = '{:04d}-{:02d}-{:02d}'.format(year, month, day)
        curr_time = '{:02d}:{:02d}'.format(hour, minute)
        curr_plant_density_mask = plant_density_masks_per_datetime[curr_date_str, curr_time]
        # Plant area = pixels covered by any stem or any leaf
        if np.any(np.array(curr_stem_sum.shape) != np.array(curr_plant_density_mask.shape)):
            whole_img = np.zeros((max(curr_stem_sum.shape[0], curr_plant_density_mask.shape[0]),
                                  max(curr_stem_sum.shape[1], curr_plant_density_mask.shape[1])),
                                 dtype=curr_stem_sum.dtype)
            whole_img[:curr_stem_sum.shape[0],
                      :curr_stem_sum.shape[1]] = curr_stem_sum
        else:
            whole_img = curr_stem_sum.copy()
        whole_img[:curr_plant_density_mask.shape[0],
                  :curr_plant_density_mask.shape[1]] += curr_plant_density_mask
        curr_area = np.sum(whole_img > 0) * calibration_factor * calibration_factor
        curr_leaf_ids = set([int(x[0][-3:]) for x in leaf_masks_per_datetime[curr_date_str, curr_time]])
        # NOTE(review): column 15 is used here as the leaf length; the same index is
        # read as the leaf angle in compute_rep_isotropy -- confirm against the leaf
        # column list, which is not defined in this block.
        leaf_lengths = [x[15] for x in leaf_mask_profiles
                        if x[0].strftime('%Y-%m-%d') == curr_date_str
                        and x[1] == curr_time
                        and x[2] == acc
                        and x[3] == rep_id
                        and x[4] in curr_leaf_ids]
        sum_leaf_lengths = sum(leaf_lengths)
        plant_SOL[curr_date_str, curr_time, acc, rep_id] = sum_leaf_lengths * sum_leaf_lengths / curr_area
    for plant_trait in plant_traits:
        plant_trait.append(plant_SOL[plant_trait[0].strftime('%Y-%m-%d'),
                                     plant_trait[1],
                                     plant_trait[2],
                                     plant_trait[3]])
    return leaf_mask_profiles, plant_traits, leaf_day_diff_per_rep, leaf_day_das_per_rep

In [16]:

def shift_dates(init_profiles, curr_dataset, leaf_day_diff_per_rep, leaf_day_das_per_rep):
    """Return copies of the profile rows with a DAS value inserted (and, optionally, a shifted date).

    Each input row is a list laid out as ``[date, time, accession, rep, ...]``.
    The corresponding output row is ``[date', time, das, accession, rep, ...]`` where

    * ``das`` is ``leaf_day_das_per_rep[curr_dataset, accession, rep, 'YYYY-MM-DD']``,
      keyed on the row's ORIGINAL (unshifted) date;
    * ``date'`` is the original date for ``'leaf_dataset1'``; for every other
      dataset it is ``date - leaf_day_diff_per_rep[curr_dataset, accession, rep]``
      (presumably a ``timedelta`` -- confirm against the producer of that dict).

    The input rows are not modified; a new list of new rows is returned.
    """
    keep_original_date = curr_dataset == 'leaf_dataset1'
    shifted_rows = []
    for row in init_profiles:
        row_date = row[0]
        date_key = row_date.strftime('%Y-%m-%d')
        row_time = row[1]
        accession = row[2]
        rep_num = row[3]
        if keep_original_date:
            # The offset table is deliberately not consulted for this dataset.
            out_date = row_date
        else:
            out_date = row_date - leaf_day_diff_per_rep[curr_dataset, accession, rep_num]
        das = leaf_day_das_per_rep[curr_dataset, accession, rep_num, date_key]
        # Everything from the accession onwards is carried over unchanged.
        shifted_rows.append([out_date, row_time, das] + row[2:])
    return shifted_rows

In [17]:
# Save data to Excel file
def save_to_excel(df_list, sheet_name_list, file_path):
    """Write each DataFrame to its own sheet of one Excel workbook.

    Parameters
    ----------
    df_list : list of pandas.DataFrame
        Frames to write, one per sheet, in order.
    sheet_name_list : list of str
        Sheet names; ``sheet_name_list[i]`` is used for ``df_list[i]``.
        Surplus names are ignored.
    file_path : str
        Destination workbook path.

    Returns
    -------
    None. When ``df_list`` is empty a warning is printed and no file is created.

    Raises
    ------
    ValueError
        If there are fewer sheet names than DataFrames (checked before the
        workbook is opened, so no partial file is produced).
    """
    n_items = len(df_list)
    if n_items == 0:
        print('>>>>>>> WARNING: NOTHING TO SAVE TO EXCEL.')
        return
    if len(sheet_name_list) < n_items:
        raise ValueError('save_to_excel: got {} DataFrame(s) but only {} sheet name(s)'.format(
            n_items, len(sheet_name_list)))
    # The context manager closes the workbook even if writing a sheet fails,
    # so the file handle is never leaked.
    with pd.ExcelWriter(file_path, date_format='dd-mm-yyyy') as writer:
        for df, sheet_name in zip(df_list, sheet_name_list):
            df.to_excel(writer, sheet_name=sheet_name, index=False)
            # NOTE: set_column() is the XlsxWriter worksheet API, so this relies on
            # pandas selecting the xlsxwriter engine for the given file_path.
            worksheet = writer.sheets[sheet_name]
            for col_idx, column in enumerate(df.columns):
                cell_lengths = df.iloc[:, col_idx].astype(str).map(len)
                # A frame without rows has no cell lengths (max() would be NaN):
                # fall back to the header width alone.
                longest_cell = int(cell_lengths.max()) if len(cell_lengths) > 0 else 0
                # 1.35 widens the column a little beyond the longest text.
                column_length = int(1.35 * max(longest_cell, len(str(column))))
                worksheet.set_column(col_idx, col_idx, column_length)


In [20]:
# Main script -- configuration
# Input / output locations (left blank here; fill in before running).
leaf_base_path = r''
stem_base_path = r''
out_path = r''

# Per-dataset calibration factor; it is applied twice (squared) when converting pixel areas.
# Was 0.13888 for DS1 and 0.15698 for DS2
calibration_factors = {
    'leaf_dataset1': 0.13715,
    'leaf_dataset2': 0.14690,
}

# Calibrated pot areas: pixel count of a manually segmented pot * calibration factor ** 2.
# Pot 1 area manually segmented from Go-0/rep_08/1000334_2022_05_13_12_01_41-6-3-TC03-RGB1_pot_A2_Go-0-08.png
# Pot 2 area manually segmented from Ler-1/rep_30/1000711_2022_08_09_17_01_44-7-29-TC06-RGB1_pot_D3_Ler-1-30.png
pot_areas_calibrated = {
    'leaf_dataset1': 238177 * calibration_factors['leaf_dataset1'] * calibration_factors['leaf_dataset1'],
    'leaf_dataset2': 151381 * calibration_factors['leaf_dataset2'] * calibration_factors['leaf_dataset2'],
}

# Run the per-replicate computation through joblib when True, sequentially otherwise.
parallelize = True
leaf_seq_in_path = False

# Column layout of the output sheets.
prefix_column_list = [
    'Date',
    'Time',
    'DAS',
    'Accession',
    'Rep_num',
    'Leaf_num',
]
leaf_column_list = [
    'l_area (mm^2)',
    'l_perimeter (mm)',
    'l_roundness',
    'l_circumference (mm)',
    'l_eccentricity',
    'l_compactness',
    'l_extent',
    'l_surface_coverage',
    'l_RMA',
    'l_angle (deg)',
    'l_length (mm)',
    'l_lamina_length (mm)',
    'l_petiole_length (mm)',
    'l_width (mm)',
    'SOL',
    'l_overlapping (%)',
]
plant_column_list = [
    'Date',
    'Time',
    'DAS',
    'Accession',
    'Rep_num',
    'Isotropy',
    'LAI',
    'SOL',
]

# Source column -> averaged column name for the "Rep_Leaf_Traits_AVG" sheet.
# The key order also fixes the column order of that sheet.
Rep_Leaf_Traits_AVG_col_translations = {
    'l_area (mm^2)': 'l_area_AVG (mm^2)',
    'l_perimeter (mm)': 'l_perimeter_AVG (mm)',
    'l_roundness': 'l_roundness_AVG',
    'l_circumference (mm)': 'l_circumference_AVG (mm)',
    'l_eccentricity': 'l_eccentricity_AVG',
    'l_compactness': 'l_compactness_AVG',
    'l_extent': 'l_extent_AVG',
    'l_length (mm)': 'l_length_AVG (mm)',
    'l_width (mm)': 'l_width_AVG (mm)',
    'l_RMA': 'l_RMA_AVG',
    'l_overlapping (%)': 'l_overlapping_AVG (%)',
    'SOL': 'SOL_AVG',
}

# Same mapping (independent copy) for the "Accession_Leaf_Traits_AVG" sheet.
Accession_Leaf_Traits_col_translations = dict(Rep_Leaf_Traits_AVG_col_translations)

def _leaf_traits_frame(profiles):
    """Build the "Leaf_Traits" sheet, ordered by accession, rep, leaf, date and time."""
    return pd.DataFrame(sorted(profiles, key=lambda x: (x[3], x[4], x[5], x[0], x[1])),
                        columns=prefix_column_list + leaf_column_list)


def _plant_traits_frame(traits):
    """Build the "Plant_Traits" sheet, ordered by accession, rep, date and time."""
    return pd.DataFrame(sorted(traits, key=lambda x: (x[3], x[4], x[0], x[1])),
                        columns=plant_column_list)


def _rep_leaf_traits_avg(df_leaf_traits):
    """Average the leaf traits per (Accession, Rep_num, Leaf_num).

    Values are first averaged within each Date (rounded to 3 decimals); those
    daily means are then averaged over all dates and rounded again.
    """
    id_cols = [prefix_column_list[x] for x in [0, 3, 4, 5]]  # ['Date', 'Accession', 'Rep_num', 'Leaf_num']
    daily_means = df_leaf_traits[id_cols + list(Rep_Leaf_Traits_AVG_col_translations.keys())].groupby(
        ['Date', 'Accession', 'Rep_num', 'Leaf_num'],
        as_index=False).mean().round(3)
    averaged = daily_means[[col for col in daily_means if col not in ['Date']]].groupby(
        ['Accession', 'Rep_num', 'Leaf_num'],
        as_index=False).mean().round(3)
    return (averaged
            .rename(columns=Rep_Leaf_Traits_AVG_col_translations)
            .sort_values(['Accession', 'Rep_num', 'Leaf_num']))


def _accession_leaf_traits_avg(df_leaf_traits):
    """Average the leaf traits (and DAS) per (Date, Accession, Leaf_num).

    Values are first averaged per replicate within each date (rounded to
    3 decimals); those per-replicate means are then averaged across replicates
    and rounded again.
    """
    id_cols = [prefix_column_list[x] for x in [0, 2, 3, 4, 5]]  # ['Date', 'DAS', 'Accession', 'Rep_num', 'Leaf_num']
    per_rep_means = df_leaf_traits[id_cols + list(Accession_Leaf_Traits_col_translations.keys())].groupby(
        ['Date', 'Accession', 'Rep_num', 'Leaf_num'],
        as_index=False).mean().round(3)
    averaged = per_rep_means[[col for col in per_rep_means if col not in ['Rep_num']]].groupby(
        ['Date', 'Accession', 'Leaf_num'],
        as_index=False).mean().round(3)
    averaged = averaged.rename(columns=Accession_Leaf_Traits_col_translations)
    leading_cols = ['Date', 'DAS', 'Accession', 'Leaf_num']
    averaged = averaged[leading_cols + [col for col in averaged if col not in leading_cols]]
    return averaged.sort_values(['Accession', 'Leaf_num', 'Date'])


def _append_sheet(df, sheet_name, empty_label, df_list, sheet_name_list):
    """Queue `df` for export under `sheet_name`; report instead when it is missing or has no rows."""
    if df is None or df.shape[0] == 0:
        print(f'ERROR: {empty_label} is empty!')
        return
    df_list.append(df)
    sheet_name_list.append(sheet_name)


def _append_avg_sheets(df_leaf_traits, label_prefix, df_list, sheet_name_list):
    """Queue the two averaged sheets derived from `df_leaf_traits` (None means: no leaf data)."""
    rep_avg = None if df_leaf_traits is None else _rep_leaf_traits_avg(df_leaf_traits)
    _append_sheet(rep_avg, 'Rep_Leaf_Traits_AVG',
                  f'{label_prefix}_df_rep_leaf_traits_AVG', df_list, sheet_name_list)
    acc_avg = None if df_leaf_traits is None else _accession_leaf_traits_avg(df_leaf_traits)
    _append_sheet(acc_avg, 'Accession_Leaf_Traits_AVG',
                  f'{label_prefix}_df_acc_leaf_traits_AVG', df_list, sheet_name_list)


print('\n+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')
print('Complete datasets with cropped and visible leaves...')
for dataset in get_all_folders(leaf_base_path, only_foldernames=True, sorted_list=True):
    print(f'> Dataset: "{dataset}"')
    # exist_ok keeps the cell re-runnable when the output tree is already there.
    makedirs(join(out_path, dataset, 'Excels'), exist_ok=True)
    ds_leaf_mask_profiles = []
    ds_plant_traits = []
    all_leaf_day_diff_per_rep = dict()
    all_leaf_day_das_per_rep = dict()
    for acc in get_all_folders(join(leaf_base_path, dataset), only_foldernames=True, sorted_list=True):
        print(f'\t>>> Accession: "{acc}"')
        makedirs(join(out_path, dataset, 'Excels', acc), exist_ok=True)
        init_acc_leaf_mask_profiles = []
        init_acc_plant_traits = []
        all_rep_folders = get_all_folders(join(leaf_base_path, dataset, acc), only_foldernames=True, sorted_list=True)
        if len(all_rep_folders) == 0:
            print('\t\t\t----------------- No rep folders!!!! -----------------')

        # Compute the traits of every replicate, in parallel or one by one.
        if parallelize:
            results = Parallel(n_jobs=-2)(
                delayed(compute_leaf_traits___per_rep)(leaf_base_path, dataset, acc, rep,
                                                       stem_base_path, leaf_seq_in_path,
                                                       leaf_column_list,
                                                       pot_areas_calibrated[dataset], calibration_factors[dataset],
                                                       parallelize)
                for rep in all_rep_folders)
        else:
            results = []
            for rep in all_rep_folders:
                print(f'\t\t>>>>> Rep: "{rep}"')
                results.append(
                    compute_leaf_traits___per_rep(
                        leaf_base_path, dataset, acc, rep,
                        stem_base_path, leaf_seq_in_path,
                        leaf_column_list,
                        pot_areas_calibrated[dataset], calibration_factors[dataset],
                        parallelize))
        for rep_leaf_mask_profiles, rep_plant_traits, leaf_day_diff_per_rep, leaf_day_das_per_rep in results:
            init_acc_leaf_mask_profiles.extend(rep_leaf_mask_profiles)
            init_acc_plant_traits.extend(rep_plant_traits)
            all_leaf_day_diff_per_rep.update(leaf_day_diff_per_rep)
            all_leaf_day_das_per_rep.update(leaf_day_das_per_rep)

        ################################
        # Save per accession           #
        ################################
        acc_df_list = []
        acc_sheet_name_list = []

        # "Leaf_Traits" sheet
        if len(init_acc_leaf_mask_profiles) == 0:
            print('ERROR: init_acc_leaf_mask_profiles is empty!')
            # Reset explicitly so that nothing from a previous accession is reused below.
            acc_leaf_mask_profiles = []
            acc_df_leaf_traits = None
        else:
            acc_leaf_mask_profiles = shift_dates(init_acc_leaf_mask_profiles, dataset,
                                                 all_leaf_day_diff_per_rep, all_leaf_day_das_per_rep)
            acc_df_leaf_traits = _leaf_traits_frame(acc_leaf_mask_profiles)
            acc_df_list.append(acc_df_leaf_traits)
            acc_sheet_name_list.append('Leaf_Traits')

        # "Plant_Traits" sheet
        if len(init_acc_plant_traits) == 0:
            print('ERROR: init_acc_plant_traits is empty!')
            acc_plant_traits = []
            acc_df_plant_traits = None
        else:
            acc_plant_traits = shift_dates(init_acc_plant_traits, dataset,
                                           all_leaf_day_diff_per_rep, all_leaf_day_das_per_rep)
            acc_df_plant_traits = _plant_traits_frame(acc_plant_traits)
            acc_df_list.append(acc_df_plant_traits)
            acc_sheet_name_list.append('Plant_Traits')

        # "Rep_Leaf_Traits_AVG" and "Accession_Leaf_Traits_AVG" sheets
        _append_avg_sheets(acc_df_leaf_traits, 'acc', acc_df_list, acc_sheet_name_list)

        # Save to Excel
        save_to_excel(acc_df_list, acc_sheet_name_list, join(out_path, dataset, 'Excels', acc, acc + '.xlsx'))

        ds_leaf_mask_profiles.extend(acc_leaf_mask_profiles)
        ds_plant_traits.extend(acc_plant_traits)

        ################################
        # Save per replicate           #
        ################################
        # The replicates to export are the ones that actually produced leaf rows.
        if acc_df_leaf_traits is None:
            acc_rep_nums = []
        else:
            acc_rep_nums = acc_df_leaf_traits.loc[acc_df_leaf_traits['Accession'] == acc, 'Rep_num'].unique()
        for rep_num in acc_rep_nums:
            # Filter on the number itself rather than re-parsing it from the folder
            # name, which would be wrong for replicate numbers with more than 2 digits.
            rep = 'rep_{:02d}'.format(rep_num)
            makedirs(join(out_path, dataset, 'Excels', acc, rep), exist_ok=True)
            rep_df_list = []
            rep_sheet_name_list = []

            # "Leaf_Traits" sheet
            rep_df_leaf_traits = acc_df_leaf_traits.loc[acc_df_leaf_traits['Rep_num'] == rep_num]
            _append_sheet(rep_df_leaf_traits, 'Leaf_Traits', 'rep_df_leaf_traits',
                          rep_df_list, rep_sheet_name_list)

            # "Plant_Traits" sheet
            if acc_df_plant_traits is None:
                rep_df_plant_traits = None
            else:
                rep_df_plant_traits = acc_df_plant_traits.loc[acc_df_plant_traits['Rep_num'] == rep_num]
            _append_sheet(rep_df_plant_traits, 'Plant_Traits', 'rep_plant_traits',
                          rep_df_list, rep_sheet_name_list)

            # "Rep_Leaf_Traits_AVG" and "Accession_Leaf_Traits_AVG" sheets
            _append_avg_sheets(rep_df_leaf_traits, 'rep', rep_df_list, rep_sheet_name_list)

            # Save to Excel
            save_to_excel(rep_df_list, rep_sheet_name_list, join(out_path, dataset, 'Excels', acc, rep, rep + '.xlsx'))

    ################################
    # Save per dataset             #
    ################################
    ds_df_list = []
    ds_sheet_name_list = []

    # "Leaf_Traits" sheet
    if len(ds_leaf_mask_profiles) == 0:
        print('ERROR: ds_leaf_mask_profiles is empty!')
        # Reset explicitly so that nothing from a previous dataset is reused below.
        ds_df_leaf_traits = None
    else:
        ds_df_leaf_traits = _leaf_traits_frame(ds_leaf_mask_profiles)
        ds_df_list.append(ds_df_leaf_traits)
        ds_sheet_name_list.append('Leaf_Traits')

    # "Plant_Traits" sheet
    if len(ds_plant_traits) == 0:
        print('ERROR: ds_plant_traits is empty!')
    else:
        ds_df_plant_traits = _plant_traits_frame(ds_plant_traits)
        ds_df_list.append(ds_df_plant_traits)
        ds_sheet_name_list.append('Plant_Traits')

    # "Rep_Leaf_Traits_AVG" and "Accession_Leaf_Traits_AVG" sheets
    _append_avg_sheets(ds_df_leaf_traits, 'ds', ds_df_list, ds_sheet_name_list)

    # Save to Excel
    save_to_excel(ds_df_list, ds_sheet_name_list, join(out_path, dataset, 'Excels', 'Excel_ALL.xlsx'))



+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Complete datasets with cropped and visible leaves...
> Dataset: "leaf_dataset1"
	>>> Accession: "Ba4-1"
	>>> Accession: "Ba5-1"
	>>> Accession: "Bch-4"
	>>> Accession: "Col-0"
	>>> Accession: "Cvi-0"
	>>> Accession: "Ei-6"
	>>> Accession: "Go-0"
	>>> Accession: "Hs-0"
	>>> Accession: "Is-1"
	>>> Accession: "Kz-9"
	>>> Accession: "Ler-1"
	>>> Accession: "Lz-0"
	>>> Accession: "Or-0"
	>>> Accession: "Sav-0"
	>>> Accession: "TOU-I-17"
	>>> Accession: "TOU-J-3"
	>>> Accession: "Uk-1"
	>>> Accession: "Uk-4"
	>>> Accession: "Utrecht"
	>>> Accession: "Ws-2"
	>>> Accession: "Zdr-1"
> Dataset: "leaf_dataset2"
	>>> Accession: "Col-0"
	>>> Accession: "Cvi-0"
	>>> Accession: "Is-1"
	>>> Accession: "Kz-9"
	>>> Accession: "Ler-1"
	>>> Accession: "TOU-I-17"
	>>> Accession: "Uk-1"
	>>> Accession: "Zdr-1"
