In [3]:
import glob
from os.path import join, isfile, isdir, basename
import numpy as np
import pandas as pd
import cv2
from joblib import Parallel, delayed
from pandas.io.formats import excel
# Replace the class-level `header_style` attribute of pandas' ExcelFormatter with None.
# NOTE(review): presumably this makes DataFrame.to_excel() write header cells without
# pandas' default header formatting — confirm it still has that effect on the installed
# pandas version, since this touches a non-public module (pandas.io.formats.excel).
excel.ExcelFormatter.header_style = None


In [22]:
def compute_plant_area(path, accession, replicate, calibration_factor):
    """Measure the foreground area of every mask image found in a directory.

    Every regular file matching ``*.*`` directly inside ``path`` is read with
    ``cv2.imread``. A pixel counts as foreground when at least one of its
    channels is greater than zero. The foreground pixel count is multiplied by
    ``calibration_factor`` squared and rounded to 3 decimals.

    Date and time are taken from the underscore-separated fields of the file
    name (0-based): fields 2, 3 and 1 form the date, fields 4 and 5 the time.
    NOTE(review): this presumably corresponds to names shaped like
    ``<prefix>_<year>_<month>_<day>_<hour>_<minute>_<...>`` — confirm. If the
    minute field is the last one, the file extension ends up in ``Time``.

    Args:
        path: Directory containing the mask files.
        accession: Label copied into the ``class`` column of every row.
        replicate: Replicate name ending in a number (e.g. ``'rep_01'``); the
            trailing digits become the ``rep_num`` column.
        calibration_factor: Linear scale factor; it is squared before being
            multiplied with the pixel count (presumably length units per
            pixel — confirm).

    Returns:
        Tuple ``(accession, replicate, area_df)`` where ``area_df`` has the
        columns ``Date``, ``Time``, ``p_area``, ``rep_num`` and ``class``, one
        row per readable mask file, ordered by file path.

    Raises:
        ValueError: If at least one file is present and ``replicate`` does not
            end in a digit.
        IndexError: If a readable file name has fewer than six
            underscore-separated fields.
    """
    # glob returns entries in filesystem-dependent order; sort so the rows come
    # out in a reproducible order.
    mask_paths = sorted(x for x in glob.glob(join(path, '*.*')) if isfile(x))

    # Loop-invariant: area represented by one pixel.
    area_per_pixel = calibration_factor * calibration_factor

    # Parse the replicate number from the complete trailing digit run, so that
    # 'rep_1', 'rep_01' and 'rep_123' all work. It is only parsed when there is
    # something to process, so an empty directory still yields an empty frame.
    rep_num = None
    if mask_paths:
        trailing_digits = replicate[len(replicate.rstrip('0123456789')):]
        if not trailing_digits:
            raise ValueError(
                'Replicate name {!r} does not end in a number'.format(replicate))
        rep_num = int(trailing_digits)

    dates = []
    times = []
    p_areas = []

    for mask_path in mask_paths:
        mask = cv2.imread(mask_path)
        if mask is None:
            # cv2.imread signals "cannot decode" by returning None instead of
            # raising; '*.*' also matches non-image files, so skip those.
            print('*** Skipping unreadable mask file: "{}"'.format(mask_path))
            continue

        # A pixel is foreground when any channel is non-zero.
        mask_area = np.sum(np.any(mask > 0, axis=2))

        fname_elems = basename(mask_path).split('_')
        # NOTE(review): the date parts are joined with a doubled slash ('//').
        # Kept unchanged because consumers of the spreadsheet may rely on it —
        # confirm whether a single '/' was intended.
        dates.append(f'{fname_elems[2]}//{fname_elems[3]}//{fname_elems[1]}')
        times.append(f'{fname_elems[4]}:{fname_elems[5]}')
        p_areas.append(np.round(area_per_pixel * mask_area, 3))

    n_rows = len(p_areas)

    # Explicit dtypes keep the column types stable even when there are no rows.
    area_df = pd.DataFrame({
        'Date': np.array(dates, dtype=object),
        'Time': np.array(times, dtype=object),
        'p_area': np.array(p_areas, dtype=float),
        'rep_num': np.array([rep_num] * n_rows, dtype=int),
        'class': np.array([accession] * n_rows, dtype=object)
    })

    return accession, replicate, area_df

In [25]:
def process_dataset(dataset_id, in_root_mask_path, out_path, calibration_factor, parallel=False):
    """Compute plant areas for every replicate of a dataset and export them to Excel.

    The expected layout is ``<in_root_mask_path>/<accession>/rep_*/masks``.
    An entry directly below the root is treated as an accession only when it
    contains at least one item and every item is a directory whose name starts
    with ``rep_``. Entries whose name starts with ``_`` are skipped. Everything
    that is not an accession is reported with a printed message.

    Replicates are processed in (accession, replicate) order, so sequential
    and parallel runs write the same rows in the same order.

    Args:
        dataset_id: Identifier used in the output file name
            (``extracted_features_DS<dataset_id>.xlsx``) and in the final
            status message.
        in_root_mask_path: Root directory holding the accession folders.
        out_path: Directory the Excel file is written to.
        calibration_factor: Passed through unchanged to ``compute_plant_area``.
        parallel: If True, the replicates are processed with joblib using
            ``n_jobs=-2``; otherwise they are processed one after another.

    Returns:
        None. The result is the Excel file (sheet ``Extracted Features``); no
        file is written when no replicate folders are found.
    """
    # One entry per replicate: (mask directory, accession name, replicate name).
    tasks = []

    for folder in glob.glob(join(in_root_mask_path, '*')):
        accession_folder = basename(folder)

        # Skip folders starting with '_'
        if accession_folder.startswith('_'):
            print('\n*** No accession folder: "{}"'.format(folder))
            continue

        folder_contents = glob.glob(join(folder, '*'))
        # all() is vacuously true for an empty list, which would silently accept
        # empty folders and plain files at the root; require at least one entry.
        is_accession = bool(folder_contents) and all(
            basename(x).startswith('rep_') and isdir(x) for x in folder_contents)

        if not is_accession:
            print('*** No accession folder: "{}"'.format(folder))
            continue

        for rep_path in folder_contents:
            replicate_folder = basename(rep_path)
            mask_dir = join(in_root_mask_path, accession_folder, replicate_folder, 'masks')
            tasks.append((mask_dir, accession_folder, replicate_folder))

    # glob order is filesystem-dependent. Sorting the work list once gives both
    # execution modes the same row order.
    tasks.sort(key=lambda task: task[1:])

    if tasks:
        if parallel:
            # Results come back in the order of the submitted tasks, so no
            # second sort is needed.
            results = Parallel(n_jobs=-2)(
                delayed(compute_plant_area)(mask_dir,
                                            accession_folder,
                                            replicate_folder,
                                            calibration_factor)
                for mask_dir, accession_folder, replicate_folder in tasks)
        else:
            results = [
                compute_plant_area(mask_dir, accession_folder, replicate_folder, calibration_factor)
                for mask_dir, accession_folder, replicate_folder in tasks
            ]

        # Each result is (accession, replicate, DataFrame); keep only the frames.
        frames = [result[2] for result in results]
        pd_results = pd.concat(frames, ignore_index=True)

        # Save the results to an Excel file
        pd_results.to_excel(join(out_path, f'extracted_features_DS{dataset_id}.xlsx'),
                            sheet_name='Extracted Features',
                            index=False)
    else:
        print('!!!!!!!!!! No mask paths found !!!!!!!!!!')

    print('FINISHED dataset {}.\n'.format(dataset_id))


In [None]:
# Per-dataset configuration. Entry i of `root_mask_paths` is processed with entry i of
# `calibration_factors` and written out as dataset number i + 1.
# NOTE: an empty path makes process_dataset glob relative to the current working directory.
root_mask_paths = [
    r'',  # Add your root mask paths here
    r'',
]
calibration_factors = [
    0.13715,
    0.14690,
]

# Directory that receives the Excel files; an empty string means the file name is used
# on its own, i.e. the files are written relative to the current working directory.
output_path = r''

# True -> replicates are processed in parallel via joblib.
use_parallel = True

# The two lists are paired by position; fail before any processing starts instead of
# hitting an IndexError mid-run or silently ignoring surplus calibration factors.
if len(root_mask_paths) != len(calibration_factors):
    raise ValueError(
        'root_mask_paths has {} entries but calibration_factors has {}'.format(
            len(root_mask_paths), len(calibration_factors)))

# Iterate over datasets and process each one (dataset ids start at 1)
for dataset_id, (root_mask_path, calibration_factor) in enumerate(
        zip(root_mask_paths, calibration_factors), start=1):
    process_dataset(dataset_id, root_mask_path, output_path, calibration_factor,
                    parallel=use_parallel)