In [1]:
import glob
from os.path import join, isdir, exists
from os import makedirs
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from joblib import Parallel, delayed
import distinctipy
from itertools import combinations_with_replacement

In [2]:
# Return all folder names at a given path
def get_all_folders(path, only_foldernames, sorted_list):
    """List the sub-directories found directly inside `path`.

    Parameters
    ----------
    path : str
        Directory whose immediate children are inspected (via `glob`, so
        entries whose name starts with a dot are not matched).
    only_foldernames : bool
        If true, return only the last path component of every folder;
        otherwise return the paths exactly as `glob` produced them.
    sorted_list : bool
        If true, return the result sorted; otherwise keep `glob`'s order.

    Returns
    -------
    list of str
        One entry per sub-directory; empty if there are none.
    """
    # Local import: the notebook's import cell only brings in join/isdir/exists.
    from os.path import basename

    folders = [x for x in glob.glob(join(path, '*')) if isdir(x)]
    if only_foldernames:
        # basename() understands the separators of the running OS, whereas
        # splitting on a literal backslash only worked for Windows-style paths.
        folders = [basename(x) for x in folders]
    if sorted_list:
        return sorted(folders)
    return folders


In [3]:
# Turn text into capital case
def to_camel_case(text):
    """Capitalize the first character of every space-separated word.

    Only the first character of each word is touched; the rest of the word is
    kept as written (so acronyms such as 'SOL' stay upper-case). The text is
    split on single spaces, so runs of spaces are preserved in the output.

    Parameters
    ----------
    text : str
        Text to convert; may be empty.

    Returns
    -------
    str
        The converted text.
    """
    # x[:1] (rather than x[0]) keeps empty chunks -- produced by an empty
    # string or by consecutive spaces -- from raising IndexError.
    return ' '.join([x[:1].capitalize() + x[1:] for x in text.split(' ')])

In [4]:
# Leaf overlapping plots
def add_individual_leaf_overlapping_plot(out_path, acc, rep, leaf, rep_trait,
                                         x_data, y_data, x_min,
                                         color, font_plot_title, font_axis_title):
    """Save a bar chart of one trait of a single leaf over time.

    The figure is written to
    `<out_path>/<acc>_<rep>_leaf_<NN>.png` and closed afterwards.

    Parameters
    ----------
    out_path : str
        Existing directory that receives the PNG file.
    acc : str
        Accession name (used in the title and the file name).
    rep : str
        Replicate folder name. Everything after its first four characters is
        parsed as the replicate number -- assumes names like 'rep_01';
        TODO confirm against the data layout.
    leaf : int
        Leaf number (converted with `int()` before formatting).
    rep_trait : str
        Column name of the plotted trait; it is reformatted into the Y label.
    x_data, y_data : array-like
        Bar positions (days after sowing) and bar heights.
    x_min : int
        First tick on the X axis; the axis itself starts one unit earlier.
    color : matplotlib color or None
        Fill color of the bars (None falls back to matplotlib's default).
    font_plot_title, font_axis_title : dict
        `fontdict` settings for the title and for both axis labels.
    """
    leaf_number = int(leaf)  # '{:02d}' rejects floats, so normalise first
    fig, ax = plt.subplots()
    try:
        # The color argument used to be accepted but never applied to the bars.
        ax.bar(x_data, y_data, color=color)
        ax.set_title('{} (Rep {:02d}, Leaf {:02d})'.format(acc, int(rep[4:]), leaf_number),
                     fontdict=font_plot_title)
        ax.set_xlabel('Days After Sowing (DAS)', fontdict=font_axis_title)

        # Turn a column name such as 'l_overlapping (%)' into 'Leaf Overlapping [%]'
        y_axis_label = rep_trait.replace('(', '[').replace(')', ']').replace('_', ' ').replace('^2', '²')
        if y_axis_label.startswith('l '):
            y_axis_label = 'Leaf ' + y_axis_label[2:]
        ax.set_ylabel(to_camel_case(y_axis_label), fontdict=font_axis_title)

        # Start the axis just before x_min and place a major tick every 5 days
        ax.set_xlim(xmin=x_min - 1)
        x_range = ax.get_xlim()
        ax.xaxis.set_ticks(np.arange(x_min, 1 + x_range[1], 5))
        ax.grid(True)
        ax.minorticks_on()
        fig.savefig(join(out_path, '{}_{}_leaf_{:02d}.png'.format(acc, rep, leaf_number)),
                    dpi=300, bbox_inches='tight')
    finally:
        # Always release the figure; this function is called once per leaf.
        plt.close(fig)


In [5]:
# Leaf angle plots (as polar plots)
def add_individual_leaf_angle_plot(out_path, acc, rep, leaf, r_data, rad_data, color, font_plot_title):
    """Save a polar plot of one leaf: `rad_data` as angle, `r_data` as radius.

    A red cross along 0/90/180/270 degrees and red dots every 45 degrees on
    the outer circle are drawn on top of the curve. The figure is written to
    `<out_path>/<acc>_<rep>_leaf_<NN>_angle.png` and then closed.

    `rep[4:]` is parsed as the replicate number -- assumes names like
    'rep_01'; TODO confirm against the data layout.
    """
    red = (1, 0, 0)
    fig, polar_ax = plt.subplots(subplot_kw={'projection': 'polar'})

    # Data curve; radial tick labels are hidden
    polar_ax.plot(rad_data, r_data, color=color)
    polar_ax.set_yticklabels([])
    polar_ax.set_title(
        'Leaf angle: {} (rep {:02d}, leaf {:02d})'.format(acc, int(rep[4:]), leaf),
        fontdict=font_plot_title)

    # Reference cross from the centre to the current outer radius
    spokes = [np.deg2rad([deg, deg]) for deg in (0, 90, 180, 270)]
    radius = [0, polar_ax.get_rmax()]
    polar_ax.plot(spokes[0], radius, spokes[1], radius, spokes[2], radius, spokes[3], radius,
                  lw=3, color=red)

    # One dot at the tip of every spoke and one 45 degrees further along
    for spoke in spokes:
        polar_ax.plot(spoke[1], radius[1], marker='o', color=red, markersize=5)
        polar_ax.plot(spoke[1] + np.pi/4, radius[1], marker='o', color=red, markersize=5)

    target = join(out_path, '{}_{}_leaf_{:02d}_angle.png'.format(acc, rep, leaf))
    fig.savefig(target, dpi=300, bbox_inches='tight')
    plt.close(fig)


In [10]:
# Create all plots for an accession (entry point: plot_acc; the underscore-prefixed names are its private helpers)
_FONT_PLOT_TITLE = {'weight': 'bold', 'size': 'large', 'family': 'serif'}
_FONT_LEGEND_TITLE = {'weight': 'bold', 'size': 10, 'family': 'serif'}
_FONT_AXIS_TITLE = {'weight': 'bold', 'family': 'serif'}
_N_MAX_LEGEND_ROWS = 20        # legend entries per column before a new column is started
_FALLBACK_MIN_N_DAYS = 4       # relaxed life-span threshold used when the requested one selects nothing
_SAVE_KWARGS = {'dpi': 300, 'bbox_inches': 'tight'}


def _select_long_lived(df, group_cols, min_n_days):
    """Return (keys, used_fallback) for groups whose DAS span is >= min_n_days.

    If no group qualifies, the threshold is relaxed to _FALLBACK_MIN_N_DAYS and
    used_fallback is True. Keys are index values (tuples for several group columns).
    """
    das = df.groupby(group_cols)['DAS']
    lifespan = das.max() - das.min()
    selected = set(lifespan.index[(lifespan >= min_n_days).values])
    used_fallback = len(selected) == 0
    if used_fallback:
        selected = set(lifespan.index[(lifespan >= _FALLBACK_MIN_N_DAYS).values])
    return selected, used_fallback


def _finish_das_axis(ax, x_min):
    """Start the X axis at x_min - 1, put a major tick every 5 units from x_min, enable minor ticks."""
    ax.set_xlim(xmin=x_min - 1)
    x_range = ax.get_xlim()
    ax.xaxis.set_ticks(np.arange(x_min, 1 + x_range[1], 5))
    ax.minorticks_on()


def _add_legend(ax, title, n_entries):
    """Place the legend right of the axes, using one column per _N_MAX_LEGEND_ROWS entries."""
    # Integer arithmetic: matplotlib expects an int column count, not a numpy float.
    n_columns = max(1, 1 + (n_entries - 1) // _N_MAX_LEGEND_ROWS)
    ax.legend(loc='upper left',
              bbox_to_anchor=(1.02, 1),
              title=title,
              title_fontproperties=_FONT_LEGEND_TITLE,
              ncol=n_columns)


def _draw_polar_reference(ax):
    """Draw a red cross at 0/90/180/270 degrees and red dots every 45 degrees at the outer radius."""
    red = (1, 0, 0)
    spokes = [np.deg2rad([deg, deg]) for deg in (0, 90, 180, 270)]
    radius = [0, ax.get_rmax()]
    ax.plot(spokes[0], radius, spokes[1], radius, spokes[2], radius, spokes[3], radius, lw=3, color=red)
    for spoke in spokes:
        ax.plot(spoke[1], radius[1], marker='o', color=red, markersize=5)
        ax.plot(spoke[1] + np.pi/4, radius[1], marker='o', color=red, markersize=5)


def _leaf_trait_axis_label(rep_trait):
    """Build the Y-axis label of a replicate-level leaf trait column."""
    if rep_trait == 'SOL':
        y_axis_label = 'Slenderness of Leaf (SOL)'
    elif rep_trait == 'l_RMA':
        y_axis_label = 'Leaf Rotational Mass Asymmetry (RMA)'
    elif rep_trait == 'l_width (mm)':
        y_axis_label = 'Leaf Width (Diameter) [mm]'
    else:
        y_axis_label = rep_trait.replace('(', '[').replace(')', ']').replace('_', ' ').replace('^2', '²')
        if y_axis_label.startswith('l '):
            y_axis_label = 'Leaf ' + y_axis_label[2:]
    return to_camel_case(y_axis_label)


def _leaf_trait_file_name(acc, rep, rep_trait):
    """Build the PNG file name of a replicate-level leaf trait plot."""
    if rep_trait == 'l_overlapping (%)':
        return f'{acc}_{rep}_Leaf_overlapping_percentage.png'
    if '(' in rep_trait:
        idx_par = rep_trait.find(' (')  # drop the leading 'l_' and the trailing ' (unit)'
        return f'{acc}_{rep}_Leaf_{rep_trait[2:idx_par]}.png'
    if rep_trait == 'SOL':
        return f'{acc}_{rep}_{rep_trait}.png'
    return f'{acc}_{rep}_Leaf_{rep_trait[2:]}.png'


def _avg_trait_axis_label(avg_acc_trait):
    """Build the Y-axis label of an accession-average leaf trait column."""
    if avg_acc_trait == 'SOL_AVG':
        return 'Average Slenderness of Leaves (SOL)'
    if avg_acc_trait == 'l_RMA_AVG':
        return 'Average Rotational Mass Asymmetry (RMA)'
    if avg_acc_trait == 'l_width_AVG (mm)':
        return 'Average Leaf Width (Diameter) [mm]'
    y_axis_label = \
        avg_acc_trait.replace('_AVG', '').replace('(', '[').replace(')', ']').replace('_', ' ').replace('^2', '²')
    if y_axis_label.startswith('l '):
        y_axis_label = 'Average Leaf ' + y_axis_label[2:]
    return to_camel_case(y_axis_label)


def _avg_trait_file_name(acc, avg_acc_trait):
    """Build the PNG file name of an accession-average leaf trait plot."""
    if '(' in avg_acc_trait:
        idx_par = avg_acc_trait.find(' (')  # the extra -4 removes the '_AVG' suffix before the unit
        return f'{acc}_Average_leaf_{avg_acc_trait[2:idx_par-4]}.png'
    if avg_acc_trait == 'SOL_AVG':
        return f'{acc}_Average_{avg_acc_trait[:-4]}.png'
    return f'{acc}_Average_leaf_{avg_acc_trait[2:-4]}.png'


def _plot_plant_traits(acc_file, plots_path, colors, dataset, acc, min_n_days, x_min):
    """Plot Isotropy, LAI and SOL per replicate; return True if the relaxed threshold was needed."""
    acc_plot_path = join(plots_path, dataset, 'Plots', acc)
    makedirs(acc_plot_path, exist_ok=True)  # exist_ok: re-running must not fail on existing output folders

    # Average repeated measurements of the same replicate and day
    df_acc = pd.read_excel(acc_file, sheet_name='Plant_Traits')
    value_columns = [x for x in df_acc.columns if x not in ('Date', 'Time', 'Accession')]
    df_mean = df_acc.loc[:, value_columns].groupby(['Rep_num', 'DAS'], as_index=False).mean().round(3)

    selected_reps, used_fallback = _select_long_lived(df_mean, ['Rep_num'], min_n_days)
    if len(selected_reps) == 0:
        return used_fallback

    df_sel = df_mean[df_mean['Rep_num'].isin(selected_reps)]
    curr_reps = df_sel['Rep_num'].unique()
    # The per-replicate slices do not depend on the trait, so build them once
    rep_frames = [(rep, df_sel.loc[df_sel['Rep_num'] == rep].sort_values(['DAS'])) for rep in curr_reps]
    y_labels = {'SOL': 'Slenderness of Leaves (SOL)', 'LAI': 'Leaf Area Index (LAI)'}

    for acc_trait in ['Isotropy', 'LAI', 'SOL']:
        fig, ax = plt.subplots(figsize=(10, 6))
        try:
            ax.set_title(f'{acc}', fontdict=_FONT_PLOT_TITLE)
            ax.set_xlabel('Days After Sowing (DAS)', fontdict=_FONT_AXIS_TITLE)
            ax.set_ylabel(y_labels.get(acc_trait, acc_trait), fontdict=_FONT_AXIS_TITLE)
            ax.grid(True)
            for i_rep, (rep, df_one_rep) in enumerate(rep_frames):
                ax.plot(df_one_rep['DAS'], df_one_rep[acc_trait],
                        color=colors[i_rep % len(colors)], label='{:02d}'.format(int(rep)))
            _finish_das_axis(ax, x_min)
            _add_legend(ax, 'Reps:', len(curr_reps))
            fig.savefig(join(acc_plot_path, f'{acc}_Rep_{acc_trait}.png'), **_SAVE_KWARGS)
        finally:
            plt.close(fig)
    return used_fallback


def _plot_leaf_traits(excel_path, plots_path, colors, dataset, acc, min_n_days, x_min):
    """Plot every leaf trait of every replicate folder; return the (dataset, acc, rep) tuples
    for which the relaxed life-span threshold was needed."""
    short_lived_replicates = []
    for rep in get_all_folders(join(excel_path, dataset, 'Excels', acc), only_foldernames=True, sorted_list=True):
        print(f'\t\t{rep}')
        rep_file = join(excel_path, dataset, 'Excels', acc, rep, rep+'.xlsx')
        if not exists(rep_file):
            continue

        # Create output folders
        rep_plot_path = join(plots_path, dataset, 'Plots', acc, rep)
        leaf_overlap_plot_path = join(rep_plot_path, 'Leaf_overlapping_percentage')
        leaf_angle_plot_path = join(rep_plot_path, 'Leaf_angle')
        for folder in (rep_plot_path, leaf_overlap_plot_path, leaf_angle_plot_path):
            makedirs(folder, exist_ok=True)

        # Average repeated measurements of the same leaf and day
        df_rep = pd.read_excel(rep_file, sheet_name='Leaf_Traits')
        rep_traits = [x for x in df_rep.columns if x.startswith('l_') or x == 'SOL']
        value_columns = [x for x in df_rep.columns if x not in ('Date', 'Time', 'Accession', 'Rep_num')]
        df_mean = df_rep.loc[:, value_columns].groupby(['Leaf_num', 'DAS'], as_index=False).mean().round(3)

        selected_leaves, used_fallback = _select_long_lived(df_mean, ['Leaf_num'], min_n_days)
        if used_fallback:
            short_lived_replicates.append((dataset, acc, rep))
        if len(selected_leaves) == 0:
            continue

        df_sel = df_mean[df_mean['Leaf_num'].isin(selected_leaves)]
        curr_leaves = df_sel['Leaf_num'].unique()
        # The per-leaf slices do not depend on the trait, so build them once
        leaf_frames = [(leaf, df_sel.loc[df_sel['Leaf_num'] == leaf].sort_values(['DAS']))
                       for leaf in curr_leaves]
        rep_title = rep.capitalize().replace('_', ' ')

        for rep_trait in rep_traits:
            is_angle = rep_trait == 'l_angle (deg)'
            if is_angle:
                # A single polar axes: creating a second one on the same figure would
                # leave the first one (with its tick labels) visible underneath.
                fig, ax = plt.subplots(subplot_kw={'projection': 'polar'})
            else:
                fig, ax = plt.subplots(figsize=(10, 6))
            try:
                if is_angle:
                    ax.set_title(f'Leaf angle: {acc} ({rep_title})', fontdict=_FONT_PLOT_TITLE)
                else:
                    ax.set_xlabel('Days After Sowing (DAS)', fontdict=_FONT_AXIS_TITLE)
                    ax.set_ylabel(_leaf_trait_axis_label(rep_trait), fontdict=_FONT_AXIS_TITLE)
                    ax.grid(True)
                    ax.set_title(f'{acc} ({rep_title})', fontdict=_FONT_PLOT_TITLE)

                # All drawing goes through `ax`, so the per-leaf helpers (which open and
                # close their own figures) cannot redirect it to another figure.
                for i_leaf, (leaf, df_one_leaf) in enumerate(leaf_frames):
                    leaf_color = colors[i_leaf % len(colors)]
                    leaf_number = int(leaf)  # helpers format it with '{:02d}', which rejects floats
                    leaf_label = '{:02d}'.format(leaf_number)

                    # Additional one-leaf bar chart for the overlapping percentage
                    if rep_trait == 'l_overlapping (%)':
                        add_individual_leaf_overlapping_plot(
                            leaf_overlap_plot_path, acc, rep, leaf_number, rep_trait,
                            df_one_leaf['DAS'], df_one_leaf[rep_trait], x_min,
                            leaf_color, _FONT_PLOT_TITLE, _FONT_AXIS_TITLE)

                    if is_angle:
                        # Angle (converted from degrees to radians) vs. DAS as radius,
                        # plus an additional one-leaf polar plot
                        angle_rad = df_one_leaf[rep_trait] * 2 * np.pi / 360
                        ax.plot(angle_rad, df_one_leaf['DAS'], color=leaf_color, label=leaf_label)
                        add_individual_leaf_angle_plot(
                            leaf_angle_plot_path, acc, rep, leaf_number,
                            df_one_leaf['DAS'], angle_rad,
                            leaf_color, _FONT_PLOT_TITLE)
                    else:
                        ax.plot(df_one_leaf['DAS'], df_one_leaf[rep_trait], color=leaf_color, label=leaf_label)

                if is_angle:
                    ax.set_yticklabels([])
                    _draw_polar_reference(ax)
                else:
                    _finish_das_axis(ax, x_min)

                _add_legend(ax, 'Leaves:', len(curr_leaves))
                fig.savefig(join(rep_plot_path, _leaf_trait_file_name(acc, rep, rep_trait)), **_SAVE_KWARGS)
            finally:
                plt.close(fig)
    return short_lived_replicates


def _plot_avg_leaf_traits(acc_file, plots_path, colors, dataset, acc, min_n_days, x_min):
    """Plot the accession-average leaf traits per leaf; return True if the relaxed threshold was needed."""
    avg_acc_plot_path = join(plots_path, dataset, 'Average_plots', acc)
    makedirs(avg_acc_plot_path, exist_ok=True)

    df_avg_acc = pd.read_excel(acc_file, sheet_name='Accession_Leaf_Traits_AVG')
    value_columns = [x for x in df_avg_acc.columns if x not in ['Date']]
    # The average overlapping percentage is deliberately not plotted here
    plotted_traits = [x for x in df_avg_acc.columns
                      if (x.startswith('l_') or x == 'SOL_AVG') and x != 'l_overlapping_AVG (%)']
    group_cols = ['Accession', 'Leaf_num']
    df_mean = df_avg_acc.loc[:, value_columns].groupby(group_cols + ['DAS'], as_index=False).mean().round(3)

    selected_avg_leaves, used_fallback = _select_long_lived(df_mean, group_cols, min_n_days)
    if len(selected_avg_leaves) == 0:
        return used_fallback

    # Keys are (Accession, Leaf_num) tuples, so compare row-wise tuples against them
    df_sel = df_mean[df_mean[group_cols].apply(tuple, axis=1).isin(selected_avg_leaves)]
    curr_leaves = df_sel['Leaf_num'].unique()
    leaf_frames = [(leaf, df_sel.loc[df_sel['Leaf_num'] == leaf].sort_values(['DAS']))
                   for leaf in curr_leaves]

    for avg_acc_trait in plotted_traits:
        fig, ax = plt.subplots(figsize=(10, 6))
        try:
            ax.set_xlabel('Days After Sowing (DAS)', fontdict=_FONT_AXIS_TITLE)
            ax.set_ylabel(_avg_trait_axis_label(avg_acc_trait), fontdict=_FONT_AXIS_TITLE)
            ax.grid(True)
            ax.set_title(f'{acc}', fontdict=_FONT_PLOT_TITLE)
            for i_leaf, (leaf, df_one_leaf) in enumerate(leaf_frames):
                ax.plot(df_one_leaf['DAS'], df_one_leaf[avg_acc_trait],
                        color=colors[i_leaf % len(colors)], label='{:02d}'.format(int(leaf)))
            _finish_das_axis(ax, x_min)
            _add_legend(ax, 'Leaves:', len(curr_leaves))
            fig.savefig(join(avg_acc_plot_path, _avg_trait_file_name(acc, avg_acc_trait)), **_SAVE_KWARGS)
        finally:
            plt.close(fig)
    return used_fallback


def plot_acc(excel_path, plots_path, colors, dataset, acc, min_n_days):
    """Create and save every plot of one accession.

    Three groups of PNG files are produced:

    * plant traits per replicate, read from sheet 'Plant_Traits' of
      `<excel_path>/<dataset>/Excels/<acc>/<acc>.xlsx` and written to
      `<plots_path>/<dataset>/Plots/<acc>/`;
    * leaf traits per replicate, read from sheet 'Leaf_Traits' of every
      `<acc>/<rep>/<rep>.xlsx` and written to `.../Plots/<acc>/<rep>/`
      (with per-leaf overlapping and angle plots in sub-folders);
    * accession-average leaf traits, read from sheet
      'Accession_Leaf_Traits_AVG' of the accession file and written to
      `<plots_path>/<dataset>/Average_plots/<acc>/`.

    Only replicates/leaves observed over at least `min_n_days` days (max DAS
    minus min DAS) are plotted. When none qualifies, the threshold is relaxed
    to 4 days and the case is reported in the return value. Missing Excel
    files are skipped silently. Output folders are created if needed and
    existing ones are reused, so the function can be re-run.

    Parameters
    ----------
    excel_path : str
        Root folder of the input data.
    plots_path : str
        Root folder of the generated plots.
    colors : sequence
        Matplotlib colors, cycled over replicates/leaves.
    dataset : str
        Dataset folder name. The X axis starts at tick 13 for 'leaf_dataset1'
        and at 11 otherwise -- presumably the first measured DAS of each
        dataset; TODO confirm.
    acc : str
        Accession folder name.
    min_n_days : int
        Minimum life span, in days, required for plotting.

    Returns
    -------
    tuple of three lists
        `(short_lived_accessions, short_lived_replicates,
        short_lived_avg_accessions)`: `(dataset, acc)`, `(dataset, acc, rep)`
        and `(dataset, acc)` tuples for which the relaxed threshold was
        needed. Useful only to find out which data did not meet `min_n_days`.
    """
    print(f'\t{acc}')
    short_lived_accessions = []
    short_lived_avg_accessions = []
    x_min = 13 if dataset == 'leaf_dataset1' else 11
    acc_file = join(excel_path, dataset, 'Excels', acc, acc+'.xlsx')
    has_acc_file = exists(acc_file)

    ################
    # Plant traits #
    ################
    if has_acc_file and _plot_plant_traits(acc_file, plots_path, colors, dataset, acc, min_n_days, x_min):
        short_lived_accessions.append((dataset, acc))

    ###############
    # Leaf traits #
    ###############
    short_lived_replicates = _plot_leaf_traits(excel_path, plots_path, colors, dataset, acc, min_n_days, x_min)

    ###########################
    # Avg. traits across reps #
    ###########################
    if has_acc_file and _plot_avg_leaf_traits(acc_file, plots_path, colors, dataset, acc, min_n_days, x_min):
        short_lived_avg_accessions.append((dataset, acc))

    # These return values are useful only if you are interested which data was excluded from plotting
    return short_lived_accessions, short_lived_replicates, short_lived_avg_accessions


In [None]:
###############
# Main script #
###############

# Experiment parameters
excel_path = r'D:\Path\to\excel\files\with\leaf\traits'
plots_path = excel_path  # plots are written next to the input data

# Color palette: 75 distinct colors (the listed light colors are excluded from the
# search), keeping only those whose two largest channels sum to less than 1.5,
# plus one hand-picked orange inserted at position 4.
init_colors = distinctipy.get_colors(75, list(combinations_with_replacement([1, 0.9, 0.8], 3)))
colors = [x for x in init_colors if np.sum(sorted(x, reverse=True)[:2]) < 1.5]
colors.insert(4, (1, 0.68, 0))

min_n_days = 9
parallelize = True

# Collected over all datasets: data for which the relaxed life-span threshold was needed
short_lived_accessions = []
short_lived_replicates = []
short_lived_avg_accessions = []

for dataset in get_all_folders(excel_path, only_foldernames=True, sorted_list=True):

    print(f'{dataset}')
    acc_folder_list = get_all_folders(join(excel_path, dataset, 'Excels'), only_foldernames=True, sorted_list=True)

    if parallelize:
        # Parallel version: all accessions are processed before any result is collected
        results = Parallel(n_jobs=-2)(
            delayed(plot_acc)(excel_path, plots_path, colors, dataset, acc, min_n_days)
            for acc in acc_folder_list)
    else:
        # Serial version: a lazy generator, so each accession is processed and
        # collected before the next one starts
        results = (plot_acc(excel_path, plots_path, colors, dataset, acc, min_n_days)
                   for acc in acc_folder_list)

    for acc_short_lived, rep_short_lived, avg_short_lived in results:
        short_lived_accessions.extend(acc_short_lived)
        short_lived_replicates.extend(rep_short_lived)
        short_lived_avg_accessions.extend(avg_short_lived)

