In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os
import numpy as np

# --- Run configuration ---------------------------------------------------
# Model variant; later cells branch on the values "new", "old" and "migli".
VPRM_old_or_new = "migli"
version = "V11"
region = "Alps"  # "Alps" or "Europe"

# Test switch to see results for R2_lt_zero - default: True (deletes sites below zero R2)
R2_lt_zero = True

# Identifier of the optimisation run; also the name of the directory that holds its results.
run_ID = f"{region}_VPRM_optimized_params_diff_evo_{version}_100"
# NOTE(review): absolute, machine-specific path - adjust when running elsewhere.
base_path = f"/home/madse/Downloads/Fluxnet_Data/all_tuned_params/{run_ID}"

# Sub-directories of base_path; the ones named "FLX_*" are read below.
folders = [
    f
    for f in os.listdir(base_path)
    if os.path.isdir(os.path.join(base_path, f))
]
flx_folders = [folder for folder in folders if folder.startswith("FLX_")]

if not flx_folders:
    print("Warning - There is no input data")
    raise SystemExit(0)

# Only workbooks of the selected model variant and version are read.
xlsx_suffix = VPRM_old_or_new + '_diff_evo_' + version + '_100.xlsx'

# Read every matching XLSX file first and concatenate once at the end:
# concatenating inside the loop re-copies the growing frame on every file.
site_frames = []
for folder in flx_folders:
    folder_path = os.path.join(base_path, folder)
    for file in os.listdir(folder_path):
        if file.endswith(xlsx_suffix):
            site_frames.append(pd.read_excel(os.path.join(folder_path, file)))

# pd.concat raises on an empty list, so keep the empty-frame result when no file matched.
# The original row index of each workbook is kept (no ignore_index).
df_parameters = pd.concat(site_frames, axis=0) if site_frames else pd.DataFrame()


In [None]:
# Display the directory the results are read from and the outputs are written to.
base_path

In [None]:

# Re-read the result workbooks (same loading steps as in the first cell).
folders = [
    f
    for f in os.listdir(base_path)
    if os.path.isdir(os.path.join(base_path, f))
]
flx_folders = [folder for folder in folders if folder.startswith("FLX_")]

if not flx_folders:
    print("Warning - There is no input data")
    raise SystemExit(0)

# Only workbooks of the selected model variant and version are read.
xlsx_suffix = VPRM_old_or_new + '_diff_evo_' + version + '_100.xlsx'

# Read every matching XLSX file first and concatenate once at the end:
# concatenating inside the loop re-copies the growing frame on every file.
site_frames = []
for folder in flx_folders:
    folder_path = os.path.join(base_path, folder)
    for file in os.listdir(folder_path):
        if file.endswith(xlsx_suffix):
            site_frames.append(pd.read_excel(os.path.join(folder_path, file)))

# pd.concat raises on an empty list, so keep the empty-frame result when no file matched.
# The original row index of each workbook is kept (no ignore_index).
df_parameters = pd.concat(site_frames, axis=0) if site_frames else pd.DataFrame()


In [None]:
# Preview seaborn's 'bright' palette; the PFT colours used in the plots are picked from it by index.
sns.color_palette('bright')

In [None]:
# import matplotlib.pyplot as plt
# import numpy as np
# import seaborn as sns

# NaN-free version of the loaded table; df_parameters itself is not modified.
df_parameters_nn = df_parameters.dropna()
font_size = 12

# One fixed colour per PFT, addressed by its position in seaborn's 'bright' palette.
color_palette = sns.color_palette('bright')
pft_palette_index = {
    'CRO': 8,
    'DBF': 0,
    'ENF': 9,
    'MF': 4,
    'GRA': 2,
    'SHB': 7,
    'WET': 3,
}
pft_colors = {
    pft_name: color_palette[palette_idx]
    for pft_name, palette_idx in pft_palette_index.items()
}



In [None]:
# Keep only the rows whose Topt is below 1 and show how far Topt lies from T_mean for them.
# NOTE(review): the threshold 1 is not explained in the notebook - confirm its intent.
low_topt_mask = df_parameters_nn["Topt"] < 1
df_parameters_nn = df_parameters_nn.loc[low_topt_mask]

topt_minus_tmean = df_parameters_nn["Topt"] - df_parameters_nn["T_mean"]
print(topt_minus_tmean)

In [None]:
# Select site-years by the sign of R2_NEE.
#   R2_lt_zero == True  -> keep R2_NEE > 0 (default analysis)
#   R2_lt_zero == False -> keep R2_NEE < 0 (inspect the poorly fitted site-years)
# The reported count is the number of rows actually removed, so it also includes
# rows with R2_NEE exactly 0 or NaN, which neither selection keeps.
if R2_lt_zero:
    keep_mask = df_parameters["R2_NEE"] > 0
    deleted_condition = "R2_NEE <= 0"
    str_R2_lt_zero = ""
else:
    keep_mask = df_parameters["R2_NEE"] < 0
    deleted_condition = "R2_NEE >= 0"
    str_R2_lt_zero = "_R2_lt_zero"

n_deleted = len(df_parameters) - int(keep_mask.sum())
print(f"Number of deleted site years due to {deleted_condition} = {n_deleted}")

# Rebind instead of mutating in place; the index is renumbered from 0.
df_parameters = df_parameters[keep_mask].reset_index(drop=True)


In [None]:

# Scatter of Topt against T_mean (one point per row, coloured by PFT) with a linear fit.
font_size = 12

# Remove incomplete rows once, so that the points, their colours and the fit all
# use exactly the same rows; NaN values would otherwise break np.polyfit.
topt_tmean = df_parameters.dropna(subset=["T_mean", "Topt"])

# First-degree least-squares fit: Topt = slope * T_mean + intercept.
coefficients = np.polyfit(topt_tmean["T_mean"], topt_tmean["Topt"], 1)
poly = np.poly1d(coefficients)
equation_regression = f'y = {coefficients[0]:.2f}x + {coefficients[1]:.2f}'
equation_normal = 'y = x'

plt.figure(figsize=(8, 6))
plt.scatter(
    topt_tmean["T_mean"],
    topt_tmean["Topt"],
    alpha=0.5,
    c=topt_tmean["PFT"].map(pft_colors),
)
plt.plot(topt_tmean["T_mean"], poly(topt_tmean["T_mean"]), color='red', label=equation_regression)
plt.xlabel(r'$T_{\mathrm{mean}}$', fontsize=font_size+2)
plt.ylabel(r'$T_{\mathrm{opt}}$', fontsize=font_size+2)
plt.xticks(fontsize=font_size)
plt.yticks(fontsize=font_size)
plt.grid(True)

# Empty scatters act as legend entries, one per PFT. `color=` is used because a bare
# RGB tuple passed as `c=` is ambiguous with value-mapped data.
for pft, color in pft_colors.items():
    plt.scatter([], [], color=color, label=pft)

plt.legend(fontsize=font_size)
plt.tight_layout()
plt.savefig(
    base_path
    + "/regression_Topt_vs_Tmean_"
    + VPRM_old_or_new
    + "_"
    + run_ID
    + str_R2_lt_zero
    + ".eps",
    dpi=300,
    bbox_inches="tight",
)


In [None]:


# Scatter of Topt against T_max (one point per row, coloured by PFT) with a linear fit.

# Remove incomplete rows jointly. Dropping NaN from each column on its own can leave
# x, y and the colour list with different lengths or mismatched rows, and NaN values
# would break np.polyfit.
topt_tmax = df_parameters.dropna(subset=["T_max", "Topt"])

# First-degree least-squares fit: Topt = slope * T_max + intercept.
coefficients = np.polyfit(topt_tmax["T_max"], topt_tmax["Topt"], 1)
poly = np.poly1d(coefficients)
equation_regression = f'y = {coefficients[0]:.2f}x + {coefficients[1]:.2f}'
equation_normal = 'y = x'

plt.figure(figsize=(8, 6))
plt.scatter(
    topt_tmax["T_max"],
    topt_tmax["Topt"],
    alpha=0.5,
    c=topt_tmax["PFT"].map(pft_colors),
)
plt.plot(topt_tmax["T_max"], poly(topt_tmax["T_max"]), color='red', label=equation_regression)
plt.xlabel(r'$T_{\mathrm{max}}$', fontsize=font_size+2)
plt.ylabel(r'$T_{\mathrm{opt}}$', fontsize=font_size+2)
plt.xticks(fontsize=font_size)
plt.yticks(fontsize=font_size)

plt.grid(True)

# Empty scatters act as legend entries, one per PFT. `color=` is used because a bare
# RGB tuple passed as `c=` is ambiguous with value-mapped data.
for pft, color in pft_colors.items():
    plt.scatter([], [], color=color, label=pft)

plt.legend(fontsize=font_size)

plt.tight_layout()
plt.savefig(
    base_path
    + "/regression_Topt_vs_Tmax_"
    + VPRM_old_or_new
    + "_"
    + run_ID
    + str_R2_lt_zero
    + ".eps",
    dpi=300,
    bbox_inches="tight",
)

In [None]:
# Columns of df_parameters that hold the optimised parameters, per model variant.
parameters_by_model = {
    "new": [
        "Topt",
        "PAR0",
        "lambd",
        "alpha1",
        "alpha2",
        "beta",
        "T_crit",
        "T_mult",
        "gamma",
        "theta1",
        "theta2",
        "theta3",
    ],
    "old": [
        "Topt",
        "PAR0",
        "lambd",
        "alpha",
        "beta",
    ],
    "migli": [
        "R_lai0", "alpha_lai", "k2", "E0", "alpha_p", "k_mm"
    ],
}

# Fail here with a clear message instead of leaving parameters_to_plot undefined
# (or stale from an earlier run) when the variant name is misspelled.
if VPRM_old_or_new not in parameters_by_model:
    raise ValueError(
        f"Unknown VPRM_old_or_new={VPRM_old_or_new!r}; expected one of {sorted(parameters_by_model)}"
    )

parameters_to_plot = list(parameters_by_model[VPRM_old_or_new])

In [None]:
# Order the table by PFT so that the PFTs appear alphabetically on the x axis,
# then collect the matching box colours in that same order.
df_parameters.sort_values("PFT", inplace=True)
color_palette = sns.color_palette('bright')

# PFT labels in order of first appearance in the sorted table.
pft_order = pd.unique(df_parameters['PFT'])
# One colour per PFT present; a PFT missing from pft_colors raises KeyError.
colors = list(map(pft_colors.__getitem__, pft_order))


In [None]:

# Box plots (with the individual values overlaid as a swarm) of every optimised
# parameter, grouped by PFT - one panel per parameter.

# Layout: three columns for the "new" variant, a single column otherwise. The number
# of rows follows the number of parameters; a fixed 5-row grid is too small for the
# six "migli" parameters and made axes[i] fail with IndexError.
n_params = len(parameters_to_plot)
if VPRM_old_or_new == "new":
    n_cols, fig_width, row_height = 3, 15, 5
    font_size = 12
else:
    n_cols, fig_width, row_height = 1, 10, 6
    font_size = 14
n_rows = max(1, -(-n_params // n_cols))  # ceiling division

fig, axes = plt.subplots(
    nrows=n_rows,
    ncols=n_cols,
    figsize=(fig_width, row_height * n_rows),
    squeeze=False,  # always a 2-D array, so flatten() works for any grid size
)
axes = axes.flatten()
for i, parameter in enumerate(parameters_to_plot):
    sns.boxplot(x='PFT', y=parameter, data=df_parameters, ax=axes[i], palette=colors)
    sns.swarmplot(x='PFT', y=parameter, data=df_parameters, color="0.25", alpha=0.5, ax=axes[i])
    axes[i].set_title(f'{parameter} by PFT', fontsize=font_size+2, weight='bold')
    axes[i].set_xlabel('PFT', fontsize=font_size+2)
    axes[i].set_ylabel(parameter, fontsize=font_size+2)
    axes[i].tick_params(axis='x', rotation=45)
    axes[i].tick_params(axis='both', which='major', labelsize=font_size)

# Hide panels of the grid that received no parameter.
for unused_ax in axes[n_params:]:
    unused_ax.set_visible(False)

# The legend hangs below the last used panel; empty scatters serve as its entries.
legend_ax = axes[n_params - 1]
handles = []
for pft, color in pft_colors.items():
    handles.append(legend_ax.scatter([], [], color=color, label=pft))

legend_ax.legend(handles=handles, bbox_to_anchor=(0.5, -0.2), loc='upper center', ncol=len(pft_colors)//2, fontsize=font_size)

plt.tight_layout()
plt.savefig(
    base_path
    + "/boxplot_PFTs_"
    + VPRM_old_or_new
    + "_"
    + run_ID
    + str_R2_lt_zero
    + ".eps",
    dpi=300,
    bbox_inches="tight",
)



In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

# Box plots of every optimised parameter grouped by site, each box coloured by the
# PFT of its site - one panel per parameter.

# Create a dictionary mapping site_ID to PFT
site_to_pft = df_parameters.set_index('site_ID')['PFT'].to_dict()

# One colour per site, in order of first appearance of the site in the table
site_colors = [pft_colors[site_to_pft[site]] for site in df_parameters['site_ID'].unique()]

# Layout: three columns for the "new" variant, a single column otherwise. The number
# of rows follows the number of parameters; a fixed 5-row grid is too small for the
# six "migli" parameters and made axes[i] fail with IndexError.
n_params = len(parameters_to_plot)
if VPRM_old_or_new == "new":
    n_cols, fig_width, row_height = 3, 15, 5
else:
    n_cols, fig_width, row_height = 1, 10, 6
n_rows = max(1, -(-n_params // n_cols))  # ceiling division

fig, axes = plt.subplots(
    nrows=n_rows,
    ncols=n_cols,
    figsize=(fig_width, row_height * n_rows),
    squeeze=False,  # always a 2-D array, so flatten() works for any grid size
)
axes = axes.flatten()

for i, parameter in enumerate(parameters_to_plot):
    sns.boxplot(x='site_ID', y=parameter, data=df_parameters, ax=axes[i], palette=site_colors)
    #sns.swarmplot(x='site_ID', y=parameter, data=df_parameters, color="0.15", alpha=0.5, ax=axes[i])
    axes[i].set_title(f'{parameter} by site_ID', fontsize=font_size+2, weight='bold')
    axes[i].set_xlabel('site_ID', fontsize=font_size+2)
    axes[i].set_ylabel(parameter, fontsize=font_size+2)
    axes[i].tick_params(axis='x', rotation=90)
    axes[i].tick_params(axis='both', which='major', labelsize=font_size)
    if VPRM_old_or_new == "new":
        # Smaller x tick labels for the three-column layout
        axes[i].tick_params(axis='x', which='major', labelsize=font_size-5)

# Hide panels of the grid that received no parameter.
for unused_ax in axes[n_params:]:
    unused_ax.set_visible(False)

# The legend hangs below the last used panel; empty scatters serve as its entries.
legend_ax = axes[n_params - 1]
handles = []
for pft, color in pft_colors.items():
    handles.append(legend_ax.scatter([], [], color=color, label=pft))

legend_ax.legend(handles=handles, bbox_to_anchor=(0.5, -0.2), loc='upper center', ncol=len(pft_colors)//2, fontsize=font_size)

plt.tight_layout()
plt.savefig(
    base_path +
    "/boxplot_siteIDs_"
    + VPRM_old_or_new
    + "_"
    + run_ID
    + str_R2_lt_zero
    + ".eps",
    dpi=300,
    bbox_inches="tight",
)
plt.show()


In [None]:
# Mean and median of every optimised parameter per PFT, written to one Excel sheet
# in long format (one row per parameter/PFT combination).
grouped = df_parameters.groupby('PFT')

# One single-row frame per (parameter, PFT) pair; parameters vary slowest.
dfs_to_concat = [
    pd.DataFrame({
        'PFT': [pft_name],
        'Parameter': [param_name],
        'Mean': [pft_rows[param_name].mean()],
        'Median': [pft_rows[param_name].median()],
    })
    for param_name in parameters_to_plot
    for pft_name, pft_rows in grouped
]

mean_median_df = pd.concat(dfs_to_concat, ignore_index=True)
mean_median_df.to_excel(
    f"{base_path}/mean_median_params_{VPRM_old_or_new}_{run_ID}{str_R2_lt_zero}.xlsx",
    index=False,
)

In [None]:

# Reshape the long mean/median table to wide form (rows: parameter, columns: PFT)
# and write one CSV file per statistic.
pivot_layout = dict(index='Parameter', columns='PFT')
pivoted_mean = mean_median_df.pivot(values='Mean', **pivot_layout)
pivoted_median = mean_median_df.pivot(values='Median', **pivot_layout)

# Common tail of both output file names.
csv_tail = f"{VPRM_old_or_new}_{run_ID}{str_R2_lt_zero}.csv"
pivoted_mean.to_csv(f"{base_path}/{region}_parameters_mean_{csv_tail}")
pivoted_median.to_csv(f"{base_path}/{region}_parameters_median_{csv_tail}")

In [None]:

# Per-PFT mean/median of the goodness-of-fit columns (written to Excel) and box plots
# of the NEE metrics.
#
# This cell uses its own names (metric_columns, metrics_to_plot, metric_font_size,
# mean_median_metrics_df) rather than overwriting parameters_to_plot, font_size and
# mean_median_df. Re-running it therefore no longer shrinks the font each time, and
# re-running the parameter cells above still uses the model parameters.
metric_columns = ['R2_GPP', 'R2_Reco', 'R2_NEE', 'RMSE_GPP', 'RMSE_Reco', 'RMSE_NEE']
metrics_to_plot = ['R2_NEE', 'RMSE_NEE']
metric_font_size = font_size - 2

grouped = df_parameters.groupby('PFT')
# One record per (metric, PFT) pair; metrics vary slowest.
metric_records = [
    {
        'PFT': pft_name,
        'Parameter': metric,
        'Mean': pft_rows[metric].mean(),
        'Median': pft_rows[metric].median(),
    }
    for metric in metric_columns
    for pft_name, pft_rows in grouped
]

mean_median_metrics_df = pd.DataFrame(metric_records, columns=['PFT', 'Parameter', 'Mean', 'Median'])
mean_median_metrics_df.to_excel(base_path + "/mean_median_R2_RMSE_"+ VPRM_old_or_new+ "_"+run_ID+str_R2_lt_zero+".xlsx", index=False)

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 6))
axes = axes.flatten()

for i, metric in enumerate(metrics_to_plot):
    sns.boxplot(x='PFT', y=metric, data=df_parameters, ax=axes[i], palette=colors)
    sns.swarmplot(x='PFT', y=metric, data=df_parameters, color="0.25", alpha=0.5, ax=axes[i])
    axes[i].set_title(f'{metric} by PFT', fontsize=metric_font_size+2, weight='bold')
    axes[i].set_xlabel('PFT', fontsize=metric_font_size+2)
    axes[i].set_ylabel(metric, fontsize=metric_font_size+2)
    axes[i].tick_params(axis='x', rotation=90)
    axes[i].tick_params(axis='both', which='major', labelsize=metric_font_size)

plt.tight_layout()
plt.savefig(
    base_path +
    "/boxplot_R2_RMSE_"
    + VPRM_old_or_new
    + "_"
    + run_ID
    + str_R2_lt_zero
    + ".eps",
    dpi=300,
    bbox_inches="tight",
)

In [None]:
# try to plot colors according to PFT
# import seaborn as sns
# import matplotlib.pyplot as plt

# color_palette = 'tab20'  # 'muted', 'deep', 'husl'
# sns.set_palette(color_palette)

# if VPRM_old_or_new == "new":
#     fig, axes = plt.subplots(nrows=4, ncols=3, figsize=(15, 20))
# else:
#     fig, axes = plt.subplots(nrows=5, ncols=1, figsize=(10, 30))

# axes = axes.flatten()

# # Create a list to store legend handles and labels
# legend_handles = []

# for i, parameter in enumerate(parameters_to_plot):
#     sns.boxplot(x='PFT', y=parameter, data=df_parameters, ax=axes[i])
#     sns.swarmplot(x='PFT', y=parameter, data=df_parameters, color="0.25", alpha=0.5, ax=axes[i], dodge=True)
#     axes[i].set_title(f'{parameter} by PFT')
#     axes[i].set_xlabel('PFT')
#     axes[i].set_ylabel(parameter)
#     axes[i].tick_params(axis='x', rotation=45)
    
#     # Add handles and labels for legend
#     handles, labels = axes[i].get_legend_handles_labels()
#     legend_handles.extend(handles)

# plt.tight_layout(rect=[0, 0.05, 1, 1])  # Adjust layout to accommodate legend below all the tiles

# plt.savefig(
#     base_path 
#     + "/boxplot_PFTs_"
#     + VPRM_old_or_new
#     + "_"
#     + run_ID
#     + ".eps",
#     dpi=300,
#     bbox_inches="tight",
# )

# if VPRM_old_or_new == "new":
#     fig, axes = plt.subplots(nrows=4, ncols=3, figsize=(15, 20))
# else:
#     fig, axes = plt.subplots(nrows=5, ncols=1, figsize=(10, 30))

# axes = axes.flatten()

# for i, parameter in enumerate(parameters_to_plot):
#     sns.boxplot(x='site_ID', y=parameter, data=df_parameters, hue='PFT', palette=color_palette, ax=axes[i])
#     sns.swarmplot(x='site_ID', y=parameter, data=df_parameters, hue='PFT', palette=color_palette, alpha=0, dodge=True, ax=axes[i])
#     axes[i].set_title(f'{parameter} by site_ID')
#     axes[i].set_xlabel('site_ID')
#     axes[i].set_ylabel(parameter)
#     axes[i].tick_params(axis='x', rotation=45)
# fig.legend(legend_handles, labels, loc='lower center', ncol=len(set(df_parameters['PFT'])), bbox_to_anchor=(0.5, -0.05))
# plt.tight_layout()

# plt.savefig(
#     base_path +
#     "/boxplot_siteIDs_"
#     + VPRM_old_or_new
#     + "_"
#     + run_ID
#     + ".eps",
#     dpi=300,
#     bbox_inches="tight",
# )
# plt.show()


In [None]:
# if VPRM_old_or_new == "new":
#     fig, axes = plt.subplots(nrows=4, ncols=3, figsize=(15, 20))
# else:
#     fig, axes = plt.subplots(nrows=5, ncols=1, figsize=(10, 30))  

# axes = axes.flatten()

# mean_median_df = pd.DataFrame(columns=['Parameter', 'Mean', 'Median'])

# for i, parameter in enumerate(parameters_to_plot):
#     q_low = df_parameters[parameter].quantile(0.05)
#     q_high = df_parameters[parameter].quantile(0.95)
#     filtered_data = df_parameters[(df_parameters[parameter] >= q_low) & (df_parameters[parameter] <= q_high)]
#     sns.boxplot(x='PFT', y=parameter, data=filtered_data, ax=axes[i])
#     sns.swarmplot(x='PFT', y=parameter, data=filtered_data, color="0.25", alpha=0.5, ax=axes[i])
#     axes[i].set_title(f'{parameter} by PFT')
#     axes[i].set_xlabel('PFT')
#     axes[i].set_ylabel(parameter)
#     axes[i].tick_params(axis='x', rotation=45)
    
# plt.tight_layout()
# plt.savefig(base_path + "/boxplot_PFTs_filtered_with_median_"+run_ID+".eps", dpi=300, bbox_inches="tight")
# plt.show()
