In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os

# Run configuration: the switch below is baked into run_ID and therefore into
# the input directory and every output file name written by this notebook.
VPRM_old_or_new = "old"
run_ID = "VPRM_"+VPRM_old_or_new+"_optimized_params_diff_evo_V2_100"
base_path = "/home/madse/Downloads/Fluxnet_Data/all_tuned_params/" + run_ID

# os.listdir() returns entries in arbitrary order; sorting keeps the row order
# of the combined table (and thus the category order in plots) reproducible.
folders = sorted(
    f
    for f in os.listdir(base_path)
    if os.path.isdir(os.path.join(base_path, f))
)
flx_folders = [folder for folder in folders if folder.startswith("FLX_")]

if not flx_folders:
    print("Warning - There is no input data")
    raise SystemExit(0)

# Read every XLSX file found in the FLX_ folders. The frames are collected in
# a list and concatenated once: re-concatenating inside the loop copies all
# previous rows on every iteration, and seeding with an empty DataFrame
# triggers pandas' deprecation warning about empty entries in concat.
frames = []
for folder in flx_folders:
    folder_path = os.path.join(base_path, folder)
    files = sorted(f for f in os.listdir(folder_path) if f.endswith('.xlsx'))
    for file in files:
        file_path = os.path.join(folder_path, file)
        data = pd.read_excel(file_path)
        frames.append(data)

# pd.concat([]) raises ValueError, so fall back to an empty frame when the
# FLX_ folders contain no XLSX files (same result as the original loop).
df_parameters = pd.concat(frames, axis=0) if frames else pd.DataFrame()
print(df_parameters)

In [None]:
# (site_ID, Year) combinations that are excluded from the analysis
values_to_drop = [
    ("CH-Cha", 2009),
    ("CH-Cha", 2005),
    ("CH-Dav", 2005),
    ("CH-Fru", 2005),
    ("CH-Fru", 2006),
    ("CH-Lae", 2004),
    ("CZ-wet", 2006),
    ("DE-Lkb", 2009),
    ("DE-Lkb", 2010),
    ("DE-Lkb", 2013),
    ("DE-SfN", 2012),
    ("IT-La2", 2000),
    ("IT-La2", 2002),
    ("IT-PT1", 2002),
    ("IT-Ren", 2004),
    ("IT-Ren", 2001),
    ("IT-Ren", 2000),
    ("IT-Tor", 2008)
]

# Build "<site_ID>_<Year>" keys as a standalone Series and remove all listed
# site-years with one boolean mask. This avoids adding a temporary column to
# the shared frame, re-filtering the whole frame once per key, and an in-place
# drop on a filtered slice. Key format is the same as before, so the same rows match.
site_year_keys = df_parameters['site_ID'] + '_' + df_parameters['Year'].astype(str)
keys_to_drop = {'_'.join(map(str, tpl)) for tpl in values_to_drop}
df_parameters = df_parameters[~site_year_keys.isin(keys_to_drop)]

In [None]:
# Bare expression: the notebook renders the current parameter table as the cell output.
df_parameters


In [None]:
# Columns of df_parameters that are plotted and summarised below.
# The "new" setting uses the extended list; any other value of
# VPRM_old_or_new falls back to the short five-entry list.
parameters_to_plot = (
    [
        "Topt",
        "PAR0",
        "lambd",
        "alpha1",
        "alpha2",
        "beta",
        "T_crit",
        "T_mult",
        "gamma",
        "theta1",
        "theta2",
        "theta3",
    ]
    if VPRM_old_or_new == "new"
    else [
        "Topt",
        "PAR0",
        "lambd",
        "alpha",
        "beta",
    ]
)


In [None]:

color_palette = 'tab20'  # 'muted', 'deep', 'husl'
sns.set_palette(color_palette)

# Subplot grid per setting: 4x3 panels for the 12 "new" parameters,
# 5x1 panels for the 5 parameters used otherwise.
if VPRM_old_or_new == "new":
    grid_kwargs = dict(nrows=4, ncols=3, figsize=(15, 20))
else:
    grid_kwargs = dict(nrows=5, ncols=1, figsize=(10, 30))


def plot_grouped_boxplots(data, parameters, group_col, out_file):
    """Draw one box plot with an overlaid swarm plot per parameter and save the figure.

    Args:
        data: DataFrame holding ``group_col`` and every column named in ``parameters``.
        parameters: Column names to plot, one panel each (filled in row-major order).
        group_col: Column used for the x-axis categories (e.g. 'PFT' or 'site_ID').
        out_file: File name of the saved figure, created inside ``base_path``.

    Returns:
        The matplotlib Figure that was created and saved.
    """
    fig, axes = plt.subplots(**grid_kwargs)
    axes = axes.flatten()

    for i, parameter in enumerate(parameters):
        ax = axes[i]
        sns.boxplot(x=group_col, y=parameter, data=data, ax=ax)
        # Individual values on top of the boxes, semi-transparent dark grey
        sns.swarmplot(x=group_col, y=parameter, data=data, color="0.25", alpha=0.5, ax=ax)
        ax.set_title(f'{parameter} by {group_col}')
        ax.set_xlabel(group_col)
        ax.set_ylabel(parameter)
        ax.tick_params(axis='x', rotation=45)

    fig.tight_layout()
    fig.savefig(os.path.join(base_path, out_file), dpi=300, bbox_inches="tight")
    return fig


# Same two figures and output files as before: parameters by PFT, then by site.
plot_grouped_boxplots(
    df_parameters,
    parameters_to_plot,
    'PFT',
    f"boxplot_PFTs_{VPRM_old_or_new}_{run_ID}.eps",
)
plot_grouped_boxplots(
    df_parameters,
    parameters_to_plot,
    'site_ID',
    f"boxplot_siteIDs_{VPRM_old_or_new}_{run_ID}.eps",
)
plt.show()




In [None]:
# NOTE: everything below in this cell is disabled (commented-out) code, kept for reference.
# It draws the per-PFT box/swarm plots using only rows whose value lies between the
# 5th and 95th percentile of the respective parameter (quantiles taken over all rows),
# and would save the figure as "boxplot_PFTs_filtered_with_median_<run_ID>.eps".
# The `mean_median_df` frame it creates is never filled.
#
# if VPRM_old_or_new == "new":
#     fig, axes = plt.subplots(nrows=4, ncols=3, figsize=(15, 20))
# else:
#     fig, axes = plt.subplots(nrows=5, ncols=1, figsize=(10, 30))  

# axes = axes.flatten()

# mean_median_df = pd.DataFrame(columns=['Parameter', 'Mean', 'Median'])

# for i, parameter in enumerate(parameters_to_plot):
#     q_low = df_parameters[parameter].quantile(0.05)
#     q_high = df_parameters[parameter].quantile(0.95)
#     filtered_data = df_parameters[(df_parameters[parameter] >= q_low) & (df_parameters[parameter] <= q_high)]
#     sns.boxplot(x='PFT', y=parameter, data=filtered_data, ax=axes[i])
#     sns.swarmplot(x='PFT', y=parameter, data=filtered_data, color="0.25", alpha=0.5, ax=axes[i])
#     axes[i].set_title(f'{parameter} by PFT')
#     axes[i].set_xlabel('PFT')
#     axes[i].set_ylabel(parameter)
#     axes[i].tick_params(axis='x', rotation=45)
    
# plt.tight_layout()
# plt.savefig(base_path + "/boxplot_PFTs_filtered_with_median_"+run_ID+".eps", dpi=300, bbox_inches="tight")
# plt.show()


In [None]:
# NOTE: this cell is disabled (commented-out) code, kept for reference.
# For every parameter and PFT it keeps only the values between that group's 5th and
# 95th percentile, takes their mean and median, and would write the resulting table to
# "filtered_mean_median_params_<run_ID>.xlsx".
#
# grouped = df_parameters.groupby('PFT')
# dfs_to_concat = []
# for parameter in parameters_to_plot:
#     for pft, group_data in grouped:
#         q_low = group_data[parameter].quantile(0.05)
#         q_high = group_data[parameter].quantile(0.95)
#         filtered_data = group_data[(group_data[parameter] >= q_low) & (group_data[parameter] <= q_high)]
#         filtered_mean = filtered_data[parameter].mean()
#         filtered_median = filtered_data[parameter].median()
#         # Create a DataFrame with the new row
#         new_row = pd.DataFrame({'PFT': [pft], 'Parameter': [parameter], 'Filtered_Mean': [filtered_mean], 'Filtered_Median': [filtered_median]})
        
#         # Append the new DataFrame to the list
#         dfs_to_concat.append(new_row)

# # Concatenate all DataFrames in the list
# filtered_mean_median_df = pd.concat(dfs_to_concat, ignore_index=True)

# filtered_mean_median_df.to_excel(base_path + "/filtered_mean_median_params_"+run_ID+".xlsx", index=False)

In [None]:
# Long-format summary: one row per (parameter, PFT) pair holding the mean and
# median of that parameter within the PFT group. Parameters form the outer loop.
# NOTE: the columns keep the "Filtered_" prefix although no quantile filtering
# is applied in this cell.
grouped = df_parameters.groupby('PFT')
dfs_to_concat = [
    pd.DataFrame(
        {
            'PFT': [pft],
            'Parameter': [parameter],
            'Filtered_Mean': [group_data[parameter].mean()],
            'Filtered_Median': [group_data[parameter].median()],
        }
    )
    for parameter in parameters_to_plot
    for pft, group_data in grouped
]

# Stack the one-row frames and export next to the input data.
filtered_mean_median_df = pd.concat(dfs_to_concat, ignore_index=True)
summary_path = base_path + "/mean_median_params_" + VPRM_old_or_new + "_" + run_ID + ".xlsx"
filtered_mean_median_df.to_excel(summary_path, index=False)

In [None]:

# From here on `parameters_to_plot` is rebound to the fit-quality columns
# (R2 and RMSE for GPP, Reco and NEE) instead of the tuned parameters.
parameters_to_plot = [
    'R2_GPP',
    'R2_Reco',
    'R2_NEE',
    'RMSE_GPP',
    'RMSE_Reco',
    'RMSE_NEE',
]

# Mean and median of each metric per PFT, one row per (metric, PFT) pair.
# NOTE: the "Filtered_" column prefix is kept even though the values are unfiltered.
grouped = df_parameters.groupby('PFT')
dfs_to_concat = [
    pd.DataFrame(
        {
            'PFT': [pft_name],
            'Parameter': [metric],
            'Filtered_Mean': [pft_rows[metric].mean()],
            'Filtered_Median': [pft_rows[metric].median()],
        }
    )
    for metric in parameters_to_plot
    for pft_name, pft_rows in grouped
]

filtered_mean_median_df = pd.concat(dfs_to_concat, ignore_index=True)
metrics_path = base_path + "/mean_median_R2_RMSE_" + VPRM_old_or_new + "_" + run_ID + ".xlsx"
filtered_mean_median_df.to_excel(metrics_path, index=False)

# NOTE: the commented-out block below is disabled code, kept for reference.
# It is the percentile-filtered counterpart of the summary above: per parameter and PFT,
# only values between the group's 5th and 95th percentile enter the mean/median, and the
# table would be written to "filtered_mean_median_R2_RMSE_<run_ID>.xlsx".
#
# grouped = df_parameters.groupby('PFT')
# dfs_to_concat = []
# for parameter in parameters_to_plot:
#     for pft, group_data in grouped:
#         q_low = group_data[parameter].quantile(0.05)
#         q_high = group_data[parameter].quantile(0.95)
#         filtered_data = group_data[(group_data[parameter] >= q_low) & (group_data[parameter] <= q_high)]
#         filtered_mean = filtered_data[parameter].mean()
#         filtered_median = filtered_data[parameter].median()
#         # Create a DataFrame with the new row
#         new_row = pd.DataFrame({'PFT': [pft], 'Parameter': [parameter], 'Filtered_Mean': [filtered_mean], 'Filtered_Median': [filtered_median]})
        
#         # Append the new DataFrame to the list
#         dfs_to_concat.append(new_row)

# # Concatenate all DataFrames in the list
# filtered_mean_median_df = pd.concat(dfs_to_concat, ignore_index=True)
# filtered_mean_median_df.to_excel(base_path + "/filtered_mean_median_R2_RMSE_"+run_ID+".xlsx", index=False)

# One panel per entry of parameters_to_plot on a 2x3 grid, grouped by PFT:
# box plot with the individual values overlaid as a swarm plot.
fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(15, 20))
axes = axes.flatten()

for idx, metric in enumerate(parameters_to_plot):
    panel = axes[idx]
    sns.boxplot(data=df_parameters, x='PFT', y=metric, ax=panel)
    sns.swarmplot(
        data=df_parameters,
        x='PFT',
        y=metric,
        color="0.25",
        alpha=0.5,
        ax=panel,
    )
    panel.set_title(f'{metric} by PFT')
    panel.set_xlabel('PFT')
    panel.set_ylabel(metric)
    panel.tick_params(axis='x', rotation=45)

plt.tight_layout()

# Figure is written into the run directory as an EPS file.
figure_path = base_path + "/boxplot_R2_RMSE_" + VPRM_old_or_new + "_" + run_ID + ".eps"
plt.savefig(figure_path, dpi=300, bbox_inches="tight")