With SRAD/tmean

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import os
import warnings

# Ignore routine warnings for cleaner output.
# NOTE: this silences *every* warning category for the rest of the session.
warnings.filterwarnings('ignore')

# 1. Load the dataset and derive the columns needed for plotting.
#    On a missing file or missing column, a message is printed and the
#    script terminates with a non-zero exit status.
print("--- 📂 Loading dataset ---")
try:
    #
    # PLEASE VERIFY THIS FILE PATH IS CORRECT
    #
    df_new = pd.read_csv("/explore/nobackup/people/spotter5/anna_v/v2/v2_model_training_final.csv")

    # Keep only rows whose flux_method is 'EC'. The explicit .copy() makes the
    # subset an independent frame, so the column assignments below never write
    # into a view of the full table (the classic chained-assignment pitfall).
    df_new = df_new[df_new['flux_method'] == 'EC'].copy()

    # --- FEATURE ENGINEERING ---
    # Row-wise mean of the tmmn and tmmx columns. DataFrame.mean skips NaN by
    # default, so a row with only one of the two present gets that single value.
    # No unit conversion happens here; the '_C' suffix assumes the inputs are
    # already in degrees Celsius -- TODO confirm against the data source.
    df_new['tmean_C'] = df_new[['tmmn', 'tmmx']].mean(axis=1)

    # Build a 'date' column (first day of each year/month) for time-series plotting.
    df_new['date'] = pd.to_datetime(df_new[['year', 'month']].assign(day=1))

except FileNotFoundError as e:
    print(f"Error: The data file was not found. Please check the path.\n{e}")
    # `exit()` is an interactive helper injected by the `site` module and exits
    # with status 0; raise SystemExit directly and signal failure instead.
    raise SystemExit(1)
except KeyError as e:
    print(f"Error: A required column is missing from the dataset: {e}")
    raise SystemExit(1)


# 2. Resolve the directory that receives the figures, then list the sites.
comparison_plot_path = os.path.join(
    "/explore/nobackup/people/spotter5/anna_v/v2/exploration",
    "variable_comparisons_methane",
)
# exist_ok=True: re-running the cell against an existing directory is fine.
os.makedirs(comparison_plot_path, exist_ok=True)
print("Plots will be saved to: {}".format(comparison_plot_path))

# Distinct values of the site_reference column (may include a null entry,
# which the plotting loop skips).
all_sites = df_new['site_reference'].unique()
print("Found {} unique sites to process.".format(len(all_sites)))


# --- 🛰️ Plotting Loop ---
def create_comparison_plot(ax, site_data, var_name, var_unit, var_color):
    """Draw one panel: CH4 flux on the left axis, one variable on the right.

    Args:
        ax: Matplotlib axes that receives the CH4 flux line, grid, title and
            the combined legend.
        site_data: DataFrame for a single site; must contain the columns
            'date', 'ch4_flux_total' and ``var_name``.
        var_name: Name of the column plotted on the secondary (twin) y-axis.
        var_unit: Unit text shown in the secondary y-axis label.
        var_color: Matplotlib colour for the variable's line, label and ticks.
    """
    ax_twin = ax.twinx()

    # CH4 flux on the primary axis, drawn in light gray as a backdrop.
    line_ch4 = ax.plot(site_data['date'], site_data['ch4_flux_total'],
                       color='lightgray', label='CH4', linewidth=2.5, zorder=1)
    # NOTE(review): the unit of ch4_flux_total is not stated anywhere in this
    # script, so none is shown -- append it here once confirmed.
    ax.set_ylabel('CH4 flux (ch4_flux_total)', color='gray', fontsize=12)
    ax.tick_params(axis='y', labelcolor='gray')
    ax.grid(True, linestyle='--', alpha=0.5)

    # Comparison variable on the secondary axis.
    line_var = ax_twin.plot(site_data['date'], site_data[var_name],
                            color=var_color, linestyle='-', label=var_name, zorder=3)
    ax_twin.set_ylabel(f'{var_name} ({var_unit})', color=var_color, fontsize=12)
    ax_twin.tick_params(axis='y', labelcolor=var_color)

    ax.set_title(f'CH4 flux vs. {var_name}', fontsize=14, fontweight='bold')

    # The two lines live on different axes; merge their handles so a single
    # legend lists both.
    all_handles = line_ch4 + line_var
    all_labels = [h.get_label() for h in all_handles]
    ax.legend(all_handles, all_labels, loc='upper left')


# (column, unit label, line colour) per panel, in axes.flat order:
# top-left, top-right, bottom-left, bottom-right.
_PANEL_SPECS = (
    ('NDVI', 'unitless', 'forestgreen'),
    ('NDWI', 'unitless', 'dodgerblue'),
    ('tmean_C', '°C', 'darkblue'),
    ('srad', 'W/m²', 'saddlebrown'),
)

for site in all_sites:
    if pd.isnull(site):
        continue

    # Rows for this site that have a CH4 flux observation, in date order.
    site_df = (
        df_new[df_new['site_reference'] == site]
        .dropna(subset=['ch4_flux_total'])
        .sort_values('date')
    )

    if site_df.empty:
        print(f" -> Skipping site: {site} (No valid CH4 flux observations)")
        continue

    print(f" -> Processing site: {site}")

    # One 2x2 figure per site; all panels share the date axis.
    fig, axes = plt.subplots(2, 2, figsize=(24, 18), sharex=True)
    fig.suptitle(f"Site: {site} - Variable Comparison with CH4 Flux", fontsize=20, fontweight='bold')

    for ax, (var_name, var_unit, var_color) in zip(axes.flat, _PANEL_SPECS):
        create_comparison_plot(ax, site_df, var_name, var_unit, var_color)

    # --- Final Figure Formatting ---
    for ax in axes.flat:
        ax.set_xlabel('Date', fontsize=12)

    fig.tight_layout(rect=[0, 0, 1, 0.96])  # leave room at the top for the suptitle

    # A path separator inside a site name would be read as a sub-directory and
    # make the save fail, so replace it in the file name only.
    safe_site = str(site).replace(os.sep, "_")
    plot_filename = f"{safe_site}_2x2_comparison.png"
    # Save through the figure object rather than pyplot's implicit current figure.
    fig.savefig(os.path.join(comparison_plot_path, plot_filename), dpi=100)
    plt.close(fig)  # release the figure so memory does not grow across sites


print("\n--- ✅ Finished plotting. All site comparison plots are saved. ---")

--- 📂 Loading dataset ---
Plots will be saved to: /explore/nobackup/people/spotter5/anna_v/v2/exploration/variable_comparisons_new_data
Found 188 unique sites to process.
 -> Processing site: Skyttorp 2_SE-Sk2_tower
 -> Processing site: Wolf_creek_forest_CA-WCF_tower
 -> Processing site: Alberta - Western Peatland - LaBiche River,Black Spruce,Larch Fen_CA-WP1_tower
 -> Processing site: Elgeeii forest station_RU-Ege_tower
 -> Processing site: Faejemyr_SE-Faj_tower
 -> Processing site: Fyodorovskoye2_RU-Fy2_tower
 -> Processing site: Fyodorovskoye_RU-Fyo_tower
 -> Processing site: Gunnarsholt_IS-Gun_tower
 -> Processing site: HJP02 Jack Pine_CA-HJP02_tower
 -> Processing site: HJP75 Jack Pine_CA-HJP75_tower
 -> Processing site: HJP94 Jack Pine_CA-HJP94_tower
 -> Processing site: Hakasia 10 yr_RU-Ha3_tower
 -> Processing site: Hakasia 5yr_RU-Ha2_tower
 -> Processing site: Hakasia Steppe_RU-Ha1_tower
 -> Processing site: Hustai grassland_MN-Hst_tower
 -> Processing site: Hyltemossa_SE-Htm_