All plots

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import os
import numpy as np

# --- 1. Consolidate Data Preparation ---

print("Loading and preparing data...")

# Input tables: the full predictor table, and the table whose 'id' values
# identify which rows of the full table belong to the 'old' dataset.
main_file_path = "/explore/nobackup/people/spotter5/new_combustion/2025-08-08_LC_FISL_Original_combustionModelPredictors.csv"
old_ids_file_path = "/explore/nobackup/people/spotter5/new_combustion/all_predictors.csv"

df_main = pd.read_csv(main_file_path)
old_ids_df = pd.read_csv(old_ids_file_path)

# Distinct ids that mark a row of the main table as 'old'
old_ids = old_ids_df['id'].unique()

# Membership is evaluated once on the unfiltered table; each subset is then
# reduced to its complete rows (any row containing a NaN is dropped).
in_old_set = df_main['id'].isin(old_ids)
df_all = df_main.dropna()
df_new = df_main[~in_old_set].dropna()
df_old = df_main[in_old_set].dropna()

print(f"Data prepared: All ({len(df_all)} rows), New ({len(df_new)} rows), Old ({len(df_old)} rows)")

# --- 2. Define Variables and Output Directory ---

# Dependent variables: left plot column and right plot column
target_y_left = 'above.carbon.combusted'
target_y_right = 'below.ground.carbon.combusted'

# Columns that must never be treated as predictors: the two targets plus
# identifier, date and coordinate style columns.
exclude_columns = [
    target_y_left,
    target_y_right,
    'burn_year',
    'project.name',
    'latitude',
    'longitude',
    'Date',
    'id',
    'CNA_MAR',
    'fireYr',
    'lat',
    'lon',
    'project_name',
]

# Every remaining column, in table order, is an independent (predictor) variable
all_columns = df_all.columns
independent_vars = all_columns[~all_columns.isin(exclude_columns)].tolist()

# Destination folder for the 3x2 figures; created if it does not exist yet
output_dir = os.path.expanduser("/explore/nobackup/people/spotter5/new_combustion/scatter_plots/combined_3x2_plots")
os.makedirs(output_dir, exist_ok=True)

# --- 3. Helper Function for Plotting ---

def create_subplot(ax, df, x_var, y_var, title):
    """Draw one predictor-vs-target scatter panel annotated with R^2.

    R^2 is reported as the squared Pearson correlation between
    ``df[x_var]`` and ``df[y_var]``.

    Args:
        ax: Axes-like object to draw on. It must provide ``scatter``,
            ``text``, ``set_title``, ``set_ylabel``, ``grid`` and a
            ``transAxes`` attribute.
        df: DataFrame containing both columns.
        x_var: Name of the column plotted on the x-axis.
        y_var: Name of the column plotted on the y-axis; also used as the
            y-axis label.
        title: Title of the panel.

    Returns:
        None. The panel is drawn on ``ax`` in place.
    """
    # Title and y-label are set up front so that a "No Data" panel is
    # labelled exactly like a populated one.
    ax.set_title(title)
    ax.set_ylabel(y_var)

    if df.empty:
        ax.text(0.5, 0.5, 'No Data', ha='center', va='center', fontsize=12)
        return

    # Scatter plot
    ax.scatter(df[x_var], df[y_var], alpha=0.5, s=10)

    # Calculate R^2 (as squared Pearson correlation)
    correlation = df[x_var].corr(df[y_var])
    r2_value = correlation**2

    # The correlation is undefined (NaN) when either column is constant or
    # fewer than two rows are available; show that explicitly rather than
    # printing "nan" on the figure.
    if pd.isna(r2_value):
        r2_label = '$R^2$ = N/A'
    else:
        r2_label = f'$R^2 = {r2_value:.2f}$'

    # Add R^2 text to the lower right (axes-fraction coordinates)
    ax.text(0.95, 0.05, r2_label,
            transform=ax.transAxes, fontsize=12,
            verticalalignment='bottom', horizontalalignment='right',
            bbox=dict(boxstyle='round,pad=0.3', facecolor='white', alpha=0.7))

    ax.grid(True, linestyle='--', alpha=0.6)

# --- 4. Generate and Save Combined Plots ---

print(f"\nGenerating {len(independent_vars)} combined 3x2 plots...")

# Grid layout: one row per data subset, one column per combustion target.
subset_rows = [
    ('All Data', df_all),
    ('New Data', df_new),
    ('Old Data', df_old),
]
target_cols = [
    ('Aboveground', target_y_left),
    ('Belowground', target_y_right),
]

for var in independent_vars:
    # One figure per predictor; all six panels share the predictor's x-axis
    fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(14, 18), sharex=True)

    # Fill the grid row by row, left column before right column
    for row_idx, (subset_label, subset_df) in enumerate(subset_rows):
        for col_idx, (target_label, target_var) in enumerate(target_cols):
            panel_title = f'{subset_label} vs. {target_label}'
            create_subplot(axes[row_idx, col_idx], subset_df, var, target_var, title=panel_title)

    # The x-axis is shared, so only the bottom row carries its label
    for bottom_ax in axes[2]:
        bottom_ax.set_xlabel(var)

    # Figure-level heading naming the predictor
    fig.suptitle(f'Predictor Variable: {var}', fontsize=20, y=0.99)

    # Reserve the top 3% of the canvas so the panels do not collide with the heading
    fig.tight_layout(rect=[0, 0, 1, 0.97])

    # Write the figure to disk, then release it so figures do not accumulate in memory
    output_file = os.path.join(output_dir, f"{var}_vs_combustion_combined.png")
    plt.savefig(output_file, dpi=150)
    plt.close(fig)

print(f"\n✅ Done! All combined plots saved to: {output_dir}")

Loading and preparing data...
Data prepared: All (929 rows), New (431 rows), Old (498 rows)

Generating 73 combined 3x2 plots...

✅ Done! All combined plots saved to: /explore/nobackup/people/spotter5/new_combustion/scatter_plots/combined_3x2_plots
