# Observed vs Predicted all traits

In [1]:
import os
import pandas as pd
import pickle
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split


# Make sure the figure output directory exists before any plotting happens.
os.makedirs('ArticleFigures', exist_ok=True)

# Column names: the 11 model inputs and the 7 traits that are predicted.
input_features = [
    'YER', 'DAT', 'WS', 'CWC', 'PP', 'GP', 'GW', 'GY', 'RWC', 'RS', 'CT',
]
target_traits = [
    'PALK', 'OLA', 'ECO', 'LINO', 'LINK', 'MUFA', 'PUFA',
]

# Read the spreadsheet and separate predictors from targets.
df = pd.read_excel('Dataset_11_features.xlsx')
X = df[input_features]
y = df[target_traits]

# Z-score the predictors.
# NOTE(review): the scaler is fitted on ALL rows, i.e. before the train/test
# split, so test-set statistics influence the scaling. Presumably this matches
# how the pickled models were trained -- confirm before changing it.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 80/20 hold-out split; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y,
    test_size=0.2,
    random_state=42,
)

# Load one previously fitted model per trait from 'best_model_<trait>.pkl'
# in the current working directory.
# SECURITY: pickle.load can execute arbitrary code while unpickling, so only
# load model files that come from a trusted source.
# NOTE(review): the recorded cell output links to scikit-learn's
# model-persistence limitations page, which suggests the pickles were created
# with a different scikit-learn version -- verify the environment.
models = {}
for trait in target_traits:
    model_path = f'best_model_{trait}.pkl'
    with open(model_path, 'rb') as model_file:
        models[trait] = pickle.load(model_file)


# Hard-coded evaluation metrics per trait (MAE, R2, RMSE). These values are
# only printed in the annotation box of each subplot; nothing in this script
# recomputes them from the loaded models' predictions.
# NOTE(review): for six of the seven traits (all except MUFA) the listed RMSE
# is smaller than the listed MAE. For metrics computed on the same set of
# predictions RMSE >= MAE always holds, so the two columns may be swapped or
# may come from different evaluation runs -- confirm against the source of
# these numbers before publishing the figure.
provided_metrics = {
    'PALK': {'MAE': 0.020, 'R2': 0.992, 'RMSE': 0.012},
    'OLA' : {'MAE': 0.152, 'R2': 0.986, 'RMSE': 0.114},
    'ECO' : {'MAE': 0.134, 'R2': 0.971, 'RMSE': 0.116},
    'LINO': {'MAE': 0.457, 'R2': 0.971, 'RMSE': 0.327},
    'LINK': {'MAE': 0.253, 'R2': 0.977, 'RMSE': 0.185},
    'MUFA': {'MAE': 0.198, 'R2': 0.860, 'RMSE': 0.203},
    'PUFA': {'MAE': 0.540, 'R2': 0.849, 'RMSE': 0.467},
}

# Build one observed-vs-predicted scatter panel per target trait, arrange the
# panels on a grid, and save the combined figure as a high-resolution PNG.

# Grid layout: fixed number of columns, as many rows as needed.
n = len(target_traits)
cols = 4
rows = int(np.ceil(n / cols))

# squeeze=False makes plt.subplots always return a 2-D array of Axes, so the
# flatten() call below is valid for every grid shape.
fig, axes = plt.subplots(rows, cols, figsize=(4*cols, 4*rows), squeeze=False)
axes = axes.flatten()

for ax, trait in zip(axes, target_traits):
    model = models[trait]

    # Flatten both vectors to 1-D so they can be scattered against each other
    # regardless of the shape the model returns.
    y_true = np.asarray(y_test[trait].values).ravel()
    y_pred = np.asarray(model.predict(X_test)).ravel()

    # Scatter plot of held-out observations vs. model predictions
    ax.scatter(y_true, y_pred, alpha=0.7, color='darkorange', edgecolor='k', linewidth=0.2)

    # Diagonal 1:1 reference line spanning the combined data range
    # (nan-aware min/max so missing values do not break the limits).
    vmin = min(np.nanmin(y_true), np.nanmin(y_pred))
    vmax = max(np.nanmax(y_true), np.nanmax(y_pred))
    ax.plot([vmin, vmax], [vmin, vmax], color='darkred', linestyle='--', linewidth=1)

    # Labels and title
    ax.set_xlabel(f'Observed {trait}')
    ax.set_ylabel(f'Predicted {trait}')
    ax.set_title(f'{trait}: Observed vs Predicted')

    # Metrics box in the top-left corner. The values come from the hard-coded
    # provided_metrics table; a missing entry is rendered as "nan".
    # The superscript two is written as the escape \u00b2 so the label stays
    # "R²" even if this file is re-saved with a different encoding (the
    # previous literal had been corrupted to "RÂ²").
    m = provided_metrics.get(trait, {})
    metrics_text = (
        f"MAE = {m.get('MAE', np.nan):.3f}\n"
        f"R\u00b2 = {m.get('R2', np.nan):.3f}\n"
        f"RMSE = {m.get('RMSE', np.nan):.3f}"
    )
    ax.text(0.05, 0.95, metrics_text, transform=ax.transAxes, fontsize=9,
            verticalalignment='top',
            bbox=dict(boxstyle='round,pad=0.4', facecolor='white', alpha=0.85, edgecolor='0.8'))

    ax.grid(True)

# Hide the grid cells that have no trait assigned to them
for ax in axes[n:]:
    ax.axis('off')

plt.tight_layout()

# Save combined figure at high resolution, then release the figure's memory
out_path = os.path.join('ArticleFigures', 'Observed_vs_Predicted_all_traits.png')
fig.savefig(out_path, dpi=600, bbox_inches='tight')
plt.close(fig)

print("Saved:", out_path)


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
  import pkg_resources
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#se

Saved: ArticleFigures\Observed_vs_Predicted_all_traits.png
