In [1]:
import os
import warnings
import json
import numpy as np
import pandas as pd
import joblib
import matplotlib.pyplot as plt

# Try seaborn for prettier heatmaps; fall back to matplotlib if missing
try:
    import seaborn as sns
    HAS_SNS = True
except Exception:
    HAS_SNS = False

import tensorflow as tf
from tensorflow import keras

# Suppress FutureWarning messages for the rest of the run so library
# deprecation notices do not clutter the script's [INFO] output.
warnings.filterwarnings("ignore", category=FutureWarning)
# Default resolution (dots per inch) for every figure created below.
plt.rcParams["figure.dpi"] = 120

# ---------------------- Paths ----------------------
# Root folder that holds the training artifacts and receives the generated
# plots. It defaults to the original author's location; set the AGRI_MIND_DIR
# environment variable to run the script elsewhere without editing this file.
# An empty value is treated as "not set".
BASE_DIR = os.environ.get("AGRI_MIND_DIR") or r"C:\Users\sagni\Downloads\Agri Mind"
ARCHIVE  = os.path.join(BASE_DIR, "archive")

# Inputs
HIST_CSV = os.path.join(BASE_DIR, "neuro_history.csv")    # per-epoch training history
PKL_PATH = os.path.join(BASE_DIR, "neuro_preprocess.pkl")  # joblib bundle with the fitted preprocessor
H5_PATH  = os.path.join(BASE_DIR, "neuro_model.h5")        # saved Keras model
DF_PATH  = os.path.join(ARCHIVE, "yield_df.csv")  # pre-merged dataset

# Outputs (one PNG per plot)
OUT_MAE_PNG   = os.path.join(BASE_DIR, "neuro_accuracy_mae.png")
OUT_LOSS_PNG  = os.path.join(BASE_DIR, "neuro_accuracy_loss.png")
OUT_CORR_PNG  = os.path.join(BASE_DIR, "neuro_corr_heatmap.png")
OUT_PVA_PNG   = os.path.join(BASE_DIR, "neuro_pred_vs_actual_heatmap.png")

# ---------------------- Utils ----------------------
def ensure_exists(p: str, name: str):
    """Raise ``FileNotFoundError`` unless the path ``p`` exists.

    ``name`` is a human-readable label for the file; it is only used to
    build the error message.
    """
    if os.path.exists(p):
        return
    raise FileNotFoundError(f"{name} not found at: {p}")

def drop_row_id_cols(df: pd.DataFrame) -> pd.DataFrame:
    """Return ``df`` without columns that look like saved row identifiers.

    A column is dropped when its name, compared case-insensitively, is
    ``"index"`` or ``"id"``, or starts with ``"unnamed:"``. Non-string
    column labels (for example integer headers) are never treated as row-id
    columns and are kept. The input frame is not modified.
    """
    def _looks_like_row_id(label) -> bool:
        # Only string labels can match; calling .lower() on an int label
        # would raise AttributeError.
        if not isinstance(label, str):
            return False
        lowered = label.lower()
        return lowered in {"index", "id"} or lowered.startswith("unnamed:")

    cands = [c for c in df.columns if _looks_like_row_id(c)]
    return df.drop(columns=cands, errors="ignore")

def detect_target(df: pd.DataFrame) -> str:
    """Pick the name of the target column in ``df``.

    Well-known target names are tried in priority order (exact,
    case-sensitive match). If none is present, the last numeric column is
    used instead.

    Raises:
        ValueError: if no known name matches and ``df`` has no numeric column.
    """
    known_names = ("hg/ha_yield", "yield", "Yield", "target", "y")
    match = next((name for name in known_names if name in df.columns), None)
    if match is not None:
        return match

    # Fallback: the right-most numeric column.
    numeric_names = df.select_dtypes(include=[np.number]).columns.tolist()
    if numeric_names:
        return numeric_names[-1]
    raise ValueError("No numeric target column detected.")

def to_numpy(x):
    """Return ``x`` as a dense array.

    Objects that expose a ``toarray()`` method are converted through it;
    anything else goes through ``np.asarray``.
    """
    return x.toarray() if hasattr(x, "toarray") else np.asarray(x)

# ---------------------- Accuracy Graphs ----------------------
def _plot_history_curves(hist, series, ylabel, title, out_path):
    """Plot the available ``(column, label)`` series against epoch number and save to ``out_path``."""
    epochs = np.arange(1, len(hist) + 1)
    fig, ax = plt.subplots(figsize=(7, 4.5))
    try:
        drawn = 0
        for column, label in series:
            if column in hist.columns:
                ax.plot(epochs, hist[column], label=label)
                drawn += 1
        ax.set_xlabel("Epoch")
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        # A legend with no labelled lines only triggers a matplotlib warning.
        if drawn:
            ax.legend()
        ax.grid(True, alpha=0.3)
        fig.tight_layout()
        fig.savefig(out_path)
    finally:
        # Release the figure even if saving fails, so figures do not pile up.
        plt.close(fig)


def plot_accuracy_graphs(history_csv: str):
    """Plot training curves from a history CSV.

    Writes two PNG files: train/validation MAE to ``OUT_MAE_PNG`` and
    train/validation loss to ``OUT_LOSS_PNG``. Columns missing from the CSV
    are skipped; a figure is still written even when none of its columns is
    present.

    Args:
        history_csv: path to the per-epoch history CSV.

    Raises:
        FileNotFoundError: if ``history_csv`` does not exist.
    """
    ensure_exists(history_csv, "history CSV")
    hist = pd.read_csv(history_csv)

    # Expected columns from Keras CSVLogger: epoch,loss,mae,val_loss,val_mae
    _plot_history_curves(
        hist,
        [("mae", "Train MAE"), ("val_mae", "Val MAE")],
        "MAE",
        "Model Accuracy (MAE)",
        OUT_MAE_PNG,
    )
    _plot_history_curves(
        hist,
        [("loss", "Train Loss"), ("val_loss", "Val Loss")],
        "Loss (MSE)",
        "Training Loss",
        OUT_LOSS_PNG,
    )

# ---------------------- Heatmaps ----------------------
def plot_corr_heatmap(df_csv: str):
    """Save a correlation heatmap of the dataset's numeric columns to ``OUT_CORR_PNG``.

    Row-id columns are dropped, rows with a missing target are removed, and
    only numeric columns with more than one distinct value are correlated.
    When more than 25 columns remain, the heatmap is limited to the target
    plus the columns most strongly correlated with it. If the target is not
    among the numeric, non-constant columns, the first 25 columns are used.

    Args:
        df_csv: path to the dataset CSV.

    Raises:
        FileNotFoundError: if ``df_csv`` does not exist.
    """
    ensure_exists(df_csv, "dataset CSV")
    df = pd.read_csv(df_csv)
    df = drop_row_id_cols(df)
    target = detect_target(df)
    df = df.dropna(subset=[target]).copy()

    # Use only numeric columns (incl. target) to keep correlation meaningful
    num_df = df.select_dtypes(include=[np.number])
    # Keep only columns with some variance
    nunique = num_df.nunique()
    num_df = num_df[nunique[nunique > 1].index.tolist()]

    # Correlations are pairwise, so the full matrix can be computed once and
    # sliced afterwards instead of being recomputed on the reduced frame.
    corr = num_df.corr(numeric_only=True)

    # Limit width for readability if too many columns
    MAX_COLS = 25
    if corr.shape[1] > MAX_COLS:
        if target in corr.columns:
            # Target first, then the columns most correlated with it, keeping
            # the total at exactly MAX_COLS.
            ranked = corr[target].abs().sort_values(ascending=False)
            others = [c for c in ranked.index if c != target]
            top_cols = [target] + others[:MAX_COLS - 1]
        else:
            # The target is non-numeric or constant, so there is nothing to
            # rank against; fall back to the leading columns.
            top_cols = corr.columns[:MAX_COLS].tolist()
        corr = corr.loc[top_cols, top_cols]

    fig = plt.figure(figsize=(min(12, 0.6*corr.shape[1]+4), min(10, 0.6*corr.shape[0]+4)))
    try:
        if HAS_SNS:
            sns.heatmap(corr, annot=False, cmap="viridis", square=False, cbar=True)
        else:
            plt.imshow(corr.values, aspect="auto", cmap="viridis")
            plt.xticks(range(corr.shape[1]), corr.columns, rotation=90)
            plt.yticks(range(corr.shape[0]), corr.index)
            plt.colorbar()
        plt.title("Correlation Heatmap (numeric features)")
        plt.tight_layout()
        plt.savefig(OUT_CORR_PNG, bbox_inches="tight")
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close(fig)

def plot_pred_vs_actual_heatmap(pkl_path: str, h5_path: str, df_csv: str):
    """Save a 2D-histogram heatmap of predicted vs. actual target values to ``OUT_PVA_PNG``.

    The preprocessing bundle is loaded with joblib and the model with Keras.
    Predictions are made for every dataset row that has a target value. The
    target column comes from the bundle's ``"target_col"`` entry when
    present, otherwise from ``detect_target``.

    Args:
        pkl_path: path to the joblib bundle; it must contain a
            ``"preprocess"`` entry exposing ``transform``.
        h5_path: path to the saved Keras model.
        df_csv: path to the dataset CSV.

    Raises:
        FileNotFoundError: if any of the three paths does not exist.
    """
    required = (
        (pkl_path, "preprocess PKL"),
        (h5_path, "model H5"),
        (df_csv, "dataset CSV"),
    )
    for path, label in required:
        ensure_exists(path, label)

    artifacts = joblib.load(pkl_path)
    transformer = artifacts["preprocess"]
    target_col = artifacts.get("target_col")

    frame = drop_row_id_cols(pd.read_csv(df_csv))
    if target_col is None:
        target_col = detect_target(frame)
    frame = frame.dropna(subset=[target_col]).copy()

    features = frame.drop(columns=[target_col])
    actual = frame[target_col].astype(float)

    transformed = transformer.transform(features)
    network = keras.models.load_model(h5_path, compile=False)
    predicted = network.predict(to_numpy(transformed), verbose=0).ravel()

    # 2D histogram rendered as a heatmap.
    fig = plt.figure(figsize=(6.5, 6))
    ax = fig.gca()

    # Share one 1st-99th percentile range between both axes so the
    # diagonal is meaningful and outliers do not stretch the plot.
    combined = np.concatenate([actual.values, predicted])
    lo, hi = np.nanpercentile(combined, [1, 99])
    n_bins = 60
    *_, mesh = ax.hist2d(
        actual,
        predicted,
        bins=n_bins,
        range=[[lo, hi], [lo, hi]],
        cmap="viridis",
    )
    ax.set_xlabel("Actual")
    ax.set_ylabel("Predicted")
    ax.set_title("Predicted vs Actual (Heatmap)")
    fig.colorbar(mesh, ax=ax).set_label("Count")

    # Dashed y = x reference line: perfect predictions would fall on it.
    diagonal = [lo, hi]
    ax.plot(diagonal, diagonal, linestyle="--", linewidth=1)

    fig.tight_layout()
    fig.savefig(OUT_PVA_PNG)
    plt.close(fig)

# ---------------------- Run all ----------------------
if __name__ == "__main__":
    # Each step: (progress message, callable that renders the plot(s),
    # files that the step writes).
    steps = (
        (
            "[INFO] Generating accuracy graphs...",
            lambda: plot_accuracy_graphs(HIST_CSV),
            (OUT_MAE_PNG, OUT_LOSS_PNG),
        ),
        (
            "[INFO] Building correlation heatmap...",
            lambda: plot_corr_heatmap(DF_PATH),
            (OUT_CORR_PNG,),
        ),
        (
            "[INFO] Building Predicted vs Actual heatmap...",
            lambda: plot_pred_vs_actual_heatmap(PKL_PATH, H5_PATH, DF_PATH),
            (OUT_PVA_PNG,),
        ),
    )

    for banner, render, outputs in steps:
        print(banner)
        render()
        for saved_path in outputs:
            print("[INFO] Saved:", saved_path)

    print("[DONE] All plots created in:", BASE_DIR)


[INFO] Generating accuracy graphs...
[INFO] Saved: C:\Users\sagni\Downloads\Agri Mind\neuro_accuracy_mae.png
[INFO] Saved: C:\Users\sagni\Downloads\Agri Mind\neuro_accuracy_loss.png
[INFO] Building correlation heatmap...
[INFO] Saved: C:\Users\sagni\Downloads\Agri Mind\neuro_corr_heatmap.png
[INFO] Building Predicted vs Actual heatmap...
[INFO] Saved: C:\Users\sagni\Downloads\Agri Mind\neuro_pred_vs_actual_heatmap.png
[DONE] All plots created in: C:\Users\sagni\Downloads\Agri Mind
