In [1]:
# ============================================================
# üåæ Precision Farming & Crop Health Forecasting System
# Author: Annan Sadr
# With Auto Graphs (Accuracy, Heatmap, Comparison, Prediction, Result)
# ============================================================

import os
import json
import yaml
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import joblib

# ------------------------------------------------------------
# üìÇ Paths
# ------------------------------------------------------------
BASE_DIR = r"C:\Users\NXTWAVE\Downloads\Precision Farming & Crop Health Forecasting System"
PATH_PEST = os.path.join(BASE_DIR, "archive", "pesticides.csv")
PATH_RAIN = os.path.join(BASE_DIR, "archive", "rainfall.csv")
PATH_TEMP = os.path.join(BASE_DIR, "archive", "temp.csv")
PATH_YIELD = os.path.join(BASE_DIR, "archive", "yield.csv")

# Output file paths
MODEL_PATH = os.path.join(BASE_DIR, "precision_model.h5")
SCALER_X_PATH = os.path.join(BASE_DIR, "scaler_x.pkl")
SCALER_Y_PATH = os.path.join(BASE_DIR, "scaler_y.pkl")
CONFIG_PATH = os.path.join(BASE_DIR, "config.yaml")
RESULT_PATH = os.path.join(BASE_DIR, "precision_results.json")

# Graph paths
ACC_GRAPH = os.path.join(BASE_DIR, "precision_accuracy_graph.png")
HEATMAP_GRAPH = os.path.join(BASE_DIR, "precision_heatmap.png")
COMPARE_GRAPH = os.path.join(BASE_DIR, "precision_comparison_graph.png")
PRED_GRAPH = os.path.join(BASE_DIR, "precision_prediction_graph.png")
RESULT_GRAPH = os.path.join(BASE_DIR, "precision_result_graph.png")

# ------------------------------------------------------------
# üß© Helper: Safe CSV loading
# ------------------------------------------------------------
def load_csv_optimized(path, name):
    df = pd.read_csv(path, low_memory=False)
    print(f"  ‚Üí Loaded {name}: {df.shape}")
    for col in df.select_dtypes(include=["float64", "int64"]).columns:
        df[col] = pd.to_numeric(df[col], downcast="float")
    return df

def find_year_column(df):
    for c in df.columns:
        if str(c).strip().lower() == "year":
            return c
    raise ValueError(f"No 'Year' column found in: {df.columns.tolist()}")

def aggregate_yearly(df):
    """Aggregate all numeric columns per year safely."""
    year_col = find_year_column(df)
    numeric_cols = df.select_dtypes(include=[np.number]).columns
    grouped = df.groupby(year_col, as_index=False)[numeric_cols].mean()
    grouped = grouped.loc[:, ~grouped.columns.duplicated()]
    grouped.rename(columns={year_col: "Year"}, inplace=True)
    return grouped

# ------------------------------------------------------------
# üß© Load Data
# ------------------------------------------------------------
print("[INFO] Loading datasets...")
df_pest = load_csv_optimized(PATH_PEST, "Pesticides")
df_rain = load_csv_optimized(PATH_RAIN, "Rainfall")
df_temp = load_csv_optimized(PATH_TEMP, "Temperature")
df_yield = load_csv_optimized(PATH_YIELD, "Yield")

# ------------------------------------------------------------
# üìä Aggregate per year
# ------------------------------------------------------------
print("[INFO] Aggregating yearly data...")
pest_yearly = aggregate_yearly(df_pest)
rain_yearly = aggregate_yearly(df_rain)
temp_yearly = aggregate_yearly(df_temp)
yield_yearly = aggregate_yearly(df_yield)

print("[INFO] Merging aggregated datasets...")
df_merged = (
    yield_yearly.merge(pest_yearly, on="Year", how="left")
                .merge(rain_yearly, on="Year", how="left")
                .merge(temp_yearly, on="Year", how="left")
)
df_merged = df_merged.dropna().reset_index(drop=True)
print(f"[INFO] Final merged shape: {df_merged.shape}")

# ------------------------------------------------------------
# üéØ Features & Target
# ------------------------------------------------------------
target_col = "Yield" if "Yield" in df_merged.columns else df_merged.columns[-1]
feature_cols = [c for c in df_merged.columns if c not in ["Year", target_col]]

X = df_merged[feature_cols].values
y = df_merged[target_col].values.reshape(-1, 1)

scaler_x = MinMaxScaler()
scaler_y = MinMaxScaler()
X_scaled = scaler_x.fit_transform(X)
y_scaled = scaler_y.fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y_scaled, test_size=0.2, random_state=42
)

# ------------------------------------------------------------
# üß† Model
# ------------------------------------------------------------
model = Sequential([
    Dense(64, activation="relu", input_dim=X_train.shape[1]),
    Dropout(0.2),
    Dense(32, activation="relu"),
    Dense(1, activation="linear")
])
model.compile(optimizer="adam", loss="mse", metrics=["mae"])

print("[INFO] Training model...")
history = model.fit(X_train, y_train, epochs=200, batch_size=4, validation_split=0.2, verbose=0)
print("[INFO] ‚úÖ Training complete")

# ------------------------------------------------------------
# üìà Evaluate
# ------------------------------------------------------------
loss, mae = model.evaluate(X_test, y_test, verbose=0)
y_pred_scaled = model.predict(X_test)
y_pred = scaler_y.inverse_transform(y_pred_scaled)
y_true = scaler_y.inverse_transform(y_test)

rmse = np.sqrt(np.mean((y_true - y_pred) ** 2))
r2 = 1 - (np.sum((y_true - y_pred) ** 2) / np.sum((y_true - np.mean(y_true)) ** 2))

# ------------------------------------------------------------
# üíæ Save Artifacts
# ------------------------------------------------------------
model.save(MODEL_PATH)
joblib.dump(scaler_x, SCALER_X_PATH)
joblib.dump(scaler_y, SCALER_Y_PATH)

config = {
    "features": feature_cols,
    "target": target_col,
    "epochs": 200,
    "optimizer": "adam",
    "loss": "mse",
    "aggregated_by": "Year"
}
with open(CONFIG_PATH, "w") as f:
    yaml.dump(config, f)

results = {
    "RMSE": float(rmse),
    "MAE": float(mae),
    "R2_Score": float(r2),
    "Dataset_Size": int(len(df_merged)),
    "Features": feature_cols,
    "Target": target_col
}
with open(RESULT_PATH, "w") as f:
    json.dump(results, f, indent=4)

# ------------------------------------------------------------
# üé® Visualizations
# ------------------------------------------------------------
plt.style.use("seaborn-v0_8-darkgrid")

# 1Ô∏è‚É£ Accuracy Graph (Loss & MAE)
plt.figure(figsize=(8, 5))
plt.plot(history.history["loss"], label="Training Loss")
plt.plot(history.history["val_loss"], label="Validation Loss")
plt.plot(history.history["mae"], label="Training MAE")
plt.plot(history.history["val_mae"], label="Validation MAE")
plt.title("Model Accuracy (Loss & MAE)")
plt.xlabel("Epochs")
plt.ylabel("Value")
plt.legend()
plt.tight_layout()
plt.savefig(ACC_GRAPH)
plt.close()

# 2Ô∏è‚É£ Correlation Heatmap
plt.figure(figsize=(8, 6))
corr = df_merged[feature_cols + [target_col]].corr()
sns.heatmap(corr, annot=True, cmap="coolwarm")
plt.title("Feature Correlation Heatmap")
plt.tight_layout()
plt.savefig(HEATMAP_GRAPH)
plt.close()

# 3Ô∏è‚É£ Comparison Graph (Actual vs Predicted)
plt.figure(figsize=(8, 5))
plt.scatter(y_true, y_pred, color="royalblue", alpha=0.7)
plt.plot([y_true.min(), y_true.max()], [y_true.min(), y_true.max()], "r--")
plt.title("Actual vs Predicted Yield Comparison")
plt.xlabel("Actual Yield")
plt.ylabel("Predicted Yield")
plt.tight_layout()
plt.savefig(COMPARE_GRAPH)
plt.close()

# 4Ô∏è‚É£ Prediction Graph (Predicted Yield per Year)
plt.figure(figsize=(8, 5))
years = df_merged["Year"][-len(y_pred):]
plt.plot(years, y_pred.flatten(), "b-o", label="Predicted")
plt.title("Predicted Yield Trend per Year")
plt.xlabel("Year")
plt.ylabel("Predicted Yield")
plt.legend()
plt.tight_layout()
plt.savefig(PRED_GRAPH)
plt.close()

# 5Ô∏è‚É£ Result Graph (RMSE, MAE, R¬≤)
plt.figure(figsize=(7, 5))
metrics = ["RMSE", "MAE", "R¬≤"]
values = [rmse, mae, r2]
bars = plt.bar(metrics, values, color=["#4CAF50", "#2196F3", "#FFC107"])
plt.title("Model Evaluation Summary")
plt.ylabel("Metric Value")
for bar in bars:
    yval = bar.get_height()
    plt.text(bar.get_x() + bar.get_width()/2.0, yval, f"{yval:.3f}", ha="center", va="bottom")
plt.tight_layout()
plt.savefig(RESULT_GRAPH)
plt.close()

# ------------------------------------------------------------
# üßæ Summary Output
# ------------------------------------------------------------
print("\n‚úÖ MODEL TRAINED SUCCESSFULLY!")
print(f"RMSE: {rmse:.3f} | MAE: {mae:.3f} | R¬≤: {r2:.3f}")
print("[INFO] Artifacts & graphs saved in:")
print(BASE_DIR)
print("""
‚îú‚îÄ‚îÄ precision_model.h5
‚îú‚îÄ‚îÄ scaler_x.pkl
‚îú‚îÄ‚îÄ scaler_y.pkl
‚îú‚îÄ‚îÄ config.yaml
‚îú‚îÄ‚îÄ precision_results.json
‚îú‚îÄ‚îÄ precision_accuracy_graph.png
‚îú‚îÄ‚îÄ precision_heatmap.png
‚îú‚îÄ‚îÄ precision_comparison_graph.png
‚îú‚îÄ‚îÄ precision_prediction_graph.png
‚îî‚îÄ‚îÄ precision_result_graph.png
""")

# Show few predictions in console
for i in range(min(5, len(y_pred))):
    print(f"Predicted: {y_pred[i][0]:.2f} | Actual: {y_true[i][0]:.2f}")



[INFO] Loading datasets...
  ‚Üí Loaded Pesticides: (4349, 7)
  ‚Üí Loaded Rainfall: (6727, 3)
  ‚Üí Loaded Temperature: (71311, 3)
  ‚Üí Loaded Yield: (56717, 12)
[INFO] Aggregating yearly data...
[INFO] Merging aggregated datasets...
[INFO] Final merged shape: (24, 8)


[INFO] Training model...


[INFO] ‚úÖ Training complete


  saving_api.save_model(



‚úÖ MODEL TRAINED SUCCESSFULLY!
RMSE: 0.256 | MAE: 0.184 | R¬≤: -4.251
[INFO] Artifacts & graphs saved in:
C:\Users\NXTWAVE\Downloads\Precision Farming & Crop Health Forecasting System

‚îú‚îÄ‚îÄ precision_model.h5
‚îú‚îÄ‚îÄ scaler_x.pkl
‚îú‚îÄ‚îÄ scaler_y.pkl
‚îú‚îÄ‚îÄ config.yaml
‚îú‚îÄ‚îÄ precision_results.json
‚îú‚îÄ‚îÄ precision_accuracy_graph.png
‚îú‚îÄ‚îÄ precision_heatmap.png
‚îú‚îÄ‚îÄ precision_comparison_graph.png
‚îú‚îÄ‚îÄ precision_prediction_graph.png
‚îî‚îÄ‚îÄ precision_result_graph.png

Predicted: 17.90 | Actual: 18.25
Predicted: 18.06 | Actual: 18.18
Predicted: 17.63 | Actual: 17.98
Predicted: 18.25 | Actual: 17.98
Predicted: 17.98 | Actual: 18.02
