In [5]:
# ============================================================
# 🌾 Precision Farming & Crop Health Forecasting System
# Author: Annan Sadr
# ============================================================

import os
import json
import yaml
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import joblib

# ------------------------------------------------------------
# 📂 Paths
# ------------------------------------------------------------
BASE_DIR = r"C:\Users\NXTWAVE\Downloads\Precision Farming & Crop Health Forecasting System"
PATH_PEST = os.path.join(BASE_DIR, "archive", "pesticides.csv")
PATH_RAIN = os.path.join(BASE_DIR, "archive", "rainfall.csv")
PATH_TEMP = os.path.join(BASE_DIR, "archive", "temp.csv")
PATH_YIELD = os.path.join(BASE_DIR, "archive", "yield.csv")

MODEL_PATH = os.path.join(BASE_DIR, "precision_model.h5")
SCALER_X_PATH = os.path.join(BASE_DIR, "scaler_x.pkl")
SCALER_Y_PATH = os.path.join(BASE_DIR, "scaler_y.pkl")
CONFIG_PATH = os.path.join(BASE_DIR, "config.yaml")
RESULT_PATH = os.path.join(BASE_DIR, "precision_results.json")

# ------------------------------------------------------------
# 🧩 Helper: Load CSV safely
# ------------------------------------------------------------
def load_csv_optimized(path, name):
    df = pd.read_csv(path, low_memory=False)
    print(f"  → Loaded {name}: {df.shape}")
    for col in df.select_dtypes(include=["float64", "int64"]).columns:
        df[col] = pd.to_numeric(df[col], downcast="float")
    return df

# ------------------------------------------------------------
# 🧩 Case-insensitive Year detection
# ------------------------------------------------------------
def find_year_column(df):
    for c in df.columns:
        if str(c).strip().lower() == "year":
            return c
    raise ValueError(f"No 'Year' column found in: {df.columns.tolist()}")

# ------------------------------------------------------------
# 📊 Aggregation Function
# ------------------------------------------------------------
def aggregate_yearly(df):
    """Aggregate all numeric columns per year safely (case-insensitive)."""
    year_col = find_year_column(df)
    numeric_cols = df.select_dtypes(include=[np.number]).columns
    grouped = df.groupby(year_col, as_index=False)[numeric_cols].mean()
    grouped = grouped.loc[:, ~grouped.columns.duplicated()]
    grouped.rename(columns={year_col: "Year"}, inplace=True)  # unify column name
    return grouped

# ------------------------------------------------------------
# 🧩 Load Datasets
# ------------------------------------------------------------
print("[INFO] Loading datasets...")
df_pest = load_csv_optimized(PATH_PEST, "Pesticides")
df_rain = load_csv_optimized(PATH_RAIN, "Rainfall")
df_temp = load_csv_optimized(PATH_TEMP, "Temperature")
df_yield = load_csv_optimized(PATH_YIELD, "Yield")

# ------------------------------------------------------------
# 📊 Aggregate numeric data per Year
# ------------------------------------------------------------
print("[INFO] Aggregating per-year averages...")
pest_yearly = aggregate_yearly(df_pest)
rain_yearly = aggregate_yearly(df_rain)
temp_yearly = aggregate_yearly(df_temp)
yield_yearly = aggregate_yearly(df_yield)

print("  → After aggregation:")
print(f"     Pesticides: {pest_yearly.shape}")
print(f"     Rainfall:   {rain_yearly.shape}")
print(f"     Temp:       {temp_yearly.shape}")
print(f"     Yield:      {yield_yearly.shape}")

# ------------------------------------------------------------
# 🔗 Merge Aggregated Data
# ------------------------------------------------------------
print("[INFO] Merging yearly aggregates...")
df_merged = (
    yield_yearly.merge(pest_yearly, on="Year", how="left")
                .merge(rain_yearly, on="Year", how="left")
                .merge(temp_yearly, on="Year", how="left")
)

df_merged = df_merged.dropna().reset_index(drop=True)
print(f"[INFO] Final merged shape: {df_merged.shape}")

# ------------------------------------------------------------
# 🎯 Feature / Target setup
# ------------------------------------------------------------
target_col = "Yield" if "Yield" in df_merged.columns else df_merged.columns[-1]
feature_cols = [c for c in df_merged.columns if c not in ["Year", target_col]]

X = df_merged[feature_cols].values
y = df_merged[target_col].values.reshape(-1, 1)
print(f"[INFO] Features: {len(feature_cols)} | Target: {target_col}")

# ------------------------------------------------------------
# 🔢 Scaling
# ------------------------------------------------------------
scaler_x = MinMaxScaler()
scaler_y = MinMaxScaler()
X_scaled = scaler_x.fit_transform(X)
y_scaled = scaler_y.fit_transform(y)

# ------------------------------------------------------------
# 🧠 Split
# ------------------------------------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y_scaled, test_size=0.2, random_state=42
)

# ------------------------------------------------------------
# 🏗️ Model
# ------------------------------------------------------------
model = Sequential([
    Dense(64, activation="relu", input_dim=X_train.shape[1]),
    Dropout(0.2),
    Dense(32, activation="relu"),
    Dense(1, activation="linear")
])
model.compile(optimizer="adam", loss="mse", metrics=["mae"])
print("[INFO] Model summary:")
model.summary()

# ------------------------------------------------------------
# 🚀 Train
# ------------------------------------------------------------
print("[INFO] Training model...")
history = model.fit(
    X_train, y_train,
    epochs=200, batch_size=4,
    validation_split=0.2, verbose=0
)
print("[INFO] ✅ Training complete")

# ------------------------------------------------------------
# 📈 Evaluate
# ------------------------------------------------------------
loss, mae = model.evaluate(X_test, y_test, verbose=0)
y_pred_scaled = model.predict(X_test)
y_pred = scaler_y.inverse_transform(y_pred_scaled)
y_true = scaler_y.inverse_transform(y_test)

rmse = np.sqrt(np.mean((y_true - y_pred) ** 2))
r2 = 1 - (np.sum((y_true - y_pred) ** 2) / np.sum((y_true - np.mean(y_true)) ** 2))
print(f"[RESULT] RMSE={rmse:.3f}, MAE={mae:.3f}, R²={r2:.3f}")

# ------------------------------------------------------------
# 💾 Save Artifacts
# ------------------------------------------------------------
print("[INFO] Saving artifacts...")
model.save(MODEL_PATH)
joblib.dump(scaler_x, SCALER_X_PATH)
joblib.dump(scaler_y, SCALER_Y_PATH)

config = {
    "features": feature_cols,
    "target": target_col,
    "epochs": 200,
    "optimizer": "adam",
    "loss": "mse",
    "aggregated_by": "Year",
}
with open(CONFIG_PATH, "w") as f:
    yaml.dump(config, f)

results = {
    "RMSE": float(rmse),
    "MAE": float(mae),
    "R2_Score": float(r2),
    "Dataset_Size": int(len(df_merged)),
    "Features": feature_cols,
    "Target": target_col,
}
with open(RESULT_PATH, "w") as f:
    json.dump(results, f, indent=4)

print(f"[INFO] ✅ All artifacts saved in {BASE_DIR}")
print(" - precision_model.h5")
print(" - scaler_x.pkl")
print(" - scaler_y.pkl")
print(" - config.yaml")
print(" - precision_results.json")

# ------------------------------------------------------------
# 🧾 Sample predictions
# ------------------------------------------------------------
print("\nSample predictions:")
for i in range(min(5, len(y_pred))):
    print(f"Predicted: {y_pred[i][0]:.2f} | Actual: {y_true[i][0]:.2f}")


[INFO] Loading datasets...
  → Loaded Pesticides: (4349, 7)
  → Loaded Rainfall: (6727, 3)
  → Loaded Temperature: (71311, 3)
  → Loaded Yield: (56717, 12)
[INFO] Aggregating per-year averages...
  → After aggregation:
     Pesticides: (27, 2)
     Rainfall:   (31, 1)
     Temp:       (271, 2)
     Yield:      (56, 6)
[INFO] Merging yearly aggregates...
[INFO] Final merged shape: (24, 8)
[INFO] Features: 6 | Target: avg_temp


[INFO] Model summary:
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                448       
                                                                 
 dropout (Dropout)           (None, 64)                0         
                                                                 
 dense_1 (Dense)             (None, 32)                2080      
                                                                 

  saving_api.save_model(
