In [1]:
import os, json, yaml, warnings
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import load_model
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import pickle

# Install a blanket filter that ignores every warning category.
warnings.filterwarnings("ignore")

# ==========================================================
# 📂 PATH SETUP
# ==========================================================
# Root folder that holds the dataset archive and every artifact below.
BASE_PATH = r"C:\Users\NXTWAVE\Downloads\Greenhouse Climate & Crop Yield Optimizer"
DATA_PATH = os.path.join(BASE_PATH, r"archive\20210703_greenhouse_data.csv")

# Model artifacts and output files all sit directly under BASE_PATH, so they
# are derived in one pass from their file names.
MODEL_PATH, SCALER_PATH, CONFIG_PATH, RESULTS_CSV, PRED_JSON = (
    os.path.join(BASE_PATH, artifact_name)
    for artifact_name in (
        "biosphere_model.h5",
        "biosphere_scaler.pkl",
        "biosphere_config.yaml",
        "biosphere_results.csv",
        "biosphere_prediction.json",
    )
)

# ==========================================================
# 1️⃣ LOAD ARTIFACTS
# ==========================================================
print("[INFO] Loading model and scalers...")
model = load_model(MODEL_PATH)

# The scaler pickle is a mapping: the entry under "x" becomes scaler_x and the
# entry under "y" becomes scaler_y.
with open(SCALER_PATH, "rb") as scaler_file:
    scalers = pickle.load(scaler_file)
scaler_x = scalers["x"]
scaler_y = scalers["y"]

# ==========================================================
# 2️⃣ LOAD DATASET
# ==========================================================
print("[INFO] Loading dataset...")
# Semicolon-separated file; malformed rows are skipped rather than aborting.
df = pd.read_csv(DATA_PATH, sep=";", engine="python", on_bad_lines="skip")
# Normalise headers: trim, spaces -> underscores, lower-case.
df.columns = [c.strip().replace(" ", "_").lower() for c in df.columns]
# Drop columns that are entirely empty, then every row with any missing value.
df = df.dropna(axis=1, how="all").dropna(axis=0, how="any").reset_index(drop=True)

# Convert a column to a numeric dtype only when all of its values parse;
# otherwise leave it exactly as loaded. This is the explicit form of the
# deprecated ``pd.to_numeric(..., errors="ignore")``.
for c in df.columns:
    try:
        df[c] = pd.to_numeric(df[c])
    except (ValueError, TypeError):
        # Not a numeric column: keep the original values untouched.
        continue

numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
if len(numeric_cols) < 2:
    raise ValueError("❌ Not enough numeric columns for prediction.")

print(f"[INFO] Numeric columns detected: {numeric_cols}")

# The last numeric column is the prediction target; every other numeric column
# is an input feature.
# NOTE(review): this relies on column order matching whatever the loaded
# scalers/model were fitted on — confirm against the training script.
features = numeric_cols
target = features[-1]

X = df[features[:-1]].values
y = df[target].values.reshape(-1, 1)

# ==========================================================
# 3️⃣ SCALING
# ==========================================================
print("[INFO] Scaling input features...")
# Apply the already-fitted scalers (transform only, no re-fitting): features
# go through scaler_x, the target column through scaler_y.
X_scaled, y_scaled = scaler_x.transform(X), scaler_y.transform(y)

# ==========================================================
# 4️⃣ CREATE SEQUENCES (for LSTM)
# ==========================================================
def create_sequences(X, y, time_steps=5):
    """Build sliding windows over ``X`` paired with the target that follows each.

    Window ``i`` is ``X[i : i + time_steps]`` and its target is
    ``y[i + time_steps]``, so ``len(X) - time_steps`` pairs are produced.

    Args:
        X: Array-like of samples, indexed along its first axis.
        y: Array-like of targets aligned row-for-row with ``X``.
        time_steps: Number of consecutive rows in each window.

    Returns:
        A tuple ``(Xs, ys)`` of NumPy arrays. ``Xs`` has shape
        ``(n_windows, time_steps, *X.shape[1:])`` and ``ys`` has shape
        ``(n_windows, *y.shape[1:])``. When the input has too few rows to form
        a single window, both arrays are empty but keep those trailing
        dimensions instead of collapsing to shape ``(0,)``.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    n_windows = max(len(X) - time_steps, 0)

    if n_windows == 0:
        # Keep the window/feature dimensions so downstream shape checks and
        # scalers still see arrays of the expected rank.
        return (
            np.empty((0, time_steps) + X.shape[1:], dtype=X.dtype),
            np.empty((0,) + y.shape[1:], dtype=y.dtype),
        )

    Xs = np.stack([X[i:i + time_steps] for i in range(n_windows)])
    ys = np.array([y[i + time_steps] for i in range(n_windows)])
    return Xs, ys

# Window length fed to the model; each prediction uses TIME_STEPS consecutive
# rows to predict the target of the row that follows them.
TIME_STEPS = 5
X_seq, y_seq = create_sequences(X_scaled, y_scaled, TIME_STEPS)

# Fail early with an actionable message: with TIME_STEPS rows or fewer there is
# no complete window, and the model/scaler calls below would otherwise fail
# with an opaque shape error.
if len(X_seq) == 0:
    raise ValueError(
        f"❌ Need more than {TIME_STEPS} rows to build a sequence; "
        f"got {len(X_scaled)}."
    )

# ==========================================================
# 5️⃣ GENERATE PREDICTIONS
# ==========================================================
print("[INFO] Generating predictions...")
y_pred_scaled = model.predict(X_seq)
# Map both predictions and ground truth back to the target's original scale so
# the metrics are reported in real units.
y_pred = scaler_y.inverse_transform(y_pred_scaled)
y_true = scaler_y.inverse_transform(y_seq)

# ==========================================================
# 6️⃣ EVALUATE PERFORMANCE
# ==========================================================
# The three metrics are independent of each other; all compare the
# inverse-scaled ground truth against the inverse-scaled predictions.
r2 = r2_score(y_true, y_pred)
mae = mean_absolute_error(y_true, y_pred)
# RMSE is the square root of the mean squared error.
mse = mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse)

print(f"[RESULT] MAE={mae:.4f}, RMSE={rmse:.4f}, R²={r2:.4f}")

# ==========================================================
# 7️⃣ SAVE OUTPUTS
# ==========================================================
# -- Result CSV --
# Flattened actual and predicted values, written side by side without an index.
actual_values = y_true.flatten()
predicted_values = y_pred.flatten()
results_df = pd.DataFrame({"Actual": actual_values, "Predicted": predicted_values})
results_df.to_csv(RESULTS_CSV, index=False)
print(f"[INFO] ✅ Saved prediction results to {RESULTS_CSV}")

# -- JSON Summary --
# Every number is cast to a built-in float before being serialised.
metrics = {"MAE": float(mae), "RMSE": float(rmse), "R2": float(r2)}
# Only the first ten actual/predicted pairs are included as a sample.
sample_predictions = [
    {"actual": float(actual), "predicted": float(predicted)}
    for actual, predicted in zip(actual_values[:10], predicted_values[:10])
]
prediction_summary = {
    "dataset": DATA_PATH,
    "records_evaluated": len(y_true),
    "metrics": metrics,
    "sample_predictions": sample_predictions,
}
with open(PRED_JSON, "w") as summary_file:
    json.dump(prediction_summary, summary_file, indent=4)
print(f"[INFO] ✅ Saved summary JSON to {PRED_JSON}")

# ==========================================================
# ✅ COMPLETION
# ==========================================================
# Final banner: success line, output folder, and the two files written there.
completion_lines = (
    "\n[INFO] ✅ Prediction completed successfully.",
    f"📦 Files saved in {BASE_PATH}",
    "  ├── biosphere_results.csv",
    "  └── biosphere_prediction.json",
)
print(*completion_lines, sep="\n")



[INFO] Loading model and scalers...

[INFO] Loading dataset...
[INFO] Numeric columns detected: ['id', 'online_humidity_percentage']
[INFO] Scaling input features...
[INFO] Generating predictions...
[RESULT] MAE=14.5224, RMSE=17.3648, R²=0.0580
[INFO] ✅ Saved prediction results to C:\Users\NXTWAVE\Downloads\Greenhouse Climate & Crop Yield Optimizer\biosphere_results.csv
[INFO] ✅ Saved summary JSON to C:\Users\NXTWAVE\Downloads\Greenhouse Climate & Crop Yield Optimizer\biosphere_prediction.json

[INFO] ✅ Prediction completed successfully.
📦 Files saved in C:\Users\NXTWAVE\Downloads\Greenhouse Climate & Crop Yield Optimizer
  ├── biosphere_results.csv
  └── biosphere_prediction.json
