# 📊 Model Evaluation Notebook
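This notebook trains two regression models (Linear Regression and XGBoost) on the crop yield dataset, evaluates them on a held-out test split (RMSE, MAE, R²), and saves each fitted model and its actual-vs-predicted plot to the `models/` directory.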

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import xgboost as xgb
import seaborn as sns
import joblib
import os

# Use seaborn's "whitegrid" style for the figures drawn in this notebook
sns.set(style="whitegrid")

## 📥 Load the Dataset

In [None]:
# Read the crop yield data from data.csv (path is relative to the working
# directory) and show the first five rows as the cell output.
df = pd.read_csv(filepath_or_buffer="data.csv")
df.head(n=5)

## 🔧 Prepare Features and Target

In [None]:
# Name of the column to predict, and the predictor columns used as model inputs
target = "Yield"
features = ["Temperature", "Humidity", "Precipitation", "pH", "Fertilizer"]

# Split the frame into the predictor matrix X and the target series y
X, y = df[features], df[target]

## ✂️ Split the Data

In [None]:
# Hold out 20% of the rows as a test set; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## 🤖 Train and Evaluate Models

In [None]:
# Candidate regressors, keyed by the name used for output files, plot titles
# and the rows of the results table.
models = {
    "LinearRegression": LinearRegression(),
    "XGBoost": xgb.XGBRegressor(objective='reg:squarederror', n_estimators=100)
}

# Fitted models and prediction plots are written to ./models
os.makedirs("models", exist_ok=True)

# Test-set metrics per model: {model name: {"RMSE": ..., "MAE": ..., "R2": ...}}
results = {}

# End points of the "perfect prediction" diagonal; they depend only on y_test,
# so compute them once instead of on every loop iteration.
y_lo, y_hi = y_test.min(), y_test.max()

for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Save the fitted model under its lower-cased name (e.g. models/xgboost.pkl)
    joblib.dump(model, f"models/{name.lower()}.pkl")

    # Evaluation on the held-out test split.
    # RMSE is taken as sqrt(MSE) rather than via `squared=False`: that keyword
    # was deprecated in scikit-learn 1.4 and removed in 1.6, where passing it
    # raises a TypeError.
    rmse = mean_squared_error(y_test, y_pred) ** 0.5
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    results[name] = {"RMSE": rmse, "MAE": mae, "R2": r2}

    # Plot actual vs. predicted values; points on the dashed red diagonal
    # are predictions that match the actual value exactly.
    plt.figure(figsize=(6, 6))
    sns.scatterplot(x=y_test, y=y_pred, alpha=0.6)
    plt.plot([y_lo, y_hi], [y_lo, y_hi], 'r--')
    plt.xlabel("Actual Yield")
    plt.ylabel("Predicted Yield")
    plt.title(f"{name} - Actual vs Predicted")
    plt.grid(True)
    plt.tight_layout()
    # NOTE: the plot file keeps the original-case model name, unlike the
    # lower-cased .pkl file above. Save before show() so the figure is still
    # populated when it is written to disk.
    plt.savefig(f"models/{name}_prediction_plot.png")
    plt.show()

## 📈 Results Summary

In [None]:
# Show the metrics as a table: one row per model, one column per metric
pd.DataFrame(results).transpose()