# In [None]:
# Smart Agriculture Simulation: AI + IoT
# - Generates synthetic sensor data (soil moisture, temperature, humidity, light, pH, nutrients)
# - Trains a Random Forest regressor to predict crop yield
# - Preprocesses data with MinMax scaling
# - Simulates real-time sensor flow and recommendations (Irrigate/Fertilize/Optimal)
# - Displays a dashboard-style summary and feature importance plot

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

# Reproducibility
np.random.seed(42)

# Step 1: Generate synthetic sensor data
def generate_sensor_data(n_samples=500):
    """Build a frame of random sensor readings plus a synthetic yield column.

    Each sensor column is drawn independently and uniformly from a fixed
    range using the global NumPy RNG. The ``yield`` column (kg/ha) is a
    weighted sum of the readings -- linear except for the pH term, which
    peaks at pH 6.5 -- plus Gaussian noise (sigma = 2), all scaled by 10.

    Args:
        n_samples: Number of rows to generate.

    Returns:
        A DataFrame with columns ``soil_moisture``, ``temperature``,
        ``humidity``, ``light``, ``pH``, ``nutrients`` and ``yield``.
    """
    # (column, low, high). The order matters: columns are drawn in this
    # sequence, so it fixes how the global RNG stream is consumed.
    sensor_ranges = [
        ('soil_moisture', 10, 60),    # %
        ('temperature', 15, 35),      # °C
        ('humidity', 30, 90),         # %
        ('light', 200, 1000),         # lux
        ('pH', 5.5, 7.5),             # pH
        ('nutrients', 50, 200),       # arbitrary units
    ]
    frame = pd.DataFrame({
        column: np.random.uniform(low, high, n_samples)
        for column, low, high in sensor_ranges
    })

    # Distance from the pH optimum (6.5) is turned into a bonus that shrinks
    # as the soil drifts away from it.
    ph_bonus = 7 - np.abs(frame['pH'] - 6.5)
    noise = np.random.normal(0, 2, len(frame))

    weighted_sum = (
        frame['soil_moisture'] * 0.3
        + frame['temperature'] * 0.2
        + frame['humidity'] * 0.1
        + (frame['light'] / 1000) * 0.25
        + ph_bonus * 0.15
        + (frame['nutrients'] / 100) * 0.4
    )
    frame['yield'] = (weighted_sum + noise) * 10

    return frame

# Step 2: Preprocess (scaling)
def preprocess_data(df):
    """Separate the target from the sensor columns and min-max scale the latter.

    Args:
        df: Frame containing the sensor columns and a ``yield`` column.

    Returns:
        A 4-tuple ``(X_scaled, target, scaler, feature_names)``: the scaled
        feature matrix produced by the scaler, the unscaled ``yield``
        column, the fitted ``MinMaxScaler``, and the feature column names
        as a list.

    Note:
        The scaler is fit on every row of ``df``; any train/test split
        happens after this step.
    """
    sensor_frame = df.drop(columns='yield')
    fitted_scaler = MinMaxScaler().fit(sensor_frame)
    scaled_matrix = fitted_scaler.transform(sensor_frame)
    column_names = list(sensor_frame.columns)
    return scaled_matrix, df['yield'], fitted_scaler, column_names

# Step 3: Train regression model and evaluate
def train_and_evaluate(X, y):
    """Fit a random-forest regressor and score it on a held-out split.

    The data is split 80/20 (``test_size=0.2``) with a fixed
    ``random_state`` of 42; the forest uses 200 trees and the same seed.

    Args:
        X: Feature matrix.
        y: Target values aligned with the rows of ``X``.

    Returns:
        A 3-tuple ``(model, rmse, r2)`` where ``rmse`` and ``r2`` are
        computed on the held-out 20 % of the rows.
    """
    split = train_test_split(X, y, test_size=0.2, random_state=42)
    X_fit, X_holdout, y_fit, y_holdout = split

    forest = RandomForestRegressor(n_estimators=200, random_state=42)
    forest.fit(X_fit, y_fit)

    holdout_pred = forest.predict(X_holdout)
    mse = mean_squared_error(y_holdout, holdout_pred)
    return forest, np.sqrt(mse), r2_score(y_holdout, holdout_pred)

# Step 4: Real-time simulation + recommendations
def simulate_real_time(model, scaler, n_samples=5):
    """Generate fresh sensor readings, predict their yield and attach advice.

    Args:
        model: Fitted estimator exposing ``predict``.
        scaler: Fitted scaler exposing ``transform``; applied to the raw
            readings before prediction.
        n_samples: Number of simulated readings to produce.

    Returns:
        A DataFrame of the raw sensor readings with two extra columns:
        ``Predicted Yield (kg/ha)`` and ``Recommendation``.
    """
    # generate_sensor_data also computes a synthetic yield; discard it so only
    # the sensor columns reach the scaler and the model.
    readings = generate_sensor_data(n_samples).drop(columns='yield')
    scaled_readings = scaler.transform(readings)
    readings['Predicted Yield (kg/ha)'] = model.predict(scaled_readings)

    def recommend(row):
        # Checks are ordered by priority: only the first failing one is
        # reported, even if several thresholds are violated.
        if row['soil_moisture'] < 30:
            advice = 'Irrigate'
        elif row['nutrients'] < 100:
            advice = 'Fertilize'
        elif (row['pH'] < 6.0) or (row['pH'] > 7.0):
            advice = 'Adjust pH'
        else:
            advice = 'Optimal'
        return advice

    readings['Recommendation'] = readings.apply(recommend, axis=1)
    return readings

# Step 5: Run pipeline
# Build the training frame with the default row count. generate_sensor_data
# draws from the global NumPy RNG (seeded near the top of the file), so the
# order of this call and the simulation call below determines the values
# each of them produces.
df = generate_sensor_data()
# Min-max scale the sensor columns; the fitted scaler and the column names
# are kept for the simulation and the importance plot respectively.
X_scaled, y, scaler, feature_names = preprocess_data(df)
# Fit the random forest and collect its held-out RMSE and R².
model, rmse, r2 = train_and_evaluate(X_scaled, y)
# Eight fresh readings, pushed through the same scaler and the fitted model.
real_time_results = simulate_real_time(model, scaler, n_samples=8)

# Step 6: Dashboard-style output
# Emit the banner, the simulated readings (rounded to 2 decimals, no index
# column) and the two model metrics, one item per line.
print(
    "=== Smart Agriculture Dashboard ===",
    real_time_results.round(2).to_string(index=False),
    f"\nModel RMSE: {rmse:.2f} kg/ha",
    f"Model R²:   {r2:.3f}",
    sep="\n",
)

# Step 7: Feature importance visualization
# Horizontal bar chart of the fitted model's per-feature importances,
# written to feature_importance.png in the current working directory.
importances = model.feature_importances_
# The seaborn style sheet is named 'seaborn-v0_8' on Matplotlib >= 3.6 and
# plain 'seaborn' on older releases. Asking for a name that is not installed
# raises OSError, so use whichever one is available and otherwise keep the
# default style rather than aborting the whole run over cosmetics.
for style_name in ('seaborn-v0_8', 'seaborn'):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break
plt.figure(figsize=(8, 5))
plt.barh(feature_names, importances, color='green')
plt.xlabel("Feature Importance")
plt.title("Sensor Influence on Crop Yield Prediction")
plt.tight_layout()
# The figure is intentionally left open so notebook front-ends can still
# render it inline after the file has been written.
plt.savefig("feature_importance.png")  # saved locally
print("Feature importance plot saved as feature_importance.png")
