[//]: # ( Machine Learning for Horticulture Template )
[//]: # ( License: MIT License )
[//]: # ( Repository: https://github.com/outobecca/botanical-colabs )

# 🔬 Machine Learning for Horticulture Template

**Template Version 1.0** | Created: 2025-11-04

## 📋 Overview

**Purpose:** Template for building predictive models for crop yield, disease risk, and optimization.

**Use this template for:** Yield prediction, disease risk assessment, parameter optimization, feature importance analysis

### 🎯 Template Structure

This specialized template includes:

- Pre-configured imports and dependencies
- Standard helper functions for this workflow type
- Sample data generation functions
- Visualization templates
- Export and citation sections

### 📝 How to Use This Template

1. Copy this notebook to create your analysis
2. Update the header with your specific research question
3. Modify sample data generators or add data loading
4. Customize analysis and visualization sections
5. Update citations with your data sources

### ⚠️ Template Notes

- Replace [brackets] with your specific content
- Modify sample data to match your research
- Add or remove sections as needed
- Follow the established code style

## 📚 Background

### Machine Learning in Horticulture

ML models help:

- Predict crop outcomes
- Optimize resource allocation
- Identify risk factors
- Support decision-making

This notebook demonstrates scikit-learn for horticultural applications.

### Methodology

1. Data preparation
2. Feature engineering
3. Model training
4. Validation
5. Prediction
6. Interpretation

## ⚙️ Step 1: Installation

In [None]:
!pip install -q pandas numpy matplotlib seaborn scikit-learn ipywidgetsimport pandas as pdimport numpy as npimport matplotlib.pyplot as pltimport seaborn as snsfrom sklearn.model_selection import train_test_split, cross_val_scorefrom sklearn.linear_model import LinearRegression, LogisticRegressionfrom sklearn.ensemble import RandomForestRegressor, RandomForestClassifierfrom sklearn.metrics import mean_squared_error, r2_score, accuracy_score, classification_reportimport ipywidgets as widgetsfrom IPython.display import display, Markdownprint("✅ ML libraries loaded")

## 🔧 Step 2: Generate Training Data

In [None]:
# Generate sample dataset (FORM INPUT)
#
# Asks which prediction task to demonstrate, then builds a synthetic dataset of
# five environmental/management features plus one task-specific target column.
# Names produced for the following cells: `data`, `target_name`, `task_type`
# (plus `task`, `n_samples`, `target` and the raw feature arrays).
print("🌱 SELECT PREDICTION TASK:")
print("  [1] Yield Prediction (Regression)")
print("  [2] Disease Risk (Classification)")
print("  [3] Growth Optimization (Regression)")

VALID_TASKS = ('1', '2', '3')
DEFAULT_TASK = '1'

# Blank input selects the default task.
task = input("Enter choice (1-3): ").strip() or DEFAULT_TASK

# Guard against typos: without this check any unrecognised entry would fall
# into the final `else` branch below and be treated as option 3.
if task not in VALID_TASKS:
    print(f"⚠️ Unrecognised choice {task!r}; using default [{DEFAULT_TASK}].")
    task = DEFAULT_TASK

# Generate data
np.random.seed(42)  # fixed seed so every run produces the same dataset
n_samples = 500

# Features: temperature, humidity, soil_quality, fertilizer, irrigation.
# The draw order below determines the generated values under the fixed seed,
# so keep it stable if reproducibility of the sample data matters.
temp = np.random.normal(22, 5, n_samples)
humidity = np.random.normal(65, 10, n_samples)
soil_quality = np.random.uniform(3, 9, n_samples)
fertilizer = np.random.uniform(20, 100, n_samples)
irrigation = np.random.uniform(30, 80, n_samples)

if task == '1':
    # Yield prediction: weighted sum of all features plus Gaussian noise.
    # NOTE(review): the |temp - 22| penalty (x1.5) is smaller than the linear
    # temperature term (x2), so yield keeps rising with temperature instead of
    # peaking at 22 - confirm this is the intended response curve.
    yield_base = (
        temp * 2
        - np.abs(temp - 22) * 1.5
        + humidity * 0.3
        + soil_quality * 15
        + fertilizer * 0.5
        + irrigation * 0.4
    )
    target = yield_base + np.random.normal(0, 20, n_samples)
    target_name = 'yield_kg'
    task_type = 'regression'

elif task == '2':
    # Disease risk (binary classification): three threshold indicators are
    # weighted, noise is added, and a score above 50 is labelled 1.
    risk_score = (
        (temp > 25) * 30            # High temp increases risk
        + (humidity > 75) * 40      # High humidity increases risk
        + (soil_quality < 5) * 20   # Poor soil increases risk
    ) + np.random.normal(0, 10, n_samples)
    target = (risk_score > 50).astype(int)
    target_name = 'disease_risk'
    task_type = 'classification'

else:
    # Growth rate (task == '3'): peaks at temp == 22 and is clipped to [0, 50].
    growth = (
        22 - np.abs(temp - 22)      # Optimal temp
        + soil_quality * 2
        + fertilizer * 0.1
        + irrigation * 0.05
    ) + np.random.normal(0, 2, n_samples)
    target = np.clip(growth, 0, 50)
    target_name = 'growth_rate'
    task_type = 'regression'

# Assemble features and target; the target column is named after the task.
data = pd.DataFrame({
    'temperature': temp,
    'humidity': humidity,
    'soil_quality': soil_quality,
    'fertilizer_kg': fertilizer,
    'irrigation_mm': irrigation,
    target_name: target
})

print(f"\n✅ Generated {n_samples} samples for {task_type}")
display(data.head(10))
display(data.describe())

## 🚀 Step 3: Train Model

In [None]:
# Prepare data: every column except the target is used as a feature.
X = data.drop(columns=[target_name])
y = data[target_name]

# For classification, stratify so both splits keep the same class proportions
# (the positive class can be rare). Stratification needs at least two samples
# per class, so fall back to a plain random split when that is not the case.
# Regression is split exactly as before (no stratification).
use_stratify = task_type == 'classification' and y.value_counts().min() >= 2
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y if use_stratify else None,
)

print(f"Training set: {len(X_train)} samples")
print(f"Test set: {len(X_test)} samples")

# Train model: a random forest matching the task type chosen earlier.
if task_type == 'regression':
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # Predictions on the held-out test set
    y_pred = model.predict(X_test)

    # Metrics
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)

    print("\n📊 Model Performance:")
    print(f"  RMSE: {rmse:.2f}")
    print(f"  R² Score: {r2:.3f}")

    # Cross-validation on the training portion only; cross_val_score fits
    # clones of the estimator, so the model fitted above is left untouched.
    cv_scores = cross_val_score(model, X_train, y_train, cv=5, scoring='r2')
    print(f"  CV R² (mean): {cv_scores.mean():.3f} ± {cv_scores.std():.3f}")

else:
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # Predictions on the held-out test set
    y_pred = model.predict(X_test)

    # Metrics
    accuracy = accuracy_score(y_test, y_pred)

    print("\n📊 Model Performance:")
    print(f"  Accuracy: {accuracy:.3f}")

    # Cross-validation, mirroring the regression branch.
    cv_scores = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')
    print(f"  CV Accuracy (mean): {cv_scores.mean():.3f} ± {cv_scores.std():.3f}")

    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))

print("\n✅ Model trained successfully")

## 📊 Step 4: Analysis & Visualization

In [None]:
# Feature importance: pair each feature column with the fitted model's
# importance score and list them from most to least important.
feature_importance = (
    pd.DataFrame({
        'feature': X.columns,
        'importance': model.feature_importances_
    })
    .sort_values('importance', ascending=False)
)

display(Markdown("### 🎯 Feature Importance"))
display(feature_importance)

# Horizontal bar chart of the same ranking.
fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(feature_importance['feature'], feature_importance['importance'])
ax.set_xlabel('Importance')
ax.set_title('Feature Importance', fontweight='bold')
fig.tight_layout()
plt.show()

# Predictions vs Actual (regression only): points on the dashed red diagonal
# are perfect predictions.
if task_type == 'regression':
    actual_range = [y_test.min(), y_test.max()]

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(y_test, y_pred, alpha=0.5)
    ax.plot(actual_range, actual_range, 'r--', lw=2)
    ax.set_xlabel('Actual')
    ax.set_ylabel('Predicted')
    ax.set_title('Predictions vs Actual', fontweight='bold')
    ax.grid(True, alpha=0.3)
    fig.tight_layout()
    plt.show()

print("✅ Analysis complete")

## 📚 Step 5: Make Predictions

In [None]:
# Make new prediction (FORM INPUT)
def _read_float(prompt, default):
    """Prompt for a number and return it as a float.

    Args:
        prompt: Text shown to the user by ``input``.
        default: Value used when the entry is blank, is not a number, or is
            not finite (``nan`` / ``inf``).

    Returns:
        The entered value as a float, or ``float(default)`` as a fallback.
    """
    raw = input(prompt).strip()
    if not raw:
        return float(default)
    try:
        value = float(raw)
    except ValueError:
        # A typo should not abort the cell with a traceback.
        print(f"  ⚠️ '{raw}' is not a number; using default {default}")
        return float(default)
    if not np.isfinite(value):
        # float() accepts 'nan' and 'inf', which are not usable feature values.
        print(f"  ⚠️ '{raw}' is not a finite number; using default {default}")
        return float(default)
    return value


print("🔮 ENTER VALUES FOR PREDICTION:")
new_temp = _read_float("Temperature (°C, e.g., 22): ", 22)
new_humidity = _read_float("Humidity (%, e.g., 65): ", 65)
new_soil = _read_float("Soil quality (1-10, e.g., 7): ", 7)
new_fert = _read_float("Fertilizer (kg, e.g., 50): ", 50)
new_irrig = _read_float("Irrigation (mm, e.g., 40): ", 40)

# Build a single-row frame using the training feature columns; the values are
# listed in the same order as the feature columns of `X`.
new_data = pd.DataFrame(
    [[new_temp, new_humidity, new_soil, new_fert, new_irrig]],
    columns=X.columns,
)

prediction = model.predict(new_data)[0]

print("\n🎯 PREDICTION:")
if task_type == 'regression':
    print(f"  {target_name}: {prediction:.2f}")
else:
    # The classifier was trained on 0/1 labels; 1 is the high-risk class.
    print(f"  {target_name}: {'HIGH RISK' if prediction == 1 else 'LOW RISK'}")

display(Markdown("""### 📖 Citation
> Botanical Colabs (2025). Crop Yield Prediction & Statistical Modeling.
> https://github.com/outobecca/botanical-colabs
### 📚 Libraries
- scikit-learn (BSD License)
- Pandas, NumPy (BSD License)"""))