<a href="https://colab.research.google.com/github/sineka232/data_science/blob/main/Precision_Agriculture_Yield_Predictor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# --- 1. EMBEDDED DATA LOADING ---
# The content of 'sample_agri_data.csv' is embedded as a string so the script is
# self-contained: 5 data rows (one per sample), 30 feature columns and one target.
# Feature columns are named <feature>_T<k> (k = 1..10) and are ordered timestep by
# timestep (NDVI_T1, Rainfall_T1, Temp_T1, NDVI_T2, ...); the last column,
# Yield_Ton_Ha, is the target. This timestep-major ordering is what allows each
# flattened row to be reshaped into (10 timesteps, 3 features) later on.
csv_data = """
NDVI_T1,Rainfall_T1,Temp_T1,NDVI_T2,Rainfall_T2,Temp_T2,NDVI_T3,Rainfall_T3,Temp_T3,NDVI_T4,Rainfall_T4,Temp_T4,NDVI_T5,Rainfall_T5,Temp_T5,NDVI_T6,Rainfall_T6,Temp_T6,NDVI_T7,Rainfall_T7,Temp_T7,NDVI_T8,Rainfall_T8,Temp_T8,NDVI_T9,Rainfall_T9,Temp_T9,NDVI_T10,Rainfall_T10,Temp_T10,Yield_Ton_Ha
0.25,2.1,20.5,0.32,1.8,21.0,0.45,5.5,21.2,0.61,12.3,22.0,0.75,8.9,23.1,0.85,3.4,24.5,0.88,0.5,25.0,0.86,0.2,25.5,0.80,0.0,26.0,0.72,1.1,26.2,18.5
0.20,5.5,19.8,0.30,4.1,20.1,0.40,6.2,20.9,0.55,3.0,21.5,0.65,1.5,22.8,0.70,2.0,23.5,0.68,1.0,24.0,0.65,0.8,24.5,0.62,1.1,25.1,0.60,0.5,25.5,12.4
0.18,1.0,21.5,0.25,0.5,22.0,0.30,0.8,23.5,0.38,0.1,25.5,0.45,0.0,28.0,0.50,0.0,30.1,0.55,0.0,31.5,0.48,0.0,32.0,0.40,0.0,32.5,0.35,0.0,33.0,6.1
0.30,4.5,20.1,0.45,6.0,20.5,0.60,8.1,21.0,0.78,5.9,21.8,0.89,4.2,22.5,0.91,3.0,23.0,0.90,2.5,23.5,0.89,1.5,24.0,0.87,1.0,24.2,0.85,0.5,24.5,21.9
0.22,3.0,19.5,0.35,3.5,20.0,0.50,4.0,20.5,0.68,5.1,21.0,0.80,6.0,21.5,0.82,4.5,22.0,0.75,3.0,23.0,0.60,2.1,24.0,0.55,1.5,24.5,0.50,1.0,25.0,10.5
"""

def load_and_preprocess_data(csv_string, n_timesteps=10, n_features=3):
    """
    Loads data and prepares features for both Random Forest (2D) and CNN (3D).

    The feature columns are expected in timestep-major order (all features of
    T1, then all features of T2, ...), so that a row-major reshape of each
    flattened row yields an (n_timesteps, n_features) matrix.

    Args:
        csv_string (str): The raw CSV data as a string. It must contain a
            'Yield_Ton_Ha' target column; every other column is a feature.
        n_timesteps (int): Number of measurements per sample (default 10).
        n_features (int): Number of features per timestep (default 3).

    Returns:
        tuple: X_rf, Y, X_cnn -- the flattened feature matrix of shape
        (N_samples, n_timesteps * n_features), the target vector of shape
        (N_samples,), and the same features reshaped to
        (N_samples, n_timesteps, n_features).

    Raises:
        KeyError: If the 'Yield_Ton_Ha' column is missing.
        ValueError: If the number of feature columns differs from
            n_timesteps * n_features.
    """
    from io import StringIO

    # Load data into a Pandas DataFrame
    df = pd.read_csv(StringIO(csv_string))

    # Separate features (X) from the target (Y)
    Y = df['Yield_Ton_Ha'].values
    X_df = df.drop(columns=['Yield_Ton_Ha'])

    # --- Data Preparation for Random Forest (RF) ---
    # RF uses the flattened 2D layout: (N_samples, n_timesteps * n_features)
    X_rf = X_df.values
    n_samples, n_columns = X_rf.shape

    # Fail early with an explicit message instead of an opaque reshape error
    expected_columns = n_timesteps * n_features
    if n_columns != expected_columns:
        raise ValueError(
            f"Expected {expected_columns} feature columns "
            f"(n_timesteps={n_timesteps} x n_features={n_features}), "
            f"but the CSV has {n_columns}."
        )

    # --- Data Preparation for Convolutional Neural Network (CNN) ---
    # CNN expects 3D data: (N_samples, n_timesteps, n_features)
    X_cnn = X_rf.reshape((n_samples, n_timesteps, n_features))

    print("Dataset loaded and prepared:")
    print(f"- Total Samples (N): {n_samples}")
    print(f"- RF Input Shape (Flattened): {X_rf.shape}")
    print(f"- CNN Input Shape (Time Series): {X_cnn.shape}\n")

    return X_rf, Y, X_cnn

def train_and_evaluate_rf(X, Y):
    """
    Trains and evaluates a Random Forest Regressor.
    (Note: With only a handful of samples, this is purely for demonstration.)

    A 20% hold-out split is used whenever at least two samples are available.
    With fewer samples train_test_split cannot produce a non-empty train and
    test set, so the model is fit and evaluated on the full dataset instead.
    MSE/R2 are reported only when the evaluation set holds more than one
    sample; otherwise only a success message is printed.

    Args:
        X: 2D feature matrix of shape (N_samples, N_features).
        Y: Target vector of length N_samples.

    Returns:
        None. Results are printed to stdout.
    """
    print("--- Training Random Forest Regressor (2D Input) ---")

    n_samples = len(X)
    if n_samples < 2:
        # The split would raise for so few samples, so the guard has to come
        # BEFORE calling train_test_split (checking for an empty test set
        # afterwards can never trigger).
        print(f"Note: Skipping train/test split due to very small sample size (N={n_samples}). Training on full dataset.")
        X_train, Y_train = X, Y
        # We simulate a prediction on the training set for demonstration purposes
        X_test, Y_test = X, Y
    else:
        # In a real scenario, you'd use a much larger dataset and cross-validation.
        X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

    # Initialize and train the Random Forest Regressor
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, Y_train)

    # Predict and evaluate
    Y_pred = model.predict(X_test)

    # Ensure there are enough test samples to calculate MSE/R2 meaningfully
    if len(Y_test) > 1:
        mse = mean_squared_error(Y_test, Y_pred)
        r2 = r2_score(Y_test, Y_pred)

        print("Model Metrics (on simulated test/train data):")
        print(f"  Mean Squared Error (MSE): {mse:.2f}")
        print(f"  R-squared (R2) Score: {r2:.2f}")
    else:
        print("Model trained successfully.")

    print("\n--- Feature Importance (RF) ---")
    # Importances are indexed by column position in the flattened feature matrix
    feature_importances = model.feature_importances_
    sorted_indices = np.argsort(feature_importances)[::-1]

    # Report the index range and the number of ranked features from the data
    # itself rather than assuming exactly 30 columns.
    n_columns = len(feature_importances)
    top_k = min(5, n_columns)
    print(f"Top {top_k} Feature Indices by Importance (Flattened Index 0-{n_columns - 1}):")
    for i in sorted_indices[:top_k]:
        print(f"  Feature Index {i}: Importance {feature_importances[i]:.4f}")


def conceptual_cnn_prep(X_cnn):
    """
    Prints a short walkthrough of the remaining CNN data-preparation steps,
    followed by the first sample of X_cnn as an example.

    Args:
        X_cnn: Indexable collection of samples; only X_cnn[0] is read.
    """
    walkthrough = (
        "--- Conceptual CNN Data Preparation ---",
        "CNN Input Data (X_cnn) is now ready with shape (Samples, Timesteps, Features).",
        "The next step in a CNN model would be to normalize X_cnn (e.g., MinMax scale) ",
        "and then feed it to a Keras/TensorFlow 1D or 2D Convolutional layer.\n",
        "Example of the first sample's time-series data:",
    )
    # Emit each message on its own line, exactly as individual print calls would
    for message in walkthrough:
        print(message)

    first_sample = X_cnn[0]
    print(first_sample)


# Script entry point: runs the full demonstration pipeline on the embedded CSV.
if __name__ == "__main__":
    # 1. Load and prepare the data: the flattened 2D matrix feeds the Random
    #    Forest, the 3D tensor is the CNN-ready view of the same values.
    X_rf_data, Y_data, X_cnn_data = load_and_preprocess_data(csv_data)

    # 2. Train the Random Forest Model (prints metrics and feature importances)
    train_and_evaluate_rf(X_rf_data, Y_data)

    # 3. Conceptual CNN preparation (no training implemented here)
    conceptual_cnn_prep(X_cnn_data)

Dataset loaded and prepared:
- Total Samples (N): 5
- RF Input Shape (Flattened): (5, 30)
- CNN Input Shape (Time Series): (5, 10, 3)

--- Training Random Forest Regressor (2D Input) ---
Model trained successfully.

--- Feature Importance (RF) ---
Top 5 Feature Indices by Importance (Flattened Index 0-29):
  Feature Index 0: Importance 0.0880
  Feature Index 2: Importance 0.0703
  Feature Index 27: Importance 0.0659
  Feature Index 21: Importance 0.0603
  Feature Index 26: Importance 0.0576
--- Conceptual CNN Data Preparation ---
CNN Input Data (X_cnn) is now ready with shape (Samples, Timesteps, Features).
The next step in a CNN model would be to normalize X_cnn (e.g., MinMax scale) 
and then feed it to a Keras/TensorFlow 1D or 2D Convolutional layer.

Example of the first sample's time-series data:
[[ 0.25  2.1  20.5 ]
 [ 0.32  1.8  21.  ]
 [ 0.45  5.5  21.2 ]
 [ 0.61 12.3  22.  ]
 [ 0.75  8.9  23.1 ]
 [ 0.85  3.4  24.5 ]
 [ 0.88  0.5  25.  ]
 [ 0.86  0.2  25.5 ]
 [ 0.8   0.   26.  ]

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
# Imports for the Convolutional Neural Network (CNN)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense

# --- 1. EMBEDDED DATA LOADING ---
# The content of 'sample_agri_data.csv' is embedded as a string so the script is
# self-contained: 5 data rows (one per sample), 30 feature columns and one target.
# Feature columns are named <feature>_T<k> (k = 1..10) and are ordered timestep by
# timestep (NDVI_T1, Rainfall_T1, Temp_T1, NDVI_T2, ...); the last column,
# Yield_Ton_Ha, is the target. This timestep-major ordering is what allows each
# flattened row to be reshaped into (10 timesteps, 3 features) later on.
csv_data = """
NDVI_T1,Rainfall_T1,Temp_T1,NDVI_T2,Rainfall_T2,Temp_T2,NDVI_T3,Rainfall_T3,Temp_T3,NDVI_T4,Rainfall_T4,Temp_T4,NDVI_T5,Rainfall_T5,Temp_T5,NDVI_T6,Rainfall_T6,Temp_T6,NDVI_T7,Rainfall_T7,Temp_T7,NDVI_T8,Rainfall_T8,Temp_T8,NDVI_T9,Rainfall_T9,Temp_T9,NDVI_T10,Rainfall_T10,Temp_T10,Yield_Ton_Ha
0.25,2.1,20.5,0.32,1.8,21.0,0.45,5.5,21.2,0.61,12.3,22.0,0.75,8.9,23.1,0.85,3.4,24.5,0.88,0.5,25.0,0.86,0.2,25.5,0.80,0.0,26.0,0.72,1.1,26.2,18.5
0.20,5.5,19.8,0.30,4.1,20.1,0.40,6.2,20.9,0.55,3.0,21.5,0.65,1.5,22.8,0.70,2.0,23.5,0.68,1.0,24.0,0.65,0.8,24.5,0.62,1.1,25.1,0.60,0.5,25.5,12.4
0.18,1.0,21.5,0.25,0.5,22.0,0.30,0.8,23.5,0.38,0.1,25.5,0.45,0.0,28.0,0.50,0.0,30.1,0.55,0.0,31.5,0.48,0.0,32.0,0.40,0.0,32.5,0.35,0.0,33.0,6.1
0.30,4.5,20.1,0.45,6.0,20.5,0.60,8.1,21.0,0.78,5.9,21.8,0.89,4.2,22.5,0.91,3.0,23.0,0.90,2.5,23.5,0.89,1.5,24.0,0.87,1.0,24.2,0.85,0.5,24.5,21.9
0.22,3.0,19.5,0.35,3.5,20.0,0.50,4.0,20.5,0.68,5.1,21.0,0.80,6.0,21.5,0.82,4.5,22.0,0.75,3.0,23.0,0.60,2.1,24.0,0.55,1.5,24.5,0.50,1.0,25.0,10.5
"""

def load_and_preprocess_data(csv_string, n_timesteps=10, n_features=3):
    """
    Loads data and prepares features for both Random Forest (2D) and CNN (3D).

    The feature columns are expected in timestep-major order (all features of
    T1, then all features of T2, ...), so that a row-major reshape of each
    flattened row yields an (n_timesteps, n_features) matrix.

    Args:
        csv_string (str): The raw CSV data as a string. It must contain a
            'Yield_Ton_Ha' target column; every other column is a feature.
        n_timesteps (int): Number of measurements per sample (default 10).
        n_features (int): Number of features per timestep (default 3).

    Returns:
        tuple: X_rf, Y, X_cnn -- the flattened feature matrix of shape
        (N_samples, n_timesteps * n_features), the target vector of shape
        (N_samples,), and the same features reshaped to
        (N_samples, n_timesteps, n_features).

    Raises:
        KeyError: If the 'Yield_Ton_Ha' column is missing.
        ValueError: If the number of feature columns differs from
            n_timesteps * n_features.
    """
    from io import StringIO

    # Load data into a Pandas DataFrame
    df = pd.read_csv(StringIO(csv_string))

    # Separate features (X) from the target (Y)
    Y = df['Yield_Ton_Ha'].values
    X_df = df.drop(columns=['Yield_Ton_Ha'])

    # --- Data Preparation for Random Forest (RF) ---
    # RF uses the flattened 2D layout: (N_samples, n_timesteps * n_features)
    X_rf = X_df.values
    n_samples, n_columns = X_rf.shape

    # Fail early with an explicit message instead of an opaque reshape error
    expected_columns = n_timesteps * n_features
    if n_columns != expected_columns:
        raise ValueError(
            f"Expected {expected_columns} feature columns "
            f"(n_timesteps={n_timesteps} x n_features={n_features}), "
            f"but the CSV has {n_columns}."
        )

    # --- Data Preparation for Convolutional Neural Network (CNN) ---
    # CNN expects 3D data: (N_samples, n_timesteps, n_features)
    X_cnn = X_rf.reshape((n_samples, n_timesteps, n_features))

    print("Dataset loaded and prepared:")
    print(f"- Total Samples (N): {n_samples}")
    print(f"- RF Input Shape (Flattened): {X_rf.shape}")
    print(f"- CNN Input Shape (Time Series): {X_cnn.shape}\n")

    return X_rf, Y, X_cnn

def train_and_evaluate_rf(X, Y):
    """
    Trains and evaluates a Random Forest Regressor.
    (Note: With only a handful of samples, this is purely for demonstration.)

    A 20% hold-out split is used whenever at least two samples are available.
    With fewer samples train_test_split cannot produce a non-empty train and
    test set, so the model is fit and evaluated on the full dataset instead.
    MSE/R2 are reported only when the evaluation set holds more than one
    sample; otherwise only a success message is printed.

    Args:
        X: 2D feature matrix of shape (N_samples, N_features).
        Y: Target vector of length N_samples.

    Returns:
        None. Results are printed to stdout.
    """
    print("--- Training Random Forest Regressor (2D Input) ---")

    n_samples = len(X)
    if n_samples < 2:
        # The split would raise for so few samples, so the guard has to come
        # BEFORE calling train_test_split (checking for an empty test set
        # afterwards can never trigger).
        print(f"Note: Skipping train/test split due to very small sample size (N={n_samples}). Training on full dataset.")
        X_train, Y_train = X, Y
        # We simulate a prediction on the training set for demonstration purposes
        X_test, Y_test = X, Y
    else:
        # In a real scenario, you'd use a much larger dataset and cross-validation.
        X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

    # Initialize and train the Random Forest Regressor
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, Y_train)

    # Predict and evaluate
    Y_pred = model.predict(X_test)

    # Ensure there are enough test samples to calculate MSE/R2 meaningfully
    if len(Y_test) > 1:
        mse = mean_squared_error(Y_test, Y_pred)
        r2 = r2_score(Y_test, Y_pred)

        print("Model Metrics (on simulated test/train data):")
        print(f"  Mean Squared Error (MSE): {mse:.2f}")
        print(f"  R-squared (R2) Score: {r2:.2f}")
    else:
        print("Model trained successfully.")

    print("\n--- Feature Importance (RF) ---")
    # Importances are indexed by column position in the flattened feature matrix
    feature_importances = model.feature_importances_
    sorted_indices = np.argsort(feature_importances)[::-1]

    # Report the index range and the number of ranked features from the data
    # itself rather than assuming exactly 30 columns.
    n_columns = len(feature_importances)
    top_k = min(5, n_columns)
    print(f"Top {top_k} Feature Indices by Importance (Flattened Index 0-{n_columns - 1}):")
    for i in sorted_indices[:top_k]:
        print(f"  Feature Index {i}: Importance {feature_importances[i]:.4f}")


def train_and_evaluate_cnn(X, Y):
    """
    Trains and evaluates a simple 1D Convolutional Neural Network (CNN)
    designed for time-series data.

    A 20% hold-out split is used whenever at least two samples are available.
    With fewer samples train_test_split cannot produce a non-empty train and
    test set, so the model is fit and evaluated on the full dataset instead.
    MSE/R2 are reported only when the evaluation set holds more than one
    sample.

    Args:
        X: 3D feature tensor of shape (N_samples, N_timesteps, N_features).
        Y: Target vector of length N_samples.

    Returns:
        None. The model summary and results are printed to stdout.
    """
    print("\n--- Training 1D Convolutional Neural Network (3D Input) ---")

    # The data must be in (samples, timesteps, features) format.
    n_timesteps, n_features = X.shape[1], X.shape[2]

    n_samples = len(X)
    if n_samples < 2:
        # The guard has to come BEFORE the split: train_test_split never
        # returns an empty test set, it raises instead.
        print(f"Note: Skipping train/test split due to very small sample size (N={n_samples}). Training on full dataset.")
        X_train, Y_train = X, Y
        X_test, Y_test = X, Y
    else:
        X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

    # 1. Define the CNN Model Architecture
    model = Sequential([
        # Declare the input shape with an explicit Input layer; passing
        # `input_shape=` to the first layer of a Sequential model makes Keras
        # emit a warning.
        tf.keras.Input(shape=(n_timesteps, n_features)),

        # 1D Convolutional layer to extract features across the timesteps
        Conv1D(filters=32, kernel_size=3, activation='relu'),
        MaxPooling1D(pool_size=2),  # Reduce the time dimension

        # Flatten the output for the Dense layers
        Flatten(),

        # Fully connected (Dense) layers for prediction
        Dense(50, activation='relu'),
        Dense(1),  # Output layer with 1 unit for regression (Yield prediction)
    ])

    # 2. Compile the model
    # Use 'adam' optimizer and 'mse' (Mean Squared Error) for regression
    model.compile(optimizer='adam', loss='mse')

    print("Model Summary:")
    model.summary()

    # 3. Train the model
    # Note: epochs are set low to keep the execution fast, but would be higher in reality.
    epochs = 5
    print(f"\nStarting CNN training ({epochs} epochs)...")
    model.fit(
        X_train, Y_train,
        epochs=epochs,
        batch_size=1,
        verbose=0,  # Set to 1 to see progress per epoch
    )
    print("Training finished.")

    # 4. Evaluate the model; predict returns shape (n, 1), so flatten to (n,)
    Y_pred = model.predict(X_test, verbose=0).flatten()

    if len(Y_test) > 1:
        mse = mean_squared_error(Y_test, Y_pred)
        r2 = r2_score(Y_test, Y_pred)

        print("\nModel Metrics (on simulated test/train data):")
        print(f"  Mean Squared Error (MSE): {mse:.2f}")
        print(f"  R-squared (R2) Score: {r2:.2f}")
    else:
        print("Model trained successfully. Evaluation metrics were skipped due to limited data.")


# Script entry point: runs both models on the embedded CSV.
if __name__ == "__main__":
    # 1. Load and prepare the data: the flattened 2D matrix feeds the Random
    #    Forest, the 3D tensor feeds the CNN.
    X_rf_data, Y_data, X_cnn_data = load_and_preprocess_data(csv_data)

    # 2. Train the Random Forest Model (prints metrics and feature importances)
    train_and_evaluate_rf(X_rf_data, Y_data)

    # 3. Train and evaluate the CNN model on the 3D view of the same data
    train_and_evaluate_cnn(X_cnn_data, Y_data)


Dataset loaded and prepared:
- Total Samples (N): 5
- RF Input Shape (Flattened): (5, 30)
- CNN Input Shape (Time Series): (5, 10, 3)

--- Training Random Forest Regressor (2D Input) ---
Model trained successfully.

--- Feature Importance (RF) ---
Top 5 Feature Indices by Importance (Flattened Index 0-29):
  Feature Index 0: Importance 0.0880
  Feature Index 2: Importance 0.0703
  Feature Index 27: Importance 0.0659
  Feature Index 21: Importance 0.0603
  Feature Index 26: Importance 0.0576

--- Training 1D Convolutional Neural Network (3D Input) ---
Model Summary:


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Starting CNN training (5 epochs)...
Training finished.
Model trained successfully. Evaluation metrics were skipped due to limited data.


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
# Imports for the Convolutional Neural Network (CNN)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout # Added Dropout

# --- 1. EMBEDDED DATA LOADING (EXPANDED TO N=65 SAMPLES) ---
# N=65 samples (one per data row below), T=10 timesteps, F=3 features (NDVI, Rainfall, Temp).
csv_data = """
NDVI_T1,Rainfall_T1,Temp_T1,NDVI_T2,Rainfall_T2,Temp_T2,NDVI_T3,Rainfall_T3,Temp_T3,NDVI_T4,Rainfall_T4,Temp_T4,NDVI_T5,Rainfall_T5,Temp_T5,NDVI_T6,Rainfall_T6,Temp_T6,NDVI_T7,Rainfall_T7,Temp_T7,NDVI_T8,Rainfall_T8,Temp_T8,NDVI_T9,Rainfall_T9,Temp_T9,NDVI_T10,Rainfall_T10,Temp_T10,Yield_Ton_Ha
0.25,2.1,20.5,0.32,1.8,21.0,0.45,5.5,21.2,0.61,12.3,22.0,0.75,8.9,23.1,0.85,3.4,24.5,0.88,0.5,25.0,0.86,0.2,25.5,0.80,0.0,26.0,0.72,1.1,26.2,18.5
0.20,5.5,19.8,0.30,4.1,20.1,0.40,6.2,20.9,0.55,3.0,21.5,0.65,1.5,22.8,0.70,2.0,23.5,0.68,1.0,24.0,0.65,0.8,24.5,0.62,1.1,25.1,0.60,0.5,25.5,12.4
0.18,1.0,21.5,0.25,0.5,22.0,0.30,0.8,23.5,0.38,0.1,25.5,0.45,0.0,28.0,0.50,0.0,30.1,0.55,0.0,31.5,0.48,0.0,32.0,0.40,0.0,32.5,0.35,0.0,33.0,6.1
0.30,4.5,20.1,0.45,6.0,20.5,0.60,8.1,21.0,0.78,5.9,21.8,0.89,4.2,22.5,0.91,3.0,23.0,0.90,2.5,23.5,0.89,1.5,24.0,0.87,1.0,24.2,0.85,0.5,24.5,21.9
0.22,3.0,19.5,0.35,3.5,20.0,0.50,4.0,20.5,0.68,5.1,21.0,0.80,6.0,21.5,0.82,4.5,22.0,0.75,3.0,23.0,0.60,2.1,24.0,0.55,1.5,24.5,0.50,1.0,25.0,10.5
0.28,3.5,21.0,0.38,4.0,21.5,0.51,6.0,21.8,0.70,10.0,22.5,0.82,7.5,23.5,0.90,3.0,25.0,0.93,0.8,25.5,0.90,0.4,26.0,0.84,0.1,26.5,0.78,1.5,26.8,19.2
0.15,6.0,19.0,0.25,4.5,19.5,0.35,7.0,20.0,0.50,3.5,20.8,0.60,1.8,22.0,0.65,2.5,22.5,0.63,1.2,23.0,0.60,1.0,23.5,0.58,1.4,24.0,0.55,0.7,24.5,11.8
0.35,1.5,22.0,0.40,0.8,22.5,0.48,1.1,24.0,0.58,0.2,26.0,0.65,0.1,28.5,0.70,0.1,30.5,0.75,0.1,32.0,0.68,0.1,32.5,0.60,0.1,33.0,0.55,0.1,33.5,7.5
0.27,5.0,20.0,0.42,6.5,20.8,0.58,8.8,21.5,0.75,6.5,22.2,0.85,4.8,23.0,0.88,3.5,23.5,0.87,3.0,24.0,0.86,2.0,24.5,0.84,1.5,24.8,0.80,1.0,25.0,20.5
0.20,3.5,19.0,0.33,4.0,19.5,0.48,4.5,20.0,0.65,5.5,20.5,0.78,6.5,21.0,0.80,5.0,21.5,0.75,3.5,22.5,0.65,2.5,23.5,0.60,2.0,24.0,0.55,1.5,24.5,14.0
0.26,2.0,20.8,0.34,1.7,21.2,0.47,5.3,21.4,0.63,12.0,22.2,0.77,8.5,23.3,0.87,3.2,24.7,0.90,0.6,25.2,0.88,0.3,25.7,0.82,0.1,26.2,0.74,1.3,26.4,18.8
0.21,5.3,19.7,0.31,4.0,20.0,0.41,6.0,20.8,0.56,2.9,21.4,0.66,1.4,22.7,0.71,1.9,23.4,0.69,1.1,23.9,0.66,0.9,24.4,0.63,1.2,25.0,0.61,0.6,25.4,12.6
0.19,1.1,21.6,0.26,0.6,22.1,0.31,0.9,23.6,0.39,0.2,25.6,0.46,0.1,28.1,0.51,0.1,30.2,0.56,0.1,31.6,0.49,0.1,32.1,0.41,0.1,32.6,0.36,0.1,33.1,6.3
0.31,4.4,20.2,0.46,5.9,20.6,0.61,8.0,21.1,0.79,5.8,21.9,0.90,4.1,22.6,0.92,3.1,23.1,0.91,2.6,23.6,0.90,1.6,24.1,0.88,1.1,24.3,0.86,0.6,24.6,22.1
0.23,3.1,19.6,0.36,3.6,20.1,0.51,4.1,20.6,0.69,5.2,21.1,0.81,6.1,21.6,0.83,4.6,22.1,0.76,3.1,23.1,0.61,2.2,24.1,0.56,1.6,24.6,0.51,1.1,25.1,10.7
0.29,3.6,21.1,0.39,4.1,21.6,0.52,6.1,21.9,0.71,10.1,22.6,0.83,7.6,23.6,0.91,3.1,25.1,0.94,0.9,25.6,0.91,0.5,26.1,0.85,0.2,26.6,0.79,1.6,26.9,19.4
0.16,6.1,19.1,0.26,4.6,19.6,0.36,7.1,20.1,0.51,3.6,20.9,0.61,1.9,22.1,0.66,2.6,22.6,0.64,1.3,23.1,0.61,1.1,23.6,0.59,1.5,24.1,0.56,0.8,24.6,12.0
0.36,1.6,22.1,0.41,0.9,22.6,0.49,1.2,24.1,0.59,0.3,26.1,0.66,0.2,28.6,0.71,0.2,30.6,0.76,0.2,32.1,0.69,0.2,32.6,0.61,0.2,33.1,0.56,0.2,33.6,7.7
0.28,5.1,20.1,0.43,6.6,20.9,0.59,8.9,21.6,0.76,6.6,22.3,0.86,4.9,23.1,0.89,3.6,23.6,0.88,3.1,24.1,0.87,2.1,24.6,0.85,1.6,24.9,0.81,1.1,25.1,20.7
0.21,3.6,19.1,0.34,4.1,19.6,0.49,4.6,20.1,0.66,5.6,20.6,0.79,6.6,21.1,0.81,5.1,21.6,0.76,3.6,22.6,0.66,2.6,23.6,0.61,2.1,24.1,0.56,1.6,24.6,14.2
0.30,2.5,21.0,0.38,2.2,21.5,0.51,5.9,21.7,0.67,12.7,22.5,0.81,9.3,23.6,0.91,3.8,25.0,0.94,0.9,25.5,0.92,0.6,26.0,0.86,0.2,26.5,0.78,1.3,26.7,19.8
0.22,5.9,19.5,0.32,4.5,19.8,0.42,6.6,20.5,0.57,3.4,21.1,0.67,1.8,22.4,0.72,2.3,23.1,0.70,1.4,23.6,0.67,1.2,24.1,0.64,1.5,24.7,0.62,0.8,25.1,13.0
0.17,1.5,21.3,0.24,0.9,21.8,0.29,1.2,23.2,0.37,0.5,25.2,0.44,0.3,27.7,0.49,0.3,29.8,0.54,0.3,31.2,0.47,0.3,31.7,0.39,0.3,32.2,0.34,0.3,32.7,6.8
0.33,4.8,19.9,0.48,6.3,20.3,0.63,8.4,20.8,0.81,6.2,21.6,0.92,4.5,22.3,0.94,3.4,22.8,0.93,2.9,23.3,0.92,1.9,23.8,0.90,1.4,24.0,0.88,0.9,24.3,22.5
0.25,3.4,19.3,0.38,3.9,19.8,0.53,4.4,20.3,0.71,5.5,20.8,0.83,6.4,21.3,0.85,4.9,21.8,0.78,3.4,22.8,0.63,2.4,23.8,0.58,1.8,24.3,0.53,1.3,24.8,11.2
0.27,3.8,21.3,0.37,4.3,21.8,0.50,6.3,22.1,0.69,10.3,22.8,0.81,7.8,23.8,0.89,3.3,25.3,0.92,1.1,25.8,0.89,0.7,26.3,0.83,0.4,26.8,0.77,1.8,27.1,19.0
0.14,6.3,19.3,0.24,4.8,19.8,0.34,7.3,20.3,0.49,3.8,21.1,0.59,2.1,22.3,0.64,2.8,22.8,0.62,1.5,23.3,0.59,1.3,23.8,0.57,1.7,24.3,0.54,1.0,24.8,12.2
0.34,1.8,22.3,0.39,1.1,22.8,0.47,1.4,24.3,0.57,0.5,26.3,0.64,0.4,28.8,0.69,0.4,30.8,0.74,0.4,32.3,0.67,0.4,32.8,0.59,0.4,33.3,0.54,0.4,33.8,7.9
0.26,5.3,20.3,0.41,6.8,21.1,0.57,9.1,21.8,0.74,6.8,22.5,0.84,5.1,23.3,0.87,3.8,23.8,0.86,3.3,24.3,0.85,2.3,24.8,0.83,1.8,25.1,0.79,1.3,25.3,20.3
0.19,3.8,19.3,0.32,4.3,19.8,0.47,4.8,20.3,0.64,5.8,20.8,0.77,6.8,21.3,0.79,5.3,21.8,0.74,3.8,22.8,0.64,2.8,23.8,0.59,2.3,24.3,0.54,1.8,24.8,14.4
0.24,2.3,20.7,0.31,2.0,21.2,0.44,5.7,21.4,0.60,12.5,22.2,0.74,9.1,23.3,0.84,3.6,24.7,0.87,0.7,25.2,0.85,0.4,25.7,0.79,0.1,26.2,0.71,1.3,26.4,18.2
0.19,5.7,19.9,0.29,4.3,20.2,0.39,6.4,21.0,0.54,3.2,21.6,0.64,1.7,22.9,0.69,2.2,23.6,0.67,1.3,24.1,0.64,1.1,24.6,0.61,1.3,25.2,0.59,0.7,25.6,12.8
0.16,1.3,21.8,0.23,0.7,22.3,0.28,1.0,23.8,0.36,0.3,25.8,0.43,0.2,28.3,0.48,0.2,30.4,0.53,0.2,31.8,0.46,0.2,32.3,0.38,0.2,32.8,0.33,0.2,33.3,6.5
0.32,4.6,20.4,0.47,6.1,20.8,0.62,8.2,21.3,0.80,6.0,22.1,0.91,4.3,22.8,0.93,3.2,23.3,0.92,2.7,23.8,0.91,1.7,24.3,0.89,1.2,24.5,0.87,0.7,24.8,22.3
0.24,3.2,19.8,0.37,3.7,20.3,0.52,4.2,20.8,0.70,5.3,21.3,0.82,6.2,21.8,0.84,4.7,22.3,0.77,3.2,23.3,0.62,2.3,24.3,0.57,1.7,24.8,0.52,1.2,25.3,10.9
0.26,3.7,21.2,0.36,4.2,21.7,0.49,6.2,22.0,0.68,10.2,22.7,0.80,7.7,23.7,0.88,3.2,25.2,0.91,1.0,25.7,0.88,0.6,26.2,0.82,0.3,26.7,0.76,1.7,27.0,18.8
0.13,6.2,19.2,0.23,4.7,19.7,0.33,7.2,20.2,0.48,3.7,21.0,0.58,2.0,22.2,0.63,2.7,22.7,0.61,1.4,23.2,0.58,1.2,23.7,0.56,1.6,24.2,0.53,0.9,24.7,12.1
0.35,1.7,22.2,0.40,1.0,22.7,0.48,1.3,24.2,0.58,0.4,26.2,0.65,0.3,28.7,0.70,0.3,30.7,0.75,0.3,32.2,0.68,0.3,32.7,0.60,0.3,33.2,0.55,0.3,33.7,7.8
0.25,5.2,20.2,0.40,6.7,21.0,0.56,9.0,21.7,0.73,6.7,22.4,0.83,5.0,23.2,0.86,3.7,23.7,0.85,3.2,24.2,0.84,2.2,24.7,0.82,1.7,25.0,0.78,1.2,25.2,20.1
0.18,3.7,19.2,0.31,4.2,19.7,0.46,4.7,20.2,0.63,5.7,20.7,0.76,6.7,21.2,0.78,5.2,21.7,0.73,3.7,22.7,0.63,2.7,23.7,0.58,2.2,24.2,0.53,1.7,24.7,14.3
0.27,2.2,20.9,0.35,1.9,21.4,0.48,5.6,21.6,0.64,12.4,22.4,0.78,9.0,23.5,0.88,3.5,24.9,0.91,0.6,25.4,0.89,0.3,25.9,0.83,0.1,26.4,0.75,1.2,26.6,18.6
0.22,5.8,19.7,0.32,4.4,20.0,0.42,6.5,20.7,0.57,3.3,21.3,0.67,1.6,22.6,0.72,2.1,23.3,0.70,1.2,23.8,0.67,1.0,24.3,0.64,1.4,24.9,0.62,0.6,25.3,12.9
0.17,1.4,21.7,0.24,0.8,22.2,0.29,1.1,23.7,0.37,0.4,25.7,0.44,0.3,28.2,0.49,0.3,30.3,0.54,0.3,31.7,0.47,0.3,32.2,0.39,0.3,32.7,0.34,0.3,33.2,6.6
0.34,4.7,20.3,0.49,6.2,20.7,0.64,8.3,21.2,0.82,6.1,22.0,0.93,4.4,22.7,0.95,3.3,23.2,0.94,2.8,23.7,0.93,1.8,24.2,0.91,1.3,24.4,0.89,0.8,24.7,22.7
0.26,3.3,19.7,0.39,3.8,20.2,0.54,4.3,20.7,0.72,5.4,21.2,0.84,6.3,21.7,0.86,4.8,22.2,0.79,3.3,23.2,0.64,2.4,24.2,0.59,1.8,24.7,0.54,1.3,25.2,11.1
0.28,3.9,21.4,0.38,4.4,21.9,0.51,6.4,22.2,0.70,10.4,22.9,0.82,7.9,23.9,0.90,3.4,25.4,0.93,1.2,25.9,0.90,0.8,26.4,0.84,0.5,26.9,0.78,1.9,27.2,19.1
0.15,6.4,19.4,0.25,4.9,19.9,0.35,7.4,20.4,0.50,3.9,21.2,0.60,2.2,22.4,0.65,2.9,22.9,0.63,1.6,23.4,0.60,1.4,23.9,0.58,1.8,24.4,0.55,1.1,24.9,12.3
0.36,1.9,22.4,0.41,1.2,22.9,0.49,1.5,24.4,0.59,0.6,26.4,0.66,0.5,28.9,0.71,0.5,30.9,0.76,0.5,32.4,0.69,0.5,32.9,0.61,0.5,33.4,0.56,0.5,33.9,8.0
0.27,5.4,20.4,0.42,6.9,21.2,0.58,9.2,21.9,0.75,6.9,22.6,0.85,5.2,23.4,0.88,3.9,23.9,0.87,3.4,24.4,0.86,2.4,24.9,0.84,1.9,25.2,0.80,1.4,25.4,20.5
0.20,3.9,19.4,0.33,4.4,19.9,0.48,4.9,20.4,0.65,5.9,20.9,0.78,6.9,21.4,0.80,5.4,21.9,0.75,3.9,22.9,0.65,2.9,23.9,0.60,2.4,24.4,0.55,1.9,24.9,14.5
0.29,2.4,21.1,0.37,2.1,21.6,0.50,5.8,21.8,0.66,12.6,22.6,0.80,9.2,23.7,0.90,3.7,25.1,0.93,0.8,25.6,0.91,0.5,26.1,0.85,0.2,26.6,0.77,1.4,26.8,19.6
0.24,5.6,19.6,0.34,4.2,19.9,0.44,6.3,20.6,0.59,3.1,21.2,0.69,1.4,22.5,0.74,1.8,23.2,0.72,0.9,23.7,0.69,0.7,24.2,0.66,1.0,24.8,0.64,0.4,25.2,13.2
0.15,1.6,21.4,0.22,1.0,21.9,0.27,1.3,23.3,0.35,0.6,25.3,0.42,0.4,27.8,0.47,0.4,29.9,0.52,0.4,31.3,0.45,0.4,31.8,0.37,0.4,32.3,0.32,0.4,32.8,7.0
0.35,4.9,20.0,0.50,6.4,20.4,0.65,8.5,20.9,0.83,6.3,21.7,0.94,4.6,22.4,0.96,3.5,22.9,0.95,3.0,23.4,0.94,2.0,23.9,0.92,1.5,24.1,0.90,1.0,24.4,22.9
0.28,3.5,19.4,0.41,4.0,19.9,0.56,4.5,20.4,0.74,5.6,20.9,0.86,6.5,21.4,0.88,5.0,21.9,0.81,3.5,22.9,0.66,2.6,23.9,0.61,2.0,24.4,0.56,1.5,24.9,11.4
0.29,4.0,21.5,0.39,4.5,22.0,0.52,6.5,22.3,0.71,10.5,23.0,0.83,8.0,24.0,0.91,3.5,25.5,0.94,1.3,26.0,0.91,0.9,26.5,0.85,0.6,27.0,0.79,2.0,27.3,19.3
0.16,6.5,19.5,0.26,5.0,20.0,0.36,7.5,20.5,0.51,4.0,21.3,0.61,2.3,22.5,0.66,3.0,23.0,0.64,1.7,23.5,0.61,1.5,24.0,0.59,1.9,24.5,0.56,1.2,25.0,12.5
0.37,2.0,22.5,0.42,1.3,23.0,0.50,1.6,24.5,0.60,0.7,26.5,0.67,0.6,29.0,0.72,0.6,31.0,0.77,0.6,32.5,0.70,0.6,33.0,0.62,0.6,33.5,0.57,0.6,34.0,8.2
0.28,5.5,20.5,0.43,7.0,21.3,0.59,9.3,22.0,0.76,7.0,22.7,0.86,5.3,23.5,0.89,4.0,24.0,0.88,3.5,24.5,0.87,2.5,25.0,0.85,2.0,25.3,0.81,1.5,25.5,20.7
0.21,4.0,19.5,0.34,4.5,20.0,0.49,5.0,20.5,0.66,6.0,21.0,0.79,7.0,21.5,0.81,5.5,22.0,0.76,4.0,23.0,0.66,3.0,24.0,0.61,2.5,24.5,0.56,2.0,25.0,14.7
0.30,2.5,21.2,0.38,2.2,21.7,0.51,5.9,21.9,0.67,12.7,22.7,0.81,9.3,23.8,0.91,3.8,25.2,0.94,0.9,25.7,0.92,0.6,26.2,0.86,0.3,26.7,0.78,1.4,26.9,19.8
0.25,6.0,19.7,0.35,4.6,20.0,0.45,6.7,20.7,0.60,3.5,21.3,0.70,1.9,22.6,0.75,2.4,23.3,0.73,1.5,23.8,0.70,1.3,24.3,0.67,1.6,24.9,0.65,0.9,25.3,13.4
0.16,1.7,21.5,0.23,1.1,22.0,0.28,1.4,23.4,0.36,0.7,25.4,0.43,0.6,27.9,0.48,0.6,30.0,0.53,0.6,31.4,0.46,0.6,31.9,0.38,0.6,32.4,0.33,0.6,32.9,7.2
0.36,5.0,20.1,0.51,6.5,20.5,0.66,8.6,21.0,0.84,6.4,21.8,0.95,4.7,22.5,0.97,3.6,23.0,0.96,3.1,23.5,0.95,2.1,24.0,0.93,1.6,24.2,0.91,1.1,24.5,23.1
0.29,3.6,19.5,0.42,4.1,20.0,0.57,4.6,20.5,0.75,5.7,21.0,0.87,6.6,21.5,0.89,5.1,22.0,0.82,3.6,23.0,0.67,2.7,24.0,0.62,2.1,24.5,0.57,1.6,25.0,11.6
"""

def load_and_preprocess_data(csv_string, n_timesteps=10, n_features=3):
    """
    Loads data, standardizes features, and prepares them for both Random Forest (2D) and CNN (3D).

    The feature columns are expected in timestep-major order (all features of
    T1, then all features of T2, ...), so that a row-major reshape of each
    flattened row yields an (n_timesteps, n_features) matrix.

    Args:
        csv_string (str): The raw CSV data as a string. It must contain a
            'Yield_Ton_Ha' target column; every other column is a feature.
        n_timesteps (int): Number of measurements per sample (default 10).
        n_features (int): Number of features per timestep (default 3).

    Returns:
        tuple: X_rf_scaled, Y, X_cnn_scaled -- the standardized feature matrix
        of shape (N_samples, n_timesteps * n_features), the (unscaled) target
        vector, and the standardized features reshaped to
        (N_samples, n_timesteps, n_features).

    Raises:
        KeyError: If the 'Yield_Ton_Ha' column is missing.
        ValueError: If the number of feature columns differs from
            n_timesteps * n_features.
    """
    from io import StringIO

    # Load data into a Pandas DataFrame
    df = pd.read_csv(StringIO(csv_string))

    # Separate features (X) from the target (Y)
    Y = df['Yield_Ton_Ha'].values
    X_df = df.drop(columns=['Yield_Ton_Ha'])

    # --- Data Preparation for Random Forest (RF) ---
    X_rf = X_df.values  # Flattened 2D data: (N_samples, n_timesteps * n_features)
    n_samples, n_columns = X_rf.shape

    # Fail early with an explicit message instead of an opaque reshape error
    expected_columns = n_timesteps * n_features
    if n_columns != expected_columns:
        raise ValueError(
            f"Expected {expected_columns} feature columns "
            f"(n_timesteps={n_timesteps} x n_features={n_features}), "
            f"but the CSV has {n_columns}."
        )

    # 1. Standardize the data (CRITICAL for CNNs). Scaling is applied per
    #    flattened column, i.e. per (timestep, feature) pair.
    # NOTE(review): the scaler is fit on ALL rows here, while the train/test
    # split happens later inside the training functions, so test-set
    # statistics leak into the scaled training features. Fixing this requires
    # splitting before scaling, which would change this function's contract.
    scaler = StandardScaler()
    X_rf_scaled = scaler.fit_transform(X_rf)

    # --- Data Preparation for Convolutional Neural Network (CNN) ---
    # 2. Reshape the SCALED data from (N, T*F) to (N, T, F) for CNN
    X_cnn_scaled = X_rf_scaled.reshape((n_samples, n_timesteps, n_features))

    print("Dataset loaded, standardized, and prepared:")
    print(f"- Total Samples (N): {n_samples}")
    print(f"- RF Input Shape (2D Scaled): {X_rf_scaled.shape}")
    print(f"- CNN Input Shape (3D Scaled): {X_cnn_scaled.shape}\n")

    return X_rf_scaled, Y, X_cnn_scaled

def train_and_evaluate_rf(X, Y):
    """
    Trains and evaluates a Random Forest Regressor using standardized data.

    The data is split 80/20 into train and test sets with a fixed random
    state, the forest is fit on the training part, and MSE/R2 on the test
    part plus the highest-ranked feature importances are printed.

    Args:
        X: 2D feature matrix of shape (N_samples, N_features).
        Y: Target vector of length N_samples.

    Returns:
        None. Results are printed to stdout.
    """
    print("--- Training Random Forest Regressor (2D Scaled Input) ---")

    # Hold out 20% of the samples for testing; the actual sizes are printed
    # below rather than assumed.
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

    print(f"Split data: Training samples={len(X_train)}, Testing samples={len(X_test)}")

    # Initialize and train the Random Forest Regressor
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, Y_train)

    # Predict and evaluate on the held-out test set
    Y_pred = model.predict(X_test)

    mse = mean_squared_error(Y_test, Y_pred)
    r2 = r2_score(Y_test, Y_pred)

    print("\nModel Metrics (on test data):")
    print(f"  Mean Squared Error (MSE): {mse:.2f}")
    print(f"  R-squared (R2) Score: {r2:.2f}")

    print("\n--- Feature Importance (RF) ---")
    # Importances are indexed by column position in the flattened feature matrix
    feature_importances = model.feature_importances_
    sorted_indices = np.argsort(feature_importances)[::-1]

    # Report the index range and the number of ranked features from the data
    # itself rather than assuming exactly 30 columns.
    n_columns = len(feature_importances)
    top_k = min(5, n_columns)
    print(f"Top {top_k} Feature Indices by Importance (Flattened Index 0-{n_columns - 1}):")
    for i in sorted_indices[:top_k]:
        print(f"  Feature Index {i}: Importance {feature_importances[i]:.4f}")


def train_and_evaluate_cnn(X, Y):
    """
    Trains and evaluates a simple 1D Convolutional Neural Network (CNN)
    designed for time-series data using standardized data.

    The data is split 80/20 into train and test sets with a fixed random
    state, the network is trained on the training part, and MSE/R2 on the
    test part are printed.

    Args:
        X: 3D feature tensor of shape (N_samples, N_timesteps, N_features).
        Y: Target vector of length N_samples.

    Returns:
        None. The model summary and results are printed to stdout.
    """
    print("\n--- Training 1D Convolutional Neural Network (3D Scaled Input) ---")

    n_timesteps, n_features = X.shape[1], X.shape[2]

    # Split the data for the CNN
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

    print(f"Split data: Training samples={len(X_train)}, Testing samples={len(X_test)}")

    # 1. Define the CNN Model Architecture
    model = Sequential([
        # Declare the input shape with an explicit Input layer; passing
        # `input_shape=` to the first layer of a Sequential model makes Keras
        # emit a warning.
        tf.keras.Input(shape=(n_timesteps, n_features)),

        # 1D Convolutional layer to extract features across the timesteps
        Conv1D(filters=32, kernel_size=3, activation='relu'),
        MaxPooling1D(pool_size=2),  # Reduce the time dimension
        Dropout(0.2),  # Dropout for regularization

        # Flatten the output for the Dense layers
        Flatten(),

        # Fully connected (Dense) layers for prediction
        Dense(50, activation='relu'),
        Dense(1),  # Output layer with 1 unit for regression (Yield prediction)
    ])

    # 2. Compile the model
    # Use 'adam' optimizer and 'mse' (Mean Squared Error) for regression
    model.compile(optimizer='adam', loss='mse')

    print("Model Summary:")
    model.summary()

    # 3. Train the model
    # The epoch count is kept in one place so the log message cannot drift
    # from the value actually passed to fit().
    epochs = 50
    batch_size = 4
    print(f"\nStarting CNN training ({epochs} epochs)...")
    model.fit(
        X_train, Y_train,
        epochs=epochs,
        batch_size=batch_size,
        verbose=0,
    )
    print("Training finished.")

    # 4. Evaluate the model; predict returns shape (n, 1), so flatten to (n,)
    Y_pred = model.predict(X_test, verbose=0).flatten()

    mse = mean_squared_error(Y_test, Y_pred)
    r2 = r2_score(Y_test, Y_pred)

    print("\nModel Metrics (on test data):")
    print(f"  Mean Squared Error (MSE): {mse:.2f}")
    print(f"  R-squared (R2) Score: {r2:.2f}")


# Script entry point: runs both models on the standardized embedded dataset.
if __name__ == "__main__":
    # 1. Load, standardize, and prepare the data (2D matrix for the Random
    #    Forest, 3D tensor for the CNN; the target vector is left unscaled)
    X_rf_scaled, Y_data, X_cnn_scaled = load_and_preprocess_data(csv_data)

    # 2. Train the Random Forest Model with scaled data
    train_and_evaluate_rf(X_rf_scaled, Y_data)

    # 3. Train and Evaluate the CNN Model with scaled data
    train_and_evaluate_cnn(X_cnn_scaled, Y_data)


Dataset loaded, standardized, and prepared:
- Total Samples (N): 65
- RF Input Shape (2D Scaled): (65, 30)
- CNN Input Shape (3D Scaled): (65, 10, 3)

--- Training Random Forest Regressor (2D Scaled Input) ---
Split data: Training samples=52, Testing samples=13

Model Metrics (on test data):
  Mean Squared Error (MSE): 0.87
  R-squared (R2) Score: 0.96

--- Feature Importance (RF) ---
Top 5 Feature Indices by Importance (Flattened Index 0-29):
  Feature Index 18: Importance 0.3036
  Feature Index 21: Importance 0.1766
  Feature Index 24: Importance 0.1462
  Feature Index 27: Importance 0.0985
  Feature Index 15: Importance 0.0744

--- Training 1D Convolutional Neural Network (3D Scaled Input) ---
Split data: Training samples=52, Testing samples=13
Model Summary:


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Starting CNN training (50 epochs)...
Training finished.

Model Metrics (on test data):
  Mean Squared Error (MSE): 1.11
  R-squared (R2) Score: 0.95
