In [1]:
# Importing necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import shap
import joblib
import matplotlib.pyplot as plt

In [2]:
# Loading the dataset
df = pd.read_csv('crop_yield.csv')
# Dropping state column to adjust for state per state imbalances.
df = df.drop(columns=['State'])
df.head()

Unnamed: 0,Crop,Crop_Year,Season,Area,Production,Annual_Rainfall,Fertilizer,Pesticide,Yield
0,Arecanut,1997,Whole Year,73814.0,56708,2051.4,7024878.38,22882.34,0.796087
1,Arhar/Tur,1997,Kharif,6637.0,4685,2051.4,631643.29,2057.47,0.710435
2,Castor seed,1997,Kharif,796.0,22,2051.4,75755.32,246.76,0.238333
3,Coconut,1997,Whole Year,19656.0,126905000,2051.4,1870661.52,6093.36,5238.051739
4,Cotton(lint),1997,Kharif,1739.0,794,2051.4,165500.63,539.09,0.420909


In [3]:
# Defining numerical and categorical features
numerical_features = ['Area', 'Production', 'Annual_Rainfall', 'Fertilizer', 'Pesticide']
categorical_features = ['Crop', 'Season']

In [4]:
# Preprocessing pipeline
numeric_transformer = StandardScaler()
categorical_transformer = OneHotEncoder(handle_unknown='ignore')
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ])

In [5]:
# Fit the preprocessor to the training data
preprocessor.fit(df.drop('Yield', axis=1))

In [6]:
# Split the data into training and testing sets
X = df.drop('Yield', axis=1)
y = df['Yield']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



In [11]:
# Build the neural network model
model = Sequential([
    Dense(128, activation='relu', input_shape=(preprocessor.transform(X_train).shape[1],)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1)
])

model.compile(optimizer='adam', loss='mean_squared_error')

In [12]:
# Train the model
model.fit(preprocessor.transform(X_train).todense(), y_train, epochs=50, batch_size=32, verbose=1)




Epoch 1/50


InvalidArgumentError: Graph execution error:

TypeError: 'SparseTensor' object is not subscriptable
Traceback (most recent call last):

  File "c:\Users\G-MAN\anaconda3\Lib\site-packages\tensorflow\python\ops\script_ops.py", line 265, in __call__
    return func(device, token, args)
           ^^^^^^^^^^^^^^^^^^^^^^^^^

  File "c:\Users\G-MAN\anaconda3\Lib\site-packages\tensorflow\python\ops\script_ops.py", line 143, in __call__
    outputs = self._call(device, args)
              ^^^^^^^^^^^^^^^^^^^^^^^^

  File "c:\Users\G-MAN\anaconda3\Lib\site-packages\tensorflow\python\ops\script_ops.py", line 150, in _call
    ret = self._func(*args)
          ^^^^^^^^^^^^^^^^^

  File "c:\Users\G-MAN\anaconda3\Lib\site-packages\tensorflow\python\autograph\impl\api.py", line 642, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^

  File "c:\Users\G-MAN\anaconda3\Lib\site-packages\keras\engine\data_adapter.py", line 512, in py_method
    return [slice_array(inp) for inp in flat_inputs]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

  File "c:\Users\G-MAN\anaconda3\Lib\site-packages\keras\engine\data_adapter.py", line 512, in <listcomp>
    return [slice_array(inp) for inp in flat_inputs]
            ^^^^^^^^^^^^^^^^

  File "c:\Users\G-MAN\anaconda3\Lib\site-packages\keras\engine\data_adapter.py", line 508, in slice_array
    return training_utils.slice_arrays(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^

  File "c:\Users\G-MAN\anaconda3\Lib\site-packages\keras\engine\training_utils.py", line 47, in slice_arrays
    entries = [[x[i : i + 1] for i in indices] for x in arrays]
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

  File "c:\Users\G-MAN\anaconda3\Lib\site-packages\keras\engine\training_utils.py", line 47, in <listcomp>
    entries = [[x[i : i + 1] for i in indices] for x in arrays]
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

  File "c:\Users\G-MAN\anaconda3\Lib\site-packages\keras\engine\training_utils.py", line 47, in <listcomp>
    entries = [[x[i : i + 1] for i in indices] for x in arrays]
                ~^^^^^^^^^^^

TypeError: 'SparseTensor' object is not subscriptable


	 [[{{node EagerPyFunc}}]]
	 [[IteratorGetNext]] [Op:__inference_train_function_2556]

In [None]:
# Evaluate the model using R-squared score
y_pred = model.predict(preprocessor.transform(X_test)).ravel()
r2 = r2_score(y_test, y_pred)
print(f"R-squared score: {r2}")

In [None]:
# Save the model to a file
joblib.dump(model, 'Neural Network Model.pkl')

In [None]:
# Visualize actual vs. predicted values
plt.figure(figsize=(10, 6))
plt.scatter(y_test, y_pred, alpha=0.5, label='Predictions')
plt.title('Actual vs. Predicted Crop Yield')
plt.xlabel('Actual Yield')
plt.ylabel('Predicted Yield')
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'k--', lw=2, label='Actual')
plt.legend()
plt.show()

In [None]:
# Visualize distribution of prediction errors
errors = y_pred - y_test
mean_error = errors.mean()
median_error = errors.median()

In [None]:
plt.figure(figsize=(12, 8))
hist, bins, _ = plt.hist(errors, bins=50, range=(-50, 50), color='skyblue', edgecolor='black')
plt.title('Distribution of Prediction Errors', fontsize=16)
plt.xlabel('Prediction Error', fontsize=14)
plt.ylabel('Frequency', fontsize=14)
plt.yscale('log')
plt.axvline(mean_error, color='red', linestyle='dashed', linewidth=2, label=f'Mean Error: {mean_error:.2f}')
plt.axvline(median_error, color='green', linestyle='dashed', linewidth=2, label=f'Median Error: {median_error:.2f}')
plt.legend(fontsize=12)
plt.show()

In [None]:
errors_df = pd.DataFrame(errors)
print(errors_df)
print(errors_df.isnull().sum())
error_descriptive_stats = errors_df.describe()
print(error_descriptive_stats)