In [2]:
import os
import pickle

import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import OneHotEncoder

In [3]:
# Load the dataset.
# The CSV location defaults to the original local path, but it can be
# overridden by setting the CROP_DATA_PATH environment variable so the
# notebook can run on another machine without editing this cell.
DATA_PATH = os.environ.get("CROP_DATA_PATH", r"E:\Datasets\Crop_Recommendation.csv")
data = pd.read_csv(DATA_PATH)


In [4]:
# Model inputs and outputs: the crop name is the sole predictor column, and
# the seven listed soil/weather columns are the targets to be predicted.
X = data.loc[:, ['Crop']]
y = data.loc[:, [
    'Nitrogen', 'Phosphorus', 'Potassium',
    'Temperature', 'Humidity', 'pH_Value', 'Rainfall',
]]

In [5]:

# Turn each crop name into a one-hot indicator vector. sparse_output=False
# makes the encoder return a dense array instead of a sparse matrix.
encoder = OneHotEncoder(sparse_output=False)
encoder.fit(X)
X_encoded = encoder.transform(X)



In [6]:
# Persist the one-hot encoder object to 'encoder.pkl' (it is an encoder, not a scaler)
with open('encoder.pkl', mode='wb') as encoder_out:
    pickle.dump(encoder, encoder_out)

In [9]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded,
    y,
    random_state=43,
    test_size=0.2,
)


In [18]:
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.multioutput import MultiOutputRegressor
from sklearn.ensemble import RandomForestRegressor

# Base learner: a seeded random forest wrapped in MultiOutputRegressor
multi_output_rf = MultiOutputRegressor(estimator=RandomForestRegressor(random_state=43))

# Candidate hyper-parameters. The 'estimator__' prefix addresses the
# RandomForestRegressor held inside the MultiOutputRegressor wrapper.
param_grid = dict(
    estimator__n_estimators=[50, 100, 150],
    estimator__max_depth=[10, 20, None],
    estimator__min_samples_split=[2, 5],
    estimator__min_samples_leaf=[1, 2],
)

# Shuffled 5-fold cross-validation with a fixed seed
kf = KFold(shuffle=True, n_splits=5, random_state=43)

# Exhaustively search the grid, scoring each candidate by R^2 and using all
# available cores, then fit on the training split
grid_search = GridSearchCV(
    estimator=multi_output_rf,
    param_grid=param_grid,
    scoring='r2',
    cv=kf,
    n_jobs=-1,
).fit(X_train, y_train)

# Keep the winning estimator for saving and prediction in later cells
best_model = grid_search.best_estimator_

# Report the selected hyper-parameters
print(f"Best parameters: {grid_search.best_params_}")


Best parameters: {'estimator__max_depth': 20, 'estimator__min_samples_leaf': 1, 'estimator__min_samples_split': 2, 'estimator__n_estimators': 150}


In [19]:
# Write the tuned model and the crop encoder to disk as pickle files,
# model first and encoder second
for artifact, target_path in (
    (best_model, 'crop_nutrient_model.pkl'),
    (encoder, 'crop_encoder.pkl'),
):
    with open(target_path, 'wb') as sink:
        pickle.dump(artifact, sink)


In [21]:
# Predict on the test set: run the best estimator found by the grid search on
# the held-out features, producing predictions to compare against y_test
y_pred = best_model.predict(X_test)


In [22]:
# Score the held-out predictions.
# 'variance_weighted' collapses the per-target R^2 values into one number,
# weighting each target by its variance; 'raw_values' keeps one MSE per
# target column instead of averaging them.
r2 = r2_score(y_test, y_pred, multioutput='variance_weighted')
mse = mean_squared_error(y_test, y_pred, multioutput='raw_values')

print("Mean Squared Error for each nutrient/condition:", mse)
print("Overall R^2 Score:", r2)

Mean Squared Error for each nutrient/condition: [1.49832389e+02 5.75178860e+01 9.74254102e+00 1.21753569e+01
 1.64651124e+01 3.49226769e-01 4.51173211e+02]
Overall R^2 Score: 0.9214963880581968
