In [33]:
import pandas as pd
import pickle
import os
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score

# Load Data
data = pd.read_csv('Crop_recommendation.csv')


le = LabelEncoder()
data['label'] = le.fit_transform(data['label'])  

X = data.drop('label', axis=1)
y = data['label']


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)


pipeline = Pipeline([
    ('scaler', StandardScaler()),  # Standardize Input Features
    ('model', RandomForestClassifier(n_estimators=100, max_depth=6))
])

# Train Pipeline
pipeline.fit(X_train, y_train)


y_pred = pipeline.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# Convert Predictions Back to Crop Names
y_pred_labels = le.inverse_transform(y_pred)

print("Predicted Crops:", y_pred_labels[:10])
print(f'Random Forest Model Accuracy Score: {accuracy:.4f}')



Predicted Crops: ['muskmelon' 'watermelon' 'papaya' 'papaya' 'apple' 'mango' 'apple'
 'mothbeans' 'mungbean' 'lentil']
Random Forest Model Accuracy Score: 0.9879


In [5]:
from sklearn.model_selection import cross_val_score, KFold

kf = KFold(n_splits=5, shuffle=True, random_state=42)  # 5-fold cross-validation
cv_scores = cross_val_score(pipeline, X_train, y_train, cv=kf, scoring='accuracy')

print("Cross-Validation Scores:", cv_scores)
print("Mean CV Accuracy:", cv_scores.mean())
print("Standard Deviation CV Accuracy:", cv_scores.std())

Cross-Validation Scores: [0.98701299 0.99350649 0.99025974 0.99025974 0.97727273]
Mean CV Accuracy: 0.9876623376623377
Standard Deviation CV Accuracy: 0.005585925498079636


In [35]:
from sklearn.model_selection import StratifiedKFold

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cv_scores = cross_val_score(pipeline, X_train, y_train, cv=skf, scoring='accuracy')

print("Stratified CV Scores:", cv_scores)
print("Mean Stratified CV Accuracy:", cv_scores.mean())

Stratified CV Scores: [0.99675325 0.98051948 0.99025974 0.98376623 0.98701299]
Mean Stratified CV Accuracy: 0.9876623376623377


## hyperparameter tuning 

In [37]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'model__n_estimators': [50, 100, 200],
    'model__max_depth': [5, 6, 7],
}

grid_search = GridSearchCV(pipeline, param_grid, cv=skf, scoring='accuracy')
grid_search.fit(X_train, y_train)

print("Best Parameters:", grid_search.best_params_)
best_pipeline = grid_search.best_estimator_
y_pred_best = best_pipeline.predict(X_test)
print("Best Accuracy:", accuracy_score(y_test, y_pred_best))

Best Parameters: {'model__max_depth': 7, 'model__n_estimators': 100}
Best Accuracy: 0.9863636363636363


## sample data prediction

In [44]:
import numpy as np
sample_data_raw = np.array([[3, 76, 8, 20.8248451, 17.85057083, 94.599279991, 79.20509212]])


# Load the Standalone Model
# with open("model_artifacts/final_model.pkl", "rb") as model_file:
#     pipeline, le = pickle.load(model_file)

# Example Input Data (N, P, K, Temperature, Humidity, pH, Rainfall)
sample_data = np.array([[50, 30, 40, 25.4, 80.2, 6.5, 200.1]])

# Predict Crop Recommendation
predicted_label = best_pipeline.predict(sample_data_raw)
crop_name = le.inverse_transform(predicted_label)[0]

print(f"Recommended Crop: {crop_name}")



Recommended Crop: kidneybeans




In [1]:
import sklearn
print(sklearn.__version__)


1.0.2


In [48]:
import joblib

joblib.dump((best_pipeline), "model_artifacts/model.joblib")
joblib.dump((le),"model_artifacts/le.joblib")
print("Final Model (with Preprocessing) Saved Successfully as 'final_model.joblib'!")

Final Model (with Preprocessing) Saved Successfully as 'final_model.joblib'!


In [60]:
import numpy as np
import joblib
sample_data = np.array([[3, 76, 8, 20.8248451, 17.85057083, 94.599279991, 79.20509212]])
# with open ("model_artifacts/final_model.joblib","rb") as model_file:
#     model , le =  joblib.load(model_file)
# prediction  = model.predict(sample_data)
model = joblib.load("model_artifacts/model.joblib")
le = joblib.load("model_artifacts/le.joblib")
prediction = model.predict(sample_data)

# predicted_crop  =  le.inverse_transform(prediction)
prediction  = np.array([9])
predicted_crop  =  le.inverse_transform(prediction)
print(predicted_crop[0])       

kidneybeans




## further testing of model

In [23]:
import joblib
print(joblib.__version__)


1.0.1


In [31]:
import joblib
import numpy as np
with open("model_artifacts/model.joblib", "rb") as model_file:
    pipeline= joblib.load(model_file)

# Example Input Data (N, P, K, Temperature, Humidity, pH, Rainfall)
sample_data = np.array([[133,47,24,24,80,7,90]])
predicted = pipeline.predict(sample_data)
crop_name = le.inverse_transform(predicted)
print(crop_name[0])




AttributeError: 'RandomForestClassifier' object has no attribute 'inverse_transform'

In [None]:
import numpy as np
import json

# Your sample data as a NumPy array
sample_data_raw = np.array([[3, 76, 8, 20.8248451, 17.85057083, 94.599279991, 79.20509212]])

# Convert NumPy array to a Python list
sample_data_list = sample_data_raw.tolist()

# Create the JSON request format
payload = {
    "instances": sample_data_list
}

# Save to a JSON file (optional)
with open("prediction_input.json", "w") as f:
    json.dump(payload, f, indent=4)

print("Formatted JSON:", json.dumps(payload, indent=4))
