Importing all the libraries

In [1]:
import pandas as pd
import numpy as np
print("numpy and pandas imported")

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
from sklearn.svm import SVC
print("sklearn library imported")
from xgboost import XGBClassifier
print("xgboost imported")
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
print("tensorflow imported")

import shap
import lime
import lime.lime_tabular
print("shap and lime imported")

import matplotlib.pyplot as plt
import seaborn as sns
print("matplot and seaborn imported")

import warnings
warnings.filterwarnings("ignore")
print("warnings imported")

print("All libraries imported successfully!")
 

numpy and pandas imported
sklearn library imported
xgboost imported


2025-06-17 11:42:28.457151: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


tensorflow imported
shap and lime imported
matplot and seaborn imported
All libraries imported successfully!


Importing the dataset

In [2]:
df = pd.read_csv("Crop_recommendation.csv")

print("Dataset loaded. Shape:",df.shape)

df.head(10)

Dataset loaded. Shape: (2200, 8)


Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.71734,rice
5,69,37,42,23.058049,83.370118,7.073454,251.055,rice
6,69,55,38,22.708838,82.639414,5.700806,271.32486,rice
7,94,53,40,20.277744,82.894086,5.718627,241.974195,rice
8,89,54,38,24.515881,83.535216,6.685346,230.446236,rice
9,68,58,38,23.223974,83.033227,6.336254,221.209196,rice


Check for dataset input features and output labels

In [3]:
# Check the column names and data types
print(df.info())

# Check for missing values
print("\nMissing values in each column:\n", df.isnull().sum())

# Check how many unique crop labels are present
print("\nUnique crop types:\n", df['label'].value_counts())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2200 entries, 0 to 2199
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   N            2200 non-null   int64  
 1   P            2200 non-null   int64  
 2   K            2200 non-null   int64  
 3   temperature  2200 non-null   float64
 4   humidity     2200 non-null   float64
 5   ph           2200 non-null   float64
 6   rainfall     2200 non-null   float64
 7   label        2200 non-null   object 
dtypes: float64(4), int64(3), object(1)
memory usage: 137.6+ KB
None

Missing values in each column:
 N              0
P              0
K              0
temperature    0
humidity       0
ph             0
rainfall       0
label          0
dtype: int64

Unique crop types:
 label
rice           100
maize          100
chickpea       100
kidneybeans    100
pigeonpeas     100
mothbeans      100
mungbean       100
blackgram      100
lentil         100
pomegranate    100
banana

Separation of features and labels and label encoding of the output label 

In [5]:
# Separate input features and target
X = df.drop("label", axis=1)
y = df["label"]

# Encode string labels into numbers
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Print label -> number mapping
label_mapping = dict(zip(le.classes_, le.transform(le.classes_)))
print(label_mapping)


{'apple': 0, 'banana': 1, 'blackgram': 2, 'chickpea': 3, 'coconut': 4, 'coffee': 5, 'cotton': 6, 'grapes': 7, 'jute': 8, 'kidneybeans': 9, 'lentil': 10, 'maize': 11, 'mango': 12, 'mothbeans': 13, 'mungbean': 14, 'muskmelon': 15, 'orange': 16, 'papaya': 17, 'pigeonpeas': 18, 'pomegranate': 19, 'rice': 20, 'watermelon': 21}


Train-test data split (80:20)

In [6]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42
)


XGBClassifier Model training

In [7]:
from xgboost import XGBClassifier

# Initialize the classifier
model = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')

# Train the model
model.fit(X_train, y_train)

print("Model training complete.")


Model training complete.


XGBClassification report 

In [8]:
from sklearn.metrics import accuracy_score, classification_report

# Predict on test set
y_pred = model.predict(X_test)

# Accuracy score
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Detailed classification report
print("\nClassification Report:\n")
print(classification_report(y_test, y_pred, target_names=le.classes_))


Accuracy: 98.64%

Classification Report:

              precision    recall  f1-score   support

       apple       1.00      1.00      1.00        23
      banana       1.00      1.00      1.00        21
   blackgram       0.95      1.00      0.98        20
    chickpea       1.00      1.00      1.00        26
     coconut       1.00      1.00      1.00        27
      coffee       1.00      1.00      1.00        17
      cotton       0.94      1.00      0.97        17
      grapes       1.00      1.00      1.00        14
        jute       0.96      0.96      0.96        23
 kidneybeans       1.00      1.00      1.00        20
      lentil       0.92      1.00      0.96        11
       maize       1.00      0.95      0.98        21
       mango       0.95      1.00      0.97        19
   mothbeans       1.00      0.96      0.98        24
    mungbean       1.00      1.00      1.00        19
   muskmelon       1.00      1.00      1.00        17
      orange       1.00      1.00      