# IMPORTING LIBRARIES

In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical

In [3]:


# Load the crop-recommendation dataset from the notebook's working directory.
df = pd.read_csv("Crop_recommendation.csv")

In [4]:
# Numeric feature columns: scaled below and later used as the model inputs.
num_cols = [
    "N", "P", "K",
    "temperature", "humidity", "ph", "rainfall",
]

# DATA PREPROCESSING

In [5]:
# Standardize the numeric feature columns in place with StandardScaler.
# NOTE(review): the scaler is fitted on the full dataset, before the outlier
# filtering and the train/test split done in later cells, so rows that end up
# in the test split contribute to the scaling statistics. Consider fitting on
# the training split only.
# NOTE(review): df is overwritten here, so re-running this cell refits the
# scaler on already-scaled values and scaler.transform() would no longer map
# raw inputs correctly - run it once per kernel session.
scaler = StandardScaler()
df[num_cols] = scaler.fit_transform(df[num_cols])

# Class balance of the full dataset. No rows have been removed at this point;
# outlier filtering happens in a later cell.
print("Original unique classes:", df['label'].nunique())
print("Class counts:\n", df['label'].value_counts())


Original unique classes: 22
Classes remaining: 22
Class counts:
 label
rice           100
maize          100
chickpea       100
kidneybeans    100
pigeonpeas     100
mothbeans      100
mungbean       100
blackgram      100
lentil         100
pomegranate    100
banana         100
mango          100
grapes         100
watermelon     100
muskmelon      100
apple          100
orange         100
papaya         100
coconut        100
cotton         100
jute           100
coffee         100
Name: count, dtype: int64


In [6]:
def remove_outliers_iqr(data, column, multiplier=1.5):
    """Return the rows of ``data`` whose ``column`` value lies within the IQR fences.

    The fences are ``Q1 - multiplier * IQR`` and ``Q3 + multiplier * IQR``
    (both inclusive), where Q1 and Q3 are the 25th and 75th percentiles of
    the column.

    Args:
        data: DataFrame to filter; it is not modified.
        column: Name of the numeric column to test.
        multiplier: Width of the fences in IQR units (1.5 by default).

    Returns:
        The filtered DataFrame, keeping the original index labels. Rows whose
        value is NaN are dropped because NaN fails both comparisons.
    """
    # nanpercentile ignores missing values. Plain percentile would yield NaN
    # bounds as soon as a single NaN is present, and the filter below would
    # then silently drop every row.
    q1, q3 = np.nanpercentile(data[column], [25, 75])
    iqr = q3 - q1
    lower_bound = q1 - multiplier * iqr
    upper_bound = q3 + multiplier * iqr
    in_range = (data[column] >= lower_bound) & (data[column] <= upper_bound)
    return data[in_range]

# Filter outliers one column at a time. Each pass computes its IQR bounds on
# the rows that survived the previous pass, so the column order matters.
# 'N' is not filtered.
for outlier_col in ('rainfall', 'temperature', 'ph', 'humidity', 'P', 'K'):
    df = remove_outliers_iqr(df, outlier_col)

In [7]:
# Encode crop names as integers. The encoder is fitted after the outlier
# filter, so only classes that still have rows receive an index.
le = LabelEncoder()
# assign() builds a new frame instead of writing a column into the result of a
# boolean filter, which can trigger pandas' SettingWithCopyWarning.
df = df.assign(label_encoded=le.fit_transform(df['label']))

# One-hot encode the integer labels (needed for categorical cross-entropy).
y_one_hot = to_categorical(df['label_encoded'])

# Features (X) and target (y)
X = df[num_cols]  # Scaled and outlier-filtered numeric features
y = y_one_hot     # One-hot encoded labels

In [8]:
# Train/test split (80% train, 20% test), stratified on the integer class label
# so each crop is represented proportionally in both splits.
# train_test_split is imported in the notebook's import cell; the former
# `from keras.utils import to_categorical` here was unused and only rebound a
# name already imported from tensorflow.keras, so it has been dropped.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=df['label_encoded'])

# Sanity-check the resulting shapes.
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)

X_train shape: (1431, 7)
y_train shape: (1431, 20)
X_test shape: (358, 7)
y_test shape: (358, 20)


In [9]:
# Summarize the frame after scaling, outlier filtering and label encoding:
# row count, column dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1789 entries, 0 to 2199
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   N              1789 non-null   float64
 1   P              1789 non-null   float64
 2   K              1789 non-null   float64
 3   temperature    1789 non-null   float64
 4   humidity       1789 non-null   float64
 5   ph             1789 non-null   float64
 6   rainfall       1789 non-null   float64
 7   label          1789 non-null   object 
 8   label_encoded  1789 non-null   int64  
dtypes: float64(7), int64(1), object(1)
memory usage: 139.8+ KB


In [10]:
# Preview the first rows. Feature values are already standardized, and the
# index keeps its original labels, so gaps correspond to filtered-out rows.
df.head()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label,label_encoded
0,1.068797,-0.344551,-0.101688,-0.935587,0.472666,0.043302,1.810361,rice,18
13,1.150079,0.07997,-0.239928,-0.316294,0.475098,0.665416,1.488986,rice,18
14,1.177172,-0.101968,-0.220179,0.009799,0.412515,0.618459,1.931415,rice,18
17,1.095891,-0.556811,-0.180682,-0.359958,0.401478,0.647977,1.870886,rice,18
20,1.041704,-0.253582,-0.239928,-0.847629,0.40402,-0.034901,1.492993,rice,18


# MODEL

In [11]:
# Build a small fully connected classifier.
# Layer sizes are derived from the training arrays instead of being hard-coded:
# the number of classes depends on which crops survive the outlier filter.
n_features = X_train.shape[1]  # one input per numeric feature column
n_classes = y_train.shape[1]   # width of the one-hot encoded target

model = Sequential()
# An explicit Input object replaces `input_dim=` on the first Dense layer,
# which Keras warns against for Sequential models.
model.add(Input(shape=(n_features,)))
model.add(Dense(64, activation='relu'))            # First hidden layer
model.add(Dense(32, activation='relu'))            # Second hidden layer
model.add(Dense(n_classes, activation='softmax'))  # One probability per crop class


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [12]:
# Configure training: Adam optimizer, categorical cross-entropy loss (targets
# are one-hot encoded) and accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [13]:
# Train the model; 20% of the training split is held out for validation.
# The returned History object is kept so the per-epoch loss/accuracy can be
# inspected or plotted later, and so its repr is not echoed as cell output.
history = model.fit(X_train, y_train, epochs=100, batch_size=32, verbose=1, validation_split=0.2)

Epoch 1/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 50ms/step - accuracy: 0.1682 - loss: 2.9088 - val_accuracy: 0.2021 - val_loss: 2.6794
Epoch 2/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.3083 - loss: 2.5378 - val_accuracy: 0.3833 - val_loss: 2.2807
Epoch 3/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5000 - loss: 2.0726 - val_accuracy: 0.5784 - val_loss: 1.7847
Epoch 4/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6378 - loss: 1.5931 - val_accuracy: 0.7735 - val_loss: 1.2771
Epoch 5/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8051 - loss: 1.1092 - val_accuracy: 0.8328 - val_loss: 0.8924
Epoch 6/100
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8807 - loss: 0.7679 - val_accuracy: 0.8676 - val_loss: 0.6416
Epoch 7/100
[1m36/36[0m [32m━━

<keras.src.callbacks.history.History at 0x7cc87954b7d0>

In [14]:
# Evaluate the trained model on the held-out test split.
# evaluate() yields the loss followed by the metrics configured at compile
# time (here: accuracy).
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy:.4f}')

[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 44ms/step - accuracy: 0.9738 - loss: 0.0725
Test Accuracy: 0.9777


In [16]:
# Show the integer index -> crop name mapping of the encoder fitted during
# preprocessing (`le`). The previous name `label_encoder` is only created in a
# later cell, so this cell failed on a fresh top-to-bottom run.
for i, label in enumerate(le.classes_):
    print(f"{i}: {label}")


0: banana
1: blackgram
2: chickpea
3: coconut
4: coffee
5: cotton
6: jute
7: kidneybeans
8: lentil
9: maize
10: mango
11: mothbeans
12: mungbean
13: muskmelon
14: orange
15: papaya
16: pigeonpeas
17: pomegranate
18: rice
19: watermelon


In [18]:
# Column order must match the columns the scaler was fitted on.
feature_names = list(num_cols)

# Sample input in raw (unscaled) units, one row per sample.
test_data = np.array([[120, 50, 40, 18.0, 50.0, 6.8, 80.0]])
test_df = pd.DataFrame(test_data, columns=feature_names)

# Apply the same scaling the training features received.
scaled_data = scaler.transform(test_df)

# Predict class probabilities and take the most likely class per row;
# only the first (and here only) sample is reported below.
prediction = model.predict(scaled_data)
predicted_class_index = np.argmax(prediction, axis=1)[0]

# Decode with the encoder fitted during preprocessing rather than refitting a
# new one, so the index -> name mapping is the one the model was trained with.
# The `label_encoder` name is kept as an alias for cells that refer to it.
label_encoder = le
predicted_label = label_encoder.inverse_transform([predicted_class_index])

print(f'Predicted Class Index: {predicted_class_index}')
print(f'Predicted Crop Type: {predicted_label[0]}')


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
Predicted Class Index: 9
Predicted Crop Type: maize
