In [38]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import classification_report, confusion_matrix

In [39]:
# Read the crop-recommendation CSV into a DataFrame and display it.
csv_path = 'Crop_recommendation.csv'
df = pd.read_csv(csv_path)
df

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.717340,rice
...,...,...,...,...,...,...,...,...
2195,107,34,32,26.774637,66.413269,6.780064,177.774507,coffee
2196,99,15,27,27.417112,56.636362,6.086922,127.924610,coffee
2197,118,33,30,24.131797,67.225123,6.362608,173.322839,coffee
2198,117,32,34,26.272418,52.127394,6.758793,127.175293,coffee


In [40]:
# Tally the rows per value of the `label` column to see how balanced the classes are.
label_counts = df['label'].value_counts()
label_counts

label
rice           100
maize          100
chickpea       100
kidneybeans    100
pigeonpeas     100
mothbeans      100
mungbean       100
blackgram      100
lentil         100
pomegranate    100
banana         100
mango          100
grapes         100
watermelon     100
muskmelon      100
apple          100
orange         100
papaya         100
coconut        100
cotton         100
jute           100
coffee         100
Name: count, dtype: int64

In [41]:
# Encode the `label` column as integer class ids. The fitted encoder is kept so
# predicted ids can be mapped back with `encoder.inverse_transform`.
#
# Guard against re-running this cell: `df['label']` is overwritten in place, so
# a second run would fit a fresh encoder on the integer ids and silently lose
# the original-label <-> id mapping. The check runs before `encoder` is
# reassigned so an already-fitted encoder is left intact.
if pd.api.types.is_numeric_dtype(df['label']):
    raise RuntimeError(
        "df['label'] is already numeric; re-run the data-loading cell before "
        "encoding the labels again."
    )

encoder = LabelEncoder()
df['label'] = encoder.fit_transform(df['label'])
df

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,20
1,85,58,41,21.770462,80.319644,7.038096,226.655537,20
2,60,55,44,23.004459,82.320763,7.840207,263.964248,20
3,74,35,40,26.491096,80.158363,6.980401,242.864034,20
4,78,42,42,20.130175,81.604873,7.628473,262.717340,20
...,...,...,...,...,...,...,...,...
2195,107,34,32,26.774637,66.413269,6.780064,177.774507,5
2196,99,15,27,27.417112,56.636362,6.086922,127.924610,5
2197,118,33,30,24.131797,67.225123,6.362608,173.322839,5
2198,117,32,34,26.272418,52.127394,6.758793,127.175293,5


In [42]:
numeric_col = ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']

# Fit the scaler on the training rows only, so the held-out rows do not leak
# their min/max into preprocessing. Without stratification, the partition made
# by train_test_split depends only on the number of rows and random_state, so
# reusing test_size=0.2 / random_state=42 here selects the same training rows
# as the train/test split performed on `df` afterwards. Keep the two in sync.
train_rows = train_test_split(np.arange(len(df)), test_size=0.2, random_state=42)[0]

scaler = MinMaxScaler()
scaler.fit(df.iloc[train_rows][numeric_col])

# Transform every row with the train-fitted scaler. Held-out rows may land
# slightly outside [0, 1]; that is expected.
# NOTE: the raw columns are overwritten in place, so reload the data before
# re-running this cell (otherwise the scaler is refit on already-scaled values).
df[numeric_col] = scaler.transform(df[numeric_col])
df

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,0.642857,0.264286,0.190,0.345886,0.790267,0.466264,0.656458,20
1,0.607143,0.378571,0.180,0.371445,0.770633,0.549480,0.741675,20
2,0.428571,0.357143,0.195,0.406854,0.793977,0.674219,0.875710,20
3,0.528571,0.214286,0.175,0.506901,0.768751,0.540508,0.799905,20
4,0.557143,0.264286,0.185,0.324378,0.785626,0.641291,0.871231,20
...,...,...,...,...,...,...,...,...
2195,0.764286,0.207143,0.135,0.515037,0.608410,0.509353,0.566064,5
2196,0.707143,0.071429,0.110,0.533473,0.494359,0.401561,0.386972,5
2197,0.842857,0.200000,0.125,0.439202,0.617880,0.444433,0.550071,5
2198,0.835714,0.192857,0.145,0.500627,0.441760,0.506045,0.384280,5


In [43]:
# Separate the target column from the predictors, then hold out 20% of the
# rows for evaluation using a fixed seed.
y = df['label']
X = df.drop(columns='label')

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [44]:
# Candidate classifiers to compare on the same train/test split. The two
# ensemble models draw random numbers while fitting, so they get a fixed seed
# to make the reported metrics reproducible across runs.
models = {
    "Logistic Regression": LogisticRegression(),
    "Random Forest": RandomForestClassifier(random_state=42),
    "Gradient Boosting": GradientBoostingClassifier(random_state=42)
}

# Fit each candidate and print its per-class metrics and confusion matrix on
# the held-out rows.
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(name + " trained.")
    print(classification_report(y_test, y_pred))
    print(confusion_matrix(y_test, y_pred))


Logistic Regression trained.
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        23
           1       1.00      1.00      1.00        21
           2       0.89      0.85      0.87        20
           3       1.00      1.00      1.00        26
           4       1.00      1.00      1.00        27
           5       0.94      1.00      0.97        17
           6       0.94      1.00      0.97        17
           7       1.00      1.00      1.00        14
           8       0.84      0.70      0.76        23
           9       0.91      1.00      0.95        20
          10       0.69      1.00      0.81        11
          11       1.00      0.95      0.98        21
          12       0.66      1.00      0.79        19
          13       1.00      0.54      0.70        24
          14       0.86      1.00      0.93        19
          15       1.00      1.00      1.00        17
          16       1.00      1.00      1.00        1

In [45]:
# Train the random forest that gets pickled as the final model. A fixed seed
# makes both the saved model and the metrics printed here reproducible.
model_rfc = RandomForestClassifier(random_state=42)
model_rfc.fit(X_train, y_train)

# Evaluate on the held-out rows.
y_pred = model_rfc.predict(X_test)
print("Classification Report \n", classification_report(y_test, y_pred))
print("Confusion Matrix \n", confusion_matrix(y_test, y_pred))

Classification Report 
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        23
           1       1.00      1.00      1.00        21
           2       1.00      1.00      1.00        20
           3       1.00      1.00      1.00        26
           4       1.00      1.00      1.00        27
           5       1.00      1.00      1.00        17
           6       1.00      1.00      1.00        17
           7       1.00      1.00      1.00        14
           8       0.92      1.00      0.96        23
           9       1.00      1.00      1.00        20
          10       0.92      1.00      0.96        11
          11       1.00      1.00      1.00        21
          12       1.00      1.00      1.00        19
          13       1.00      0.96      0.98        24
          14       1.00      1.00      1.00        19
          15       1.00      1.00      1.00        17
          16       1.00      1.00      1.00        14
   

In [46]:
import pickle

# Persist the fitted label encoder, model and scaler. Each file is opened in a
# `with` block so it is flushed and closed as soon as it has been written,
# instead of leaving the handle open until garbage collection.
with open('label_encoder.pkl', 'wb') as fh:
    pickle.dump(encoder, fh)
with open('crop_recommendation_model.pkl', 'wb') as fh:
    pickle.dump(model_rfc, fh)
with open('minmax_scaler.pkl', 'wb') as fh:
    pickle.dump(scaler, fh)

In [47]:
import numpy as np
import pandas as pd
import pickle

# Reload the pickled encoder, model and scaler. `with` closes each file handle
# right after reading.
# NOTE: pickle.load can execute arbitrary code from the file; only load
# artifacts that come from a trusted source.
with open('label_encoder.pkl', 'rb') as fh:
    encoder = pickle.load(fh)
with open('crop_recommendation_model.pkl', 'rb') as fh:
    model = pickle.load(fh)
with open('minmax_scaler.pkl', 'rb') as fh:
    scaler = pickle.load(fh)

def predict_crop(N, P, K, temperature, humidity, ph, rainfall):
    """Return the label predicted for a single set of the seven feature values.

    The values are placed in a one-row DataFrame, transformed with the
    module-level `scaler`, classified with the module-level `model`, and the
    predicted class id is mapped back through `encoder.inverse_transform`.

    Returns:
        The first (and only) element of the decoded prediction.
    """
    feature_names = ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']
    raw_row = pd.DataFrame(
        [[N, P, K, temperature, humidity, ph, rainfall]],
        columns=feature_names,
    )

    # Wrap the scaled array back into a DataFrame so the model is given the
    # same column names as the unscaled input.
    scaled_row = pd.DataFrame(scaler.transform(raw_row), columns=raw_row.columns)

    encoded_label = model.predict(scaled_row)
    return encoder.inverse_transform(encoded_label)[0]
# Run the prediction function once on a single example input and print the result.
sample_input = (20, 40, 40, 25.0, 80.0, 6.5, 100.0)
result = predict_crop(*sample_input)
print(result)


papaya


In [48]:
# Ten example inputs, each ordered as
# (N, P, K, temperature, humidity, ph, rainfall) to match predict_crop's parameters.
test_inputs = [
    (90, 40, 40, 25.0, 80.0, 6.5, 100.0),
    (60, 30, 20, 22.5, 75.0, 6.0, 120.0),
    (80, 60, 50, 27.0, 82.0, 6.8, 95.0),
    (100, 45, 35, 30.0, 85.0, 7.0, 110.0),
    (70, 55, 65, 28.0, 78.0, 6.4, 105.0),
    (65, 40, 50, 26.0, 70.0, 5.8, 90.0),
    (55, 25, 45, 24.0, 72.0, 6.3, 130.0),
    (85, 60, 55, 29.0, 90.0, 6.7, 115.0),
    (95, 50, 40, 31.0, 88.0, 6.9, 102.0),
    (50, 45, 40, 23.0, 88.0, 2.1, 85.0),
]

# Predict for every example and print a numbered line per input (numbering starts at 1).
for position, reading in enumerate(test_inputs, start=1):
    result = predict_crop(*reading)
    print(f"{position}. Input: {reading} -> Recommended Crop: {result}")


1. Input: (90, 40, 40, 25.0, 80.0, 6.5, 100.0) -> Recommended Crop: jute
2. Input: (60, 30, 20, 22.5, 75.0, 6.0, 120.0) -> Recommended Crop: maize
3. Input: (80, 60, 50, 27.0, 82.0, 6.8, 95.0) -> Recommended Crop: banana
4. Input: (100, 45, 35, 30.0, 85.0, 7.0, 110.0) -> Recommended Crop: jute
5. Input: (70, 55, 65, 28.0, 78.0, 6.4, 105.0) -> Recommended Crop: papaya
6. Input: (65, 40, 50, 26.0, 70.0, 5.8, 90.0) -> Recommended Crop: coffee
7. Input: (55, 25, 45, 24.0, 72.0, 6.3, 130.0) -> Recommended Crop: pomegranate
8. Input: (85, 60, 55, 29.0, 90.0, 6.7, 115.0) -> Recommended Crop: papaya
9. Input: (95, 50, 40, 31.0, 88.0, 6.9, 102.0) -> Recommended Crop: jute
10. Input: (50, 45, 40, 23.0, 88.0, 2.1, 85.0) -> Recommended Crop: papaya


In [49]:
# Print the installed scikit-learn version for reference.
import sklearn

sklearn_version = sklearn.__version__
print(sklearn_version)

1.6.1
