In [None]:

import numpy as np
import pandas as pd


# sklearn tools
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split

# 3 classifiers
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

# evaluate
from sklearn.metrics import classification_report, confusion_matrix


In [None]:
# Load the crop-recommendation dataset from the CSV in the working directory.
csv_path = "Crop_recommendation.csv"
df = pd.read_csv(csv_path)

# Bare trailing expression so Jupyter renders the frame
# (pandas truncates the display of long frames on its own).
df

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.717340,rice
...,...,...,...,...,...,...,...,...
2195,107,34,32,26.774637,66.413269,6.780064,177.774507,coffee
2196,99,15,27,27.417112,56.636362,6.086922,127.924610,coffee
2197,118,33,30,24.131797,67.225123,6.362608,173.322839,coffee
2198,117,32,34,26.272418,52.127394,6.758793,127.175293,coffee


In [None]:

# Encode the crop names in `label` as integer class ids.
# The fitted encoder is kept in `encoder` so predictions can later be mapped
# back to crop names with `encoder.inverse_transform`.
#
# This cell overwrites df['label'] in place, so running it a second time would
# refit the encoder on the integer ids and silently lose the crop names.
# Fail loudly instead of corrupting the encoder.
if pd.api.types.is_numeric_dtype(df['label']):
    raise RuntimeError(
        "df['label'] is already numeric; reload the CSV before re-running this cell."
    )

encoder = LabelEncoder()
df['label'] = encoder.fit_transform(df['label'])
df

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,20
1,85,58,41,21.770462,80.319644,7.038096,226.655537,20
2,60,55,44,23.004459,82.320763,7.840207,263.964248,20
3,74,35,40,26.491096,80.158363,6.980401,242.864034,20
4,78,42,42,20.130175,81.604873,7.628473,262.717340,20
...,...,...,...,...,...,...,...,...
2195,107,34,32,26.774637,66.413269,6.780064,177.774507,5
2196,99,15,27,27.417112,56.636362,6.086922,127.924610,5
2197,118,33,30,24.131797,67.225123,6.362608,173.322839,5
2198,117,32,34,26.272418,52.127394,6.758793,127.175293,5


In [None]:
# Feature columns to rescale; the `label` column is left untouched.
numeric_cols = ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']

# Min-max scaler, kept in `scaler` because later cells pickle and reuse it.
scaler = MinMaxScaler()

# Fit on the feature columns and write the scaled values back into `df`.
# NOTE(review): the scaler is fitted on the full dataset before the train/test
# split, so held-out rows influence the scaling range - confirm this is acceptable.
features_scaled = scaler.fit_transform(df[numeric_cols])
df[numeric_cols] = features_scaled

# Quick look at the first rows after scaling.
print(df.head())


          N         P      K  temperature  humidity        ph  rainfall  label
0  0.642857  0.264286  0.190     0.345886  0.790267  0.466264  0.656458     20
1  0.607143  0.378571  0.180     0.371445  0.770633  0.549480  0.741675     20
2  0.428571  0.357143  0.195     0.406854  0.793977  0.674219  0.875710     20
3  0.528571  0.214286  0.175     0.506901  0.768751  0.540508  0.799905     20
4  0.557143  0.264286  0.185     0.324378  0.785626  0.641291  0.871231     20


In [None]:
# Separate the feature columns from the encoded target.
x = df.drop(columns='label')
y = df['label']

# Hold out 20% of the rows for evaluation.
# random_state pins the shuffle so the split (and every metric computed from
# it) is reproducible across kernel restarts; stratify=y keeps each crop's
# proportion the same in the train and test sets.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:
# Candidate classifiers compared on the same train/test split.
# The two tree ensembles are randomised, so they get a fixed random_state to
# make the printed metrics reproducible; LogisticRegression keeps its defaults.
models = {
    "lg": LogisticRegression(),
    "RFC": RandomForestClassifier(random_state=42),
    "GBC": GradientBoostingClassifier(random_state=42)
}

for name, model in models.items():
    model.fit(x_train, y_train)       # train on the training split
    y_pred = model.predict(x_test)    # predict the held-out rows

    # Per-model report: confusion matrix plus per-class precision/recall/F1.
    print(f"\n{name} Results")
    print('Confusion Matrix:\n', confusion_matrix(y_test, y_pred))
    print('Classification Report:\n', classification_report(y_test, y_pred))



lg Results
Confusion Matrix:
 [[16  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0 21  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0 14  0  0  0  0  0  0  0  2  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0 30  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0 16  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0 25  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0 15  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0 21  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0 13  0  0  0  0  0  0  0  0  0  0  0  7  0]
 [ 0  0  0  0  0  0  0  0  0 14  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  1  0  0  0  0  0  0  0 19  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0 15  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0 21  0  0  0  0  0  0  0  0  0]
 [ 0  0  1  0  0  0  0  0  0  0  1  0  6 16  1  0  0  0  0  0  0  0]
 [ 

In [None]:
# Train the gradient-boosting model that is pickled and used for prediction
# further down. A fixed random_state keeps the fitted model, and therefore the
# saved artefact, reproducible from run to run.
model_gbc = GradientBoostingClassifier(random_state=42)

model_gbc.fit(x_train, y_train)

# Evaluate on the held-out rows.
y_pred = model_gbc.predict(x_test)

print('Confusion Matrix:\n', confusion_matrix(y_test, y_pred))
print('Classification Report:\n', classification_report(y_test, y_pred))


Confusion Matrix:
 [[16  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0 21  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0 16  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0 30  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0 16  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0 25  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0 15  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0 21  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0 19  0  0  0  0  0  0  0  0  0  0  0  1  0]
 [ 0  0  0  0  0  0  0  0  0 14  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  1  0  0  0  0  0  0  0 18  0  0  1  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0 15  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0 21  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0 25  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  

In [None]:
import os
import pickle

# make sure the 'models' folder exists before writing into it
os.makedirs("models", exist_ok=True)

# Persist the fitted objects needed at prediction time.
# Each file is opened in a `with` block so the handle is flushed and closed
# even if pickling raises; a bare open(...) passed straight to pickle.dump is
# never closed explicitly.
artifacts = {
    "models/encoder.pkl": encoder,
    "models/model_gbc.pkl": model_gbc,
    "models/scaler.pkl": scaler,
}
for artifact_path, artifact in artifacts.items():
    with open(artifact_path, 'wb') as fh:
        pickle.dump(artifact, fh)


In [None]:
import numpy as np
import pickle
import pandas as pd

# load encoder, scaler, and model
# Files are opened with `with` so each handle is closed right after loading.
# NOTE: pickle.load can execute arbitrary code - only load files that this
# notebook (or another trusted source) produced.
with open("models/encoder.pkl", 'rb') as fh:
    encoder = pickle.load(fh)
with open("models/scaler.pkl", 'rb') as fh:
    scaler = pickle.load(fh)
with open("models/model_gbc.pkl", 'rb') as fh:
    model_gbc = pickle.load(fh)

def predict_crop(N, P, K, temperature, humidity, ph, rainfall):
    """Recommend a crop for one set of soil and weather measurements.

    Uses the module-level ``scaler``, ``model_gbc`` and ``encoder`` objects.

    Parameters
    ----------
    N, P, K, temperature, humidity, ph, rainfall : numeric
        Raw (unscaled) feature values; they are scaled here with ``scaler``.

    Returns
    -------
    The decoded label for the single input row, i.e. the first element of
    ``encoder.inverse_transform`` applied to the model's prediction.
    """
    feature_names = ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']

    # single-row frame carrying the feature column names
    input_df = pd.DataFrame([[N, P, K, temperature, humidity, ph, rainfall]],
                            columns=feature_names)

    # scaler.transform returns a bare ndarray; wrap it back into a DataFrame so
    # the model, which was fitted on a DataFrame, receives matching feature
    # names (otherwise scikit-learn warns about missing feature names on
    # every call). The values passed to the model are unchanged.
    input_scaled = pd.DataFrame(scaler.transform(input_df),
                                columns=feature_names,
                                index=input_df.index)

    # predict the encoded class, then map it back to the original label
    prediction_encoded = model_gbc.predict(input_scaled)
    prediction = encoder.inverse_transform(prediction_encoded)

    return prediction[0]

# Example: a single prediction, passing the seven inputs by keyword.
# NOTE(review): temperature=80 is far above the ~20-27 values visible in the
# data preview - confirm this value is intended.
result = predict_crop(
    N=90, P=40, K=25,
    temperature=80, humidity=70,
    ph=6.5, rainfall=100.0,
)
print("Recommended Crop:", result)




Recommended Crop: maize


In [None]:
# Ten sample inputs, each ordered as
# (N, P, K, temperature, humidity, ph, rainfall) to match predict_crop.
test_inputs = [
    (98, 40, 40, 25.0, 80.0, 6.5, 100.0),
    (68, 30, 20, 22.5, 75.0, 6.0, 120.0),
    (80, 60, 50, 27.0, 82.8, 6.8, 95.0),
    (100, 45, 45, 30.0, 85.0, 7.0, 110.0),
    (70, 55, 65, 28.8, 78.0, 6.4, 185.8),
    (65, 40, 50, 26.0, 70.0, 5.8, 90.8),
    (55, 20, 25, 24.0, 72.8, 6.3, 130.0),
    (85, 65, 60, 29.0, 90.0, 6.7, 115.0),
    (95, 50, 70, 31.8, 88.8, 6.9, 102.0),
    (58, 25, 30, 23.0, 68.8, 6.1, 85.8),
]

# Predict every sample and print it with a 1-based counter.
for i, values in enumerate(test_inputs, start=1):
    n_val, p_val, k_val, temp_val, humidity_val, ph_val, rain_val = values
    result = predict_crop(n_val, p_val, k_val, temp_val, humidity_val, ph_val, rain_val)
    print(f"{i}. Input: {values}\n   Recommended Crop: {result}\n")




1. Input: (98, 40, 40, 25.0, 80.0, 6.5, 100.0)
   Recommended Crop: jute





2. Input: (68, 30, 20, 22.5, 75.0, 6.0, 120.0)
   Recommended Crop: maize





3. Input: (80, 60, 50, 27.0, 82.8, 6.8, 95.0)
   Recommended Crop: jute





4. Input: (100, 45, 45, 30.0, 85.0, 7.0, 110.0)
   Recommended Crop: cotton





5. Input: (70, 55, 65, 28.8, 78.0, 6.4, 185.8)
   Recommended Crop: jute





6. Input: (65, 40, 50, 26.0, 70.0, 5.8, 90.8)
   Recommended Crop: maize





7. Input: (55, 20, 25, 24.0, 72.8, 6.3, 130.0)
   Recommended Crop: pomegranate





8. Input: (85, 65, 60, 29.0, 90.0, 6.7, 115.0)
   Recommended Crop: banana





9. Input: (95, 50, 70, 31.8, 88.8, 6.9, 102.0)
   Recommended Crop: jute





10. Input: (58, 25, 30, 23.0, 68.8, 6.1, 85.8)
   Recommended Crop: jute



In [None]:
# Print the installed scikit-learn version for reference alongside the saved
# pickles (presumably because pickled estimators are version-sensitive - confirm).
import sklearn

sklearn_version = sklearn.__version__
print(sklearn_version)

1.6.1
