In [59]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [60]:
# Load the crop-recommendation dataset from the working directory and preview
# the first rows to check that the columns parsed as expected.
data = pd.read_csv('Crop_recommendation.csv')
data.head()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.71734,rice


In [61]:
# Count missing values per column (isna is the canonical spelling of isnull).
data.isna().sum()

N              0
P              0
K              0
temperature    0
humidity       0
ph             0
rainfall       0
label          0
dtype: int64

In [62]:
# Show the column index: seven feature columns followed by the `label` target.
data.columns

Index(['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall', 'label'], dtype='object')

In [63]:
from sklearn.preprocessing import LabelEncoder
# Learn the label -> integer mapping first, then overwrite the `label` column
# with the integer codes. The fitted encoder is kept so that predictions can
# later be mapped back to label names.
encoder = LabelEncoder()
encoder.fit(data['label'])
data['label'] = encoder.transform(data['label'])

In [64]:
# Preview the frame again to confirm that `label` now holds integer codes.
data.head()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,20
1,85,58,41,21.770462,80.319644,7.038096,226.655537,20
2,60,55,44,23.004459,82.320763,7.840207,263.964248,20
3,74,35,40,26.491096,80.158363,6.980401,242.864034,20
4,78,42,42,20.130175,81.604873,7.628473,262.71734,20


In [65]:
# Number of rows per encoded class, to check class balance.
data['label'].value_counts()

label
20    100
11    100
8     100
6     100
4     100
17    100
16    100
0     100
15    100
21    100
7     100
12    100
1     100
19    100
10    100
2     100
14    100
13    100
18    100
9     100
3     100
5     100
Name: count, dtype: int64

In [66]:
import pickle
# Persist the fitted label encoder. The context manager guarantees the file
# handle is flushed and closed instead of being left open until garbage collection.
with open('encoder.pkl','wb') as encoder_file:
    pickle.dump(encoder, encoder_file)

In [67]:
# Target vector: the integer-encoded label column.
y = data['label']
# Feature matrix: every column except the target.
X = data.drop(columns='label')

In [68]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [69]:
from sklearn.preprocessing import StandardScaler
# Fit the scaler on the training split only, then apply that same fitted
# transformation to both splits so the test split never influences the fit.
# NOTE: X_train / X_test are overwritten with NumPy arrays here, so re-running
# this cell without re-running the split would fit on already-scaled data.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [70]:
# Persist the fitted scaler. The context manager guarantees the file handle is
# flushed and closed instead of being left open until garbage collection.
with open('scaler.pkl','wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

In [71]:
# Inspect the scaled training matrix (a NumPy array after the scaler transform).
X_train

array([[-9.03426596e-01, -1.12616170e+00, -6.68506601e-01, ...,
         9.36586183e-01,  1.93473784e-01,  5.14970176e-03],
       [-3.67051340e-01,  7.70358846e-01, -5.70589522e-01, ...,
        -1.00470485e-01,  8.63917548e-01, -6.05290566e-01],
       [-1.17161422e+00,  5.89737842e-01, -4.53089028e-01, ...,
        -3.82774991e-01,  1.05029771e+00, -1.04580687e+00],
       ...,
       [-1.06433917e+00, -5.24091685e-01, -3.35588533e-01, ...,
        -8.98381379e-01, -6.34357580e-04, -4.37358211e-02],
       [-1.06433917e+00,  2.12501638e+00,  3.05234239e+00, ...,
         3.86340190e-01, -1.48467347e-01, -5.69036842e-01],
       [-5.01145154e-01,  7.40255346e-01, -5.11839275e-01, ...,
        -4.18045489e-01,  6.86860180e-01, -8.96531475e-01]])

In [72]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import ExtraTreeClassifier,DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import AdaBoostClassifier,GradientBoostingClassifier
from sklearn.metrics import accuracy_score

In [73]:
# Candidate classifiers to compare, keyed by the name printed in the results.
# All estimators are constructed with their default hyperparameters.
# Each key must appear exactly once: a repeated key in a dict literal silently
# replaces the earlier entry, so the former duplicate 'AdaBoostClassifier'
# entry has been removed.
models = {
    'AdaBoostClassifier':AdaBoostClassifier(),
    'GaussianNB':GaussianNB(),
    'GradientBoostingClassifier':GradientBoostingClassifier(),
    'SVC':SVC(),
    'RandomForestClassifier':RandomForestClassifier(),
    'DecisionTreeClassifier':DecisionTreeClassifier(),
    'LogisticRegression':LogisticRegression(),
    'KNeighborsClassifier':KNeighborsClassifier(),
    'ExtraTreeClassifier':ExtraTreeClassifier()
}

In [74]:
# Train every candidate on the scaled training split and report its accuracy
# on the held-out test split.
for name, model in models.items():
    # fit() returns the estimator itself, so prediction can be chained.
    y_pred = model.fit(X_train, y_train).predict(X_test)
    score = accuracy_score(y_test, y_pred)
    print(f'{name} accuracy is {score}')



AdaBoostClassifier accuracy is 0.09545454545454546
GaussianNB accuracy is 0.9954545454545455
GradientBoostingClassifier accuracy is 0.9818181818181818
SVC accuracy is 0.9681818181818181
RandomForestClassifier accuracy is 0.9931818181818182
DecisionTreeClassifier accuracy is 0.9840909090909091
LogisticRegression accuracy is 0.9636363636363636
KNeighborsClassifier accuracy is 0.9568181818181818
ExtraTreeClassifier accuracy is 0.9136363636363637


In [75]:
# Final model: a RandomForestClassifier with default hyperparameters, fitted on
# the scaled training split.
rnd_clf = RandomForestClassifier()
rnd_clf.fit(X_train,y_train)

# Persist the fitted model. The context manager guarantees the file handle is
# flushed and closed instead of being left open until garbage collection.
with open('rnd_clf.pkl','wb') as model_file:
    pickle.dump(rnd_clf, model_file)

In [86]:
def recommendation(N,P,K,temperature,humidity,ph,rainfall):
    """Recommend a crop label for one set of measurements.

    The inputs are scaled with the fitted ``scaler``, classified with
    ``rnd_clf``, and the predicted class code is mapped back to its label
    with ``encoder``. All three objects are read from the notebook's global
    namespace and must already be fitted.

    Parameters
    ----------
    N, P, K : numeric
        Values for the ``N``, ``P`` and ``K`` feature columns.
    temperature, humidity, ph, rainfall : numeric
        Values for the feature columns of the same names.

    Returns
    -------
    The decoded label predicted for the single input row (e.g. ``'rice'``).
    """
    # Pass a one-row DataFrame carrying the training column names rather than a
    # bare array: the scaler was fitted on a DataFrame, so this lets it check
    # the feature names and avoids the "X does not have valid feature names"
    # warning.
    features = pd.DataFrame(
        [[N, P, K, temperature, humidity, ph, rainfall]],
        columns=['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall'],
    )
    scaled_features = scaler.transform(features)
    # predict() already returns a 1-D array with one class code per input row.
    prediction = rnd_clf.predict(scaled_features)
    return encoder.inverse_transform(prediction)[0]

In [87]:
# Preview the first rows again; their feature values serve as sample inputs.
data.head()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,20
1,85,58,41,21.770462,80.319644,7.038096,226.655537,20
2,60,55,44,23.004459,82.320763,7.840207,263.964248,20
3,74,35,40,26.491096,80.158363,6.980401,242.864034,20
4,78,42,42,20.130175,81.604873,7.628473,262.71734,20


In [88]:
# Sample measurements for a quick sanity check of the end-to-end pipeline.
N, P, K = 85, 58, 41
temperature, humidity = 21.77046169, 80.31964408
ph, rainfall = 7.038096361, 226.6555374

predict = recommendation(
    N=N,
    P=P,
    K=K,
    temperature=temperature,
    humidity=humidity,
    ph=ph,
    rainfall=rainfall,
)

predict



'rice'

In [89]:
# Column names the scaler recorded when it was fitted; inputs passed to
# scaler.transform should use these names in this order.
scaler.feature_names_in_

array(['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall'],
      dtype=object)