In [40]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.metrics import accuracy_score

In [41]:
dataset = pd.read_csv("Crop_recommendation.csv")
dataset.head()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.71734,rice


In [42]:
dataset.shape

(2200, 8)

In [43]:
dataset.isnull().sum()

N              0
P              0
K              0
temperature    0
humidity       0
ph             0
rainfall       0
label          0
dtype: int64

In [44]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2200 entries, 0 to 2199
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   N            2200 non-null   int64  
 1   P            2200 non-null   int64  
 2   K            2200 non-null   int64  
 3   temperature  2200 non-null   float64
 4   humidity     2200 non-null   float64
 5   ph           2200 non-null   float64
 6   rainfall     2200 non-null   float64
 7   label        2200 non-null   object 
dtypes: float64(4), int64(3), object(1)
memory usage: 137.6+ KB


In [45]:
dataset.duplicated().sum()

np.int64(0)

In [46]:
dataset.describe()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall
count,2200.0,2200.0,2200.0,2200.0,2200.0,2200.0,2200.0
mean,50.551818,53.362727,48.149091,25.616244,71.481779,6.46948,103.463655
std,36.917334,32.985883,50.647931,5.063749,22.263812,0.773938,54.958389
min,0.0,5.0,5.0,8.825675,14.25804,3.504752,20.211267
25%,21.0,28.0,20.0,22.769375,60.261953,5.971693,64.551686
50%,37.0,51.0,32.0,25.598693,80.473146,6.425045,94.867624
75%,84.25,68.0,49.0,28.561654,89.948771,6.923643,124.267508
max,140.0,145.0,205.0,43.675493,99.981876,9.935091,298.560117


In [47]:
dataset["label"].unique()

array(['rice', 'maize', 'chickpea', 'kidneybeans', 'pigeonpeas',
       'mothbeans', 'mungbean', 'blackgram', 'lentil', 'pomegranate',
       'banana', 'mango', 'grapes', 'watermelon', 'muskmelon', 'apple',
       'orange', 'papaya', 'coconut', 'cotton', 'jute', 'coffee'],
      dtype=object)

In [48]:
dataset_dict = {
    "rice":1,
    "maize":2,
    "chickpea":3,
    "kidneybeans":4,
    "pigeonpeas" :5,
    "mothbeans":6,
    "mungbean":7,
    "blackgram":8,
    "lentil":9,
    "pomegranate":10,
    "banana":11,
    "mango":12,
    "grapes":13,
    "watermelon":14,
    "muskmelon":15,
    "apple":16,
    "orange":17,
    "papaya":18,
    "coconut":19,
    "cotton":20,
    "jute":21,
    "coffee":22
}

In [49]:
dataset["label"] = dataset["label"].map(dataset_dict)
dataset.head()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,1
1,85,58,41,21.770462,80.319644,7.038096,226.655537,1
2,60,55,44,23.004459,82.320763,7.840207,263.964248,1
3,74,35,40,26.491096,80.158363,6.980401,242.864034,1
4,78,42,42,20.130175,81.604873,7.628473,262.71734,1


In [50]:
x = dataset.drop("label", axis=1)
y = dataset["label"]

In [51]:
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.2, random_state=42)

MinMax Scaler

In [52]:
mx = MinMaxScaler()
x_train = mx.fit_transform(x_train)
x_test = mx.fit_transform(x_test)

Standard Scaler

In [53]:
sc = StandardScaler()
sc.fit(x_train)
x_train = sc.transform(x_train)
x_test = sc.transform(x_test)


In [54]:
models = {
    "LogisticRegression" : LogisticRegression(),
    "GaussianNB": GaussianNB(),
    "SVC" : SVC(),
    "KNeighborsClassifier": KNeighborsClassifier(),
    "DecisionTreeClassifier" : DecisionTreeClassifier(),
    "ExtraTreeClassifier": ExtraTreeClassifier(),
    "RandomForestClassifier":RandomForestClassifier(),
    "BaggingClassifier":BaggingClassifier(),
    "GradientBoostingClassifier":GradientBoostingClassifier(),
    "AdaBoostClassifier": AdaBoostClassifier()
}

In [55]:
for name, model in models.items():
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    score = accuracy_score(y_pred, y_test)
    print(f"{name} model accuracy :{score*100}")

LogisticRegression model accuracy :95.68181818181817
GaussianNB model accuracy :99.31818181818181
SVC model accuracy :97.04545454545455
KNeighborsClassifier model accuracy :96.36363636363636
DecisionTreeClassifier model accuracy :97.95454545454545
ExtraTreeClassifier model accuracy :91.36363636363637
RandomForestClassifier model accuracy :98.63636363636363
BaggingClassifier model accuracy :97.95454545454545
GradientBoostingClassifier model accuracy :96.36363636363636
AdaBoostClassifier model accuracy :14.318181818181818


In [97]:
randclf = RandomForestClassifier ()
randclf.fit(x_train, y_train)
y_pred = randclf.predict(x_test)
print("Accuracy score:",accuracy_score(y_test, y_pred)*100)
randclf.score(x_train, y_train)*100, randclf.score(x_test, y_test)*100

Accuracy score: 99.0909090909091


(100.0, 99.0909090909091)

In [98]:
dataset.columns

Index(['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall', 'label'], dtype='object')

In [99]:
def recommendation(N, P, K, temperature, humidity, ph, rainfall):
    features = np.array([[N, P, K, temperature, humidity, ph, rainfall]])
    mx_features = mx.fit_transform(features)
    sc_mx_festure = sc.fit_transform(mx_features)
    prediction = randclf.predict(sc_mx_festure).reshape(1,-1)
    return prediction[0]

In [100]:
dataset.iloc[1]

N               85.000000
P               58.000000
K               41.000000
temperature     21.770462
humidity        80.319644
ph               7.038096
rainfall       226.655537
label            1.000000
Name: 1, dtype: float64

In [101]:
N = 100.00  	
P = 58.00
K = 41.00
temperature = 21.774637
humidity = 80.413269
ph = 7.780064
rainfall = 226.774507

predict = recommendation(N,P,K,temperature,humidity,ph, rainfall)

In [102]:
print(predict[0])

18
