## Importing Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import xgboost 
from sklearn.metrics import classification_report, confusion_matrix

import pickle

import warnings
# Silence every warning for the rest of the session to keep cell output short.
# NOTE(review): this also hides deprecation warnings from pandas / sklearn /
# xgboost, so API changes will only surface as hard errors after an upgrade.
warnings.filterwarnings('ignore')

## Reading the data and checking its values

In [2]:
# Load the crop dataset; the path is relative to the notebook's working directory.
df = pd.read_csv('Crop_recommendation.csv')

In [3]:
# Peek at the first five rows to confirm the columns loaded as expected.
df.head()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.71734,rice


In [4]:
# Summary statistics for the numeric columns (the string `label` column is omitted).
df.describe()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall
count,2200.0,2200.0,2200.0,2200.0,2200.0,2200.0,2200.0
mean,50.551818,53.362727,48.149091,25.616244,71.481779,6.46948,103.463655
std,36.917334,32.985883,50.647931,5.063749,22.263812,0.773938,54.958389
min,0.0,5.0,5.0,8.825675,14.25804,3.504752,20.211267
25%,21.0,28.0,20.0,22.769375,60.261953,5.971693,64.551686
50%,37.0,51.0,32.0,25.598693,80.473146,6.425045,94.867624
75%,84.25,68.0,49.0,28.561654,89.948771,6.923643,124.267508
max,140.0,145.0,205.0,43.675493,99.981876,9.935091,298.560117


In [5]:
# Column dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2200 entries, 0 to 2199
Data columns (total 8 columns):
N              2200 non-null int64
P              2200 non-null int64
K              2200 non-null int64
temperature    2200 non-null float64
humidity       2200 non-null float64
ph             2200 non-null float64
rainfall       2200 non-null float64
label          2200 non-null object
dtypes: float64(4), int64(3), object(1)
memory usage: 137.6+ KB


In [6]:
# Number of distinct crop classes in the target column.
df.label.nunique()

22

In [7]:
# Missing values per column.
df.isnull().sum()

N              0
P              0
K              0
temperature    0
humidity       0
ph             0
rainfall       0
label          0
dtype: int64

In [8]:
# Rows per crop class, to check class balance.
df.label.value_counts()

mothbeans      100
rice           100
blackgram      100
mungbean       100
jute           100
muskmelon      100
lentil         100
watermelon     100
orange         100
apple          100
mango          100
chickpea       100
pigeonpeas     100
pomegranate    100
coffee         100
maize          100
papaya         100
coconut        100
cotton         100
banana         100
kidneybeans    100
grapes         100
Name: label, dtype: int64

In [9]:
# Count of fully duplicated rows.
df.duplicated().sum()

0

In [10]:
# Pairwise Pearson correlation between the numeric features.
# `df` still contains the string `label` column; recent pandas releases no
# longer drop non-numeric columns silently in corr() and raise instead, so the
# numeric columns are selected explicitly. This form works on old and new pandas.
df.select_dtypes(include='number').corr()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall
N,1.0,-0.23146,-0.140512,0.026504,0.190688,0.096683,0.05902
P,-0.23146,1.0,0.736232,-0.127541,-0.118734,-0.138019,-0.063839
K,-0.140512,0.736232,1.0,-0.160387,0.190859,-0.169503,-0.053461
temperature,0.026504,-0.127541,-0.160387,1.0,0.20532,-0.017795,-0.030084
humidity,0.190688,-0.118734,0.190859,0.20532,1.0,-0.008483,0.094423
ph,0.096683,-0.138019,-0.169503,-0.017795,-0.008483,1.0,-0.109069
rainfall,0.05902,-0.063839,-0.053461,-0.030084,0.094423,-0.109069,1.0


## Dividing into independent and dependent variables

In [11]:
# Feature matrix: every column except the target, as a plain NumPy array.
X = df.drop(columns='label').values
# Target vector: the crop name for each row.
y = df['label'].values

## Scaling values

In [12]:
# Scaling is currently disabled: X is passed on to the split unscaled.
# NOTE(review): if this is re-enabled, fit the scaler on the training split
# only; fitting on the full X before splitting leaks test-set statistics.
# sc = StandardScaler()
# X = sc.fit_transform(X)

## Splitting into train and test sets

In [13]:
# Hold out 20% of the rows for evaluation; random_state pins the shuffle so the
# split is repeatable.
# NOTE(review): the split is not stratified, so per-class test support is uneven
# (12 to 29 rows in the recorded classification report). Consider stratify=y.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=25)

## Creating model

In [14]:
# XGBoost classifier with library defaults; the repr printed after fitting shows
# the resolved values (e.g. n_estimators=100, max_depth=6).
xgb = xgboost.XGBClassifier()

In [15]:
# Train on the 80% split.
# NOTE(review): y_train holds string crop names. Newer xgboost releases are
# believed to require integer-encoded class labels; confirm against the
# installed version before upgrading.
xgb.fit(X_train, y_train)



XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
              importance_type='gain', interaction_constraints='',
              learning_rate=0.300000012, max_delta_step=0, max_depth=6,
              min_child_weight=1, missing=nan, monotone_constraints='()',
              n_estimators=100, n_jobs=8, num_parallel_tree=1,
              objective='multi:softprob', random_state=0, reg_alpha=0,
              reg_lambda=1, scale_pos_weight=None, subsample=1,
              tree_method='exact', validate_parameters=1, verbosity=None)

In [16]:
# Mean accuracy on the held-out test split.
xgb.score(X_test, y_test)

0.9795454545454545

In [17]:
# Predicted crop label for every test row; reused by the metrics cells.
y_pred = xgb.predict(X_test)

In [18]:
# Inspect one raw test sample; values follow the df column order
# N, P, K, temperature, humidity, ph, rainfall.
X_test[0]

array([111.        ,  88.        ,  55.        ,  29.44795403,
        78.34971537,   5.50539383,  96.45042585])

In [19]:
# Confusion matrix: rows are true labels, columns are predicted labels, both in
# sorted label order. In the recorded output the only sizeable confusion is
# row 8 (jute) predicted as column 20 (rice), 7 of 29 rows.
print(confusion_matrix(y_test, y_pred))

[[25  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0 24  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0 23  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0 17  0  0  0  0  0  1  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0 23  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0 19  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0 15  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0 12  0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0 22  0  0  0  0  0  0  0  0  0  0  0  7  0]
 [ 0  0  0  0  0  0  0  0  0 19  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  1  0  0  0  0  0  0  0 15  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0 19  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0 24  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0 19  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0

In [20]:
# Per-class precision, recall, F1 and test-set support.
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

       apple       1.00      1.00      1.00        25
      banana       1.00      1.00      1.00        24
   blackgram       0.96      1.00      0.98        23
    chickpea       1.00      0.94      0.97        18
     coconut       1.00      1.00      1.00        23
      coffee       1.00      1.00      1.00        19
      cotton       1.00      1.00      1.00        15
      grapes       1.00      1.00      1.00        12
        jute       1.00      0.76      0.86        29
 kidneybeans       0.95      1.00      0.97        19
      lentil       1.00      0.94      0.97        16
       maize       1.00      1.00      1.00        19
       mango       1.00      1.00      1.00        24
   mothbeans       1.00      1.00      1.00        19
    mungbean       1.00      1.00      1.00        20
   muskmelon       1.00      1.00      1.00        19
      orange       1.00      1.00      1.00        13
      papaya       1.00    

## Saving trained model in pickle format

In [21]:
# Persist the fitted classifier so it can be reloaded without retraining.
# NOTE: pickle files can execute arbitrary code when loaded; only unpickle
# this artifact from a trusted location.
filename = 'crop_recomd.pkl'

# The context manager closes the file handle even if pickle.dump raises,
# which the manual open()/close() pair did not guarantee.
with open(filename, 'wb') as xgb_model:
    pickle.dump(xgb, xgb_model)