In [2]:
from __future__ import print_function
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report
from sklearn import metrics
from sklearn import tree
import warnings
# Suppress every warning for the rest of the session to keep cell output clean.
# NOTE(review): a blanket 'ignore' also hides deprecation and data-validation
# warnings raised by the libraries used below — consider narrowing the filter.
warnings.filterwarnings('ignore')


In [5]:
# Load the crop recommendation dataset; the relative path means the CSV must
# sit in the notebook's working directory.
df = pd.read_csv("crop_recommendation.csv")

In [6]:
# Preview the first rows to sanity-check that the file was parsed as expected.
df.head()

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall,label
0,90,42,43,20.879744,82.002744,6.502985,202.935536,rice
1,85,58,41,21.770462,80.319644,7.038096,226.655537,rice
2,60,55,44,23.004459,82.320763,7.840207,263.964248,rice
3,74,35,40,26.491096,80.158363,6.980401,242.864034,rice
4,78,42,42,20.130175,81.604873,7.628473,262.71734,rice


In [7]:
# (number of rows, number of columns) of the loaded frame.
df.shape

(2200, 8)

In [8]:
# Column dtypes: confirms which columns are numeric and which hold strings.
df.dtypes

N                int64
P                int64
K                int64
temperature    float64
humidity       float64
ph             float64
rainfall       float64
label           object
dtype: object

In [26]:
# Column names, in file order; the last column is the one used as the target below.
df.columns

Index(['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall', 'label'], dtype='object')

In [9]:
# Number of samples per crop label, to check class balance before modelling.
df['label'].value_counts()

rice           100
maize          100
jute           100
cotton         100
coconut        100
papaya         100
orange         100
apple          100
muskmelon      100
watermelon     100
grapes         100
mango          100
banana         100
pomegranate    100
lentil         100
blackgram      100
mungbean       100
mothbeans      100
pigeonpeas     100
kidneybeans    100
chickpea       100
coffee         100
Name: label, dtype: int64

In [15]:
# Feature matrix: every column except the last one (the label).
df.iloc[:,:-1]

Unnamed: 0,N,P,K,temperature,humidity,ph,rainfall
0,90,42,43,20.879744,82.002744,6.502985,202.935536
1,85,58,41,21.770462,80.319644,7.038096,226.655537
2,60,55,44,23.004459,82.320763,7.840207,263.964248
3,74,35,40,26.491096,80.158363,6.980401,242.864034
4,78,42,42,20.130175,81.604873,7.628473,262.717340
...,...,...,...,...,...,...,...
2195,107,34,32,26.774637,66.413269,6.780064,177.774507
2196,99,15,27,27.417112,56.636362,6.086922,127.924610
2197,118,33,30,24.131797,67.225123,6.362608,173.322839
2198,117,32,34,26.272418,52.127394,6.758793,127.175293


In [16]:
# Splitting into train and test data
# Features are every column except the last; the target is the last column (label).
# 20% of the rows are held out for testing.

from sklearn.model_selection import train_test_split

# Use an explicit integer seed. A bool such as `True` is silently interpreted
# as the integer 1, so seed 1 keeps the split reproducible and unchanged while
# making the intent clear.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    df.iloc[:, :-1],
    df.iloc[:, -1],
    test_size=0.2,
    random_state=1,
)

In [19]:
from sklearn.ensemble import RandomForestClassifier

# Build a 20-tree random forest with a fixed seed and fit it on the training
# split in one expression (fit returns the estimator itself).
RF = RandomForestClassifier(n_estimators=20, random_state=0).fit(Xtrain, Ytrain)

# Predict labels for the held-out rows.
predicted_values = RF.predict(Xtest)

# Overall accuracy on the test split, followed by the per-class breakdown.
x = metrics.accuracy_score(Ytest, predicted_values)
print("RF's Accuracy is: ", x)
print(classification_report(Ytest, predicted_values))

RF's Accuracy is:  0.9931818181818182
              precision    recall  f1-score   support

       apple       1.00      1.00      1.00        22
      banana       1.00      1.00      1.00        18
   blackgram       0.92      1.00      0.96        22
    chickpea       1.00      1.00      1.00        15
     coconut       1.00      1.00      1.00        18
      coffee       1.00      1.00      1.00        17
      cotton       1.00      1.00      1.00        22
      grapes       1.00      1.00      1.00        29
        jute       0.96      1.00      0.98        25
 kidneybeans       1.00      1.00      1.00        20
      lentil       1.00      1.00      1.00        18
       maize       1.00      1.00      1.00        20
       mango       1.00      1.00      1.00        17
   mothbeans       1.00      0.92      0.96        24
    mungbean       1.00      1.00      1.00        24
   muskmelon       1.00      1.00      1.00        26
      orange       1.00      1.00      1.00

In [21]:
from sklearn.model_selection import cross_val_score

In [22]:
# 5-fold cross-validation of the random forest on the full dataset
# (features: all columns but the last; target: the last column).
score = cross_val_score(RF, df.iloc[:,:-1], df.iloc[:,-1], cv=5)

In [23]:
# Display the per-fold scores returned by cross_val_score.
score

array([0.99772727, 0.99545455, 0.99772727, 0.99318182, 0.98863636])

In [24]:
#Saving trained Random Forest model with Pickle

import pickle
filename='RandomForest.pkl'

# The context manager guarantees the file is flushed and closed even if
# pickle.dump raises, so no handle is leaked and no partial file stays open.
# Reminder: only unpickle this file from a trusted location — pickle.load
# can execute arbitrary code.
with open(filename, 'wb') as RF_pkl:
    pickle.dump(RF, RF_pkl)

In [25]:
#Making predictions

# One sample, with values in the same order as the training feature columns
# (all columns of df except the last one).
data = np.array([[99, 15, 27, 27.417112, 56.636362, 6.086922, 127.924610]])

# The model was fitted on a DataFrame, so pass the sample as a DataFrame with
# the same column names. This lets scikit-learn check feature names instead of
# relying on positional order (and avoids its "X does not have valid feature
# names" warning, which the global warning filter would otherwise hide).
prediction = RF.predict(pd.DataFrame(data, columns=df.columns[:-1]))
print(prediction)

['coffee']
