In [2]:
import pandas as pd
from sklearn import model_selection
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier, RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

In [3]:
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
import numpy as np
import pylab as pl

In [4]:
# Load the crop dataset from CSV; the path is relative to the notebook's working directory.
dataset = pd.read_csv(filepath_or_buffer='Dataset/CropDataset.csv')

In [5]:
# Feature matrix: every column except the last. Target vector: the last column.
# Both are pulled out of the DataFrame through `.values`.
X, Y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

In [6]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as the test split; the fixed seed makes the
# partition repeatable from run to run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.20,
    random_state=42,
)

# Split dataset and normalize features

In [7]:
# Standardize the features.

from sklearn.preprocessing import StandardScaler

# The scaler is fitted on the training split only; that same fitted
# transform is then applied to both the training and the test split.
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

## Decision Tree

In [8]:
from sklearn.tree import DecisionTreeClassifier

# Decision tree that splits on entropy; seeded so repeated fits agree.
classifierDT = DecisionTreeClassifier(
    criterion='entropy',
    random_state=0,
)
# Kept as the final expression so the fitted estimator is shown as cell output.
classifierDT.fit(X=X_train, y=Y_train)

## Support Vector Machine

In [9]:
from sklearn.svm import SVC

# Support vector classifier with a sigmoid kernel, trained on the standardized features.
# NOTE(review): the accuracy recorded for this model further down (~74.8%) is well
# below the other classifiers -- the kernel choice may be worth revisiting.
classifierSVM = SVC(
    kernel='sigmoid',
    random_state=0,
)
# Kept as the final expression so the fitted estimator is shown as cell output.
classifierSVM.fit(X=X_train, y=Y_train)

## Random Forest

In [10]:
from sklearn.ensemble import RandomForestClassifier

# Random forest of 11 entropy-criterion trees; seeded for repeatable results.
classifierRF = RandomForestClassifier(
    n_estimators=11,
    criterion='entropy',
    random_state=42,
)
# Kept as the final expression so the fitted estimator is shown as cell output.
classifierRF.fit(X=X_train, y=Y_train)

## K-NN Classifier

In [11]:
from sklearn.neighbors import KNeighborsClassifier

# 5-nearest-neighbours classifier; the Minkowski metric with p=2 is the
# Euclidean distance.
classifierKNN = KNeighborsClassifier(
    n_neighbors=5,
    metric='minkowski',
    p=2,
)
# Kept as the final expression so the fitted estimator is shown as cell output.
classifierKNN.fit(X=X_train, y=Y_train)

## Ensembling

In [12]:
# Alias for the SVM classifier fitted above; it is not referenced by any other
# cell visible in this notebook.
SVM=classifierSVM

In [13]:
    estimators = []
estimators.append(('Desicion Tree', classifierDT))
estimators.append(('SVM', classifierSVM))
estimators.append(('Random Forest', classifierRF))
estimators.append(('kneighbors', classifierKNN))

In [14]:
# Majority-vote ('hard') ensemble built from the named base classifiers.
ClassifieEN = VotingClassifier(
    estimators=estimators,
    voting='hard',
)
# Kept as the final expression so the fitted ensemble is shown as cell output.
ClassifieEN.fit(X=X_train, y=Y_train)

In [15]:
# Test-set predictions from every fitted model, evaluated in the same order
# as before: DT, RF, SVM, KNN, ensemble.
Y_pred_DT, Y_pred_RF, Y_pred_SVM, Y_pred_KNN, Y_pred_EN = (
    fitted_model.predict(X_test)
    for fitted_model in (
        classifierDT,
        classifierRF,
        classifierSVM,
        classifierKNN,
        ClassifieEN,
    )
)

In [16]:
from sklearn.metrics import accuracy_score

# Fraction of test labels each model predicted correctly.
accuracy_DT, accuracy_RF, accuracy_SVM, accuracy_KNN, accuracy_EN = (
    accuracy_score(Y_test, predicted_labels)
    for predicted_labels in (
        Y_pred_DT,
        Y_pred_RF,
        Y_pred_SVM,
        Y_pred_KNN,
        Y_pred_EN,
    )
)

In [17]:
# Report each model's test accuracy as a percentage. `sep=""` glues the label
# and the number together exactly as string concatenation would.
print("Decision Tree: ", accuracy_DT * 100, sep="")
print("Random Forest: ", accuracy_RF * 100, sep="")
print("Support Vector Classifier: ", accuracy_SVM * 100, sep="")
print("KNN: ", accuracy_KNN * 100, sep="")
print("Ensemble: ", accuracy_EN * 100, sep="")

Decision Tree: 98.63636363636363
Random Forest: 99.0909090909091
Support Vector Classifier: 74.77272727272727
KNN: 95.9090909090909
Ensemble: 97.95454545454545


In [18]:
import pickle
from pathlib import Path

# Persist every fitted model under Saved/.
# Changes from the previous version of this cell:
#   * each file is opened in a `with` block so the handle is flushed and closed
#     (passing a bare `open(...)` to pickle.dump left the handles dangling);
#   * the output directory is created when it does not exist yet;
#   * the fitted StandardScaler is saved as well -- all models were trained on
#     standardized features, so raw inputs must go through this same scaler
#     before being passed to any of the reloaded models.
SAVE_DIR = Path('Saved')
SAVE_DIR.mkdir(parents=True, exist_ok=True)

artifacts_to_save = {
    'model_DT.pkl': classifierDT,
    'model_RF.pkl': classifierRF,
    'model_SVM.pkl': classifierSVM,
    'model_KNN.pkl': classifierKNN,
    'model_EN.pkl': ClassifieEN,
    'scaler.pkl': sc,
}

for artifact_filename, artifact in artifacts_to_save.items():
    with open(SAVE_DIR / artifact_filename, 'wb') as artifact_file:
        pickle.dump(artifact, artifact_file)

In [19]:
# Take the second test row and reshape it to one row by however many features,
# i.e. a 2-D array holding a single sample.
single_sample = X_test[1].reshape((1, -1))

In [20]:
# Classify the single sample with the SVM model. `single_sample` comes from
# X_test, which was already standardized by the scaler above.
classifierSVM.predict(single_sample)

array(['watermelon'], dtype=object)

In [21]:
# Show the feature values of the sample that was classified; these are the
# standardized values, not the raw measurements from the CSV.
print(single_sample)

[[ 1.84193961 -1.01412024  0.03580615  0.11785242  0.67956909 -0.28013033
  -0.96895253]]
