### Importando Bibliotecas

In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots


from sklearn.metrics import classification_report
from sklearn import metrics
from sklearn import tree
from sklearn.model_selection import cross_val_score
import warnings
# Suppress every warning for the rest of the session. This keeps cell output
# short, but it also hides deprecation notices raised by the libraries above.
warnings.filterwarnings('ignore')

### Lendo o dataset

In [None]:
# Load the dataset from the working directory and preview its first five rows.
crop = pd.read_csv(filepath_or_buffer='Crop_recommendation.csv')
crop.head()

### Escolhendo Features e Label Encoder

In [None]:
from sklearn.preprocessing import LabelEncoder

def encode_and_map_labels(column):
    """Encode a column of class labels as integers and build the decoding map.

    Parameters
    ----------
    column : array-like
        Original class labels (for example ``crop['label']``).

    Returns
    -------
    encoded_labels : numpy.ndarray
        Integer code that ``LabelEncoder`` assigned to each entry of ``column``.
    label_mapping : dict
        Maps each integer code back to its original label (code -> label).
    """
    encoder = LabelEncoder()
    encoded_labels = encoder.fit_transform(column)

    # classes_[i] is the label that was encoded as i, so enumerating it gives
    # the code -> label map directly instead of zipping over every row.
    label_mapping = dict(enumerate(encoder.classes_))

    return encoded_labels, label_mapping

# Suponha que você tenha uma coluna 'label' no DataFrame 'crop'
encoded_labels, label_mapping = encode_and_map_labels(crop['label'])

# Agora, você pode recuperar os valores originais a partir dos valores codificados
original_labels = [label_mapping[label] for label in encoded_labels]

print(label_mapping)


In [None]:
# Print the labels recovered from the integer codes (one entry per encoded label).
print(original_labels)

In [None]:
# Predictor columns used by every model below; the raw 'label' column is the target.
feature_columns = ['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall']
features = crop[feature_columns]
target = crop['label']

In [None]:
# Parallel lists filled by the model cells below and read by the comparison plot.
# `acc` collects the hold-out accuracy of each model.
acc = []
# `model` collects the display NAME of each model (plain strings, not estimators).
model = []

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=2,
)

### Modelos de algoritmos de classificação

- KNeighbors Classifier

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# K-nearest-neighbours classifier with the library's default settings.
knn = KNeighborsClassifier()
knn.fit(x_train, y_train)

# Score the hold-out split and record the result for the comparison plot.
predicted_values = knn.predict(x_test)
x = metrics.accuracy_score(y_true=y_test, y_pred=predicted_values)
acc.append(x)
model.append('K Nearest Neighbours')

print("KNN precisão: ", x)
print(classification_report(y_test, predicted_values))

In [None]:
# Show the distinct values present in the 'label' column.
crop['label'].unique()

In [None]:
# 5-fold cross-validation of the KNN estimator over the complete dataset.
score = cross_val_score(knn, X=features, y=target, cv=5)
print('Cross validation: ', score)

In [None]:
# Training accuracy: scored once, and the stored value is reused for printing.
knn_train_accuracy = knn.score(x_train, y_train)
print("Precisão do treino:", knn_train_accuracy)
# Test accuracy: same pattern, avoiding a second pass over the test split.
knn_test_accuracy = knn.score(x_test, y_test)
print("Precisão do teste:", knn_test_accuracy)

### Hiper-parâmetros para o KNN

In [None]:
from sklearn.model_selection import GridSearchCV

# Candidate KNN settings: neighbour counts 12..18, both weighting schemes
# and three distance metrics.
grid_params = {
    'n_neighbors': list(range(12, 19)),
    'weights': ['uniform', 'distance'],
    'metric': ['minkowski', 'euclidean', 'manhattan'],
}

# Exhaustive search with 3-fold cross-validation on the training split,
# using every available core.
gs = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid=grid_params,
    cv=3,
    n_jobs=-1,
    verbose=1,
)

g_res = gs.fit(x_train, y_train)

In [None]:
# Parameter combination selected by the grid search above.
g_res.best_params_

In [None]:
# Cross-validated score obtained by the selected parameter combination.
g_res.best_score_

### Decision Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Entropy-based decision tree capped at depth 5, with a fixed random_state.
DT = DecisionTreeClassifier(criterion="entropy", random_state=2, max_depth=5)
DT.fit(x_train, y_train)

# Score the hold-out split and record the result for the comparison plot.
predicted_values = DT.predict(x_test)
x = metrics.accuracy_score(y_test, predicted_values)
acc.append(x)
model.append('Decision Tree')

# Printed as a fraction (not x*100) so the value is directly comparable with
# the accuracies printed for the other models in this notebook.
print("Decision Tree precisão: ", x)

print(classification_report(y_test, predicted_values))

In [None]:
# 5-fold cross-validation of the decision tree over the complete dataset.
score = cross_val_score(DT, X=features, y=target, cv=5)
print('Cross validation score: ', score)

In [None]:
# Training accuracy: scored once, and the stored value is reused for printing.
dt_train_accuracy = DT.score(x_train, y_train)
print("Precisão do treino:", dt_train_accuracy)
# Test accuracy: same pattern, avoiding a second pass over the test split.
dt_test_accuracy = DT.score(x_test, y_test)
print("Precisão do teste:", dt_test_accuracy)

### Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with 20 trees and a fixed random_state.
RF = RandomForestClassifier(n_estimators=20, random_state=0)
RF.fit(x_train, y_train)

# Score the hold-out split and record the result for the comparison plot.
predicted_values = RF.predict(x_test)
x = metrics.accuracy_score(y_true=y_test, y_pred=predicted_values)
acc.append(x)
model.append('RF')

print("Random Forest precisão: ", x)
print(classification_report(y_test, predicted_values))

In [None]:
# 5-fold cross-validation of the random forest over the complete dataset.
score = cross_val_score(RF, X=features, y=target, cv=5)
print('Cross validation: ', score)

In [None]:
# Training accuracy: scored once, and the stored value is reused for printing.
rf_train_accuracy = RF.score(x_train, y_train)
print("Precisão do treino:", rf_train_accuracy)
# Test accuracy: same pattern, avoiding a second pass over the test split.
rf_test_accuracy = RF.score(x_test, y_test)
print("Precisão do teste:", rf_test_accuracy)

### Naive Bayes

In [None]:
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes with the library's default settings.
NaiveBayes = GaussianNB()
NaiveBayes.fit(x_train, y_train)

# Score the hold-out split and record the result for the comparison plot.
predicted_values = NaiveBayes.predict(x_test)
x = metrics.accuracy_score(y_true=y_test, y_pred=predicted_values)
acc.append(x)
model.append('Naive Bayes')

print("Naive Bayes precisao: ", x)
print(classification_report(y_test, predicted_values))

In [None]:
# 5-fold cross-validation of the naive Bayes model over the complete dataset.
score = cross_val_score(NaiveBayes, X=features, y=target, cv=5)
print('Cross validation: ', score)

In [None]:
# Training accuracy: scored once, and the stored value is reused for printing.
nb_train_accuracy = NaiveBayes.score(x_train, y_train)
print("Precisão do treino:", nb_train_accuracy)
# Test accuracy: same pattern, avoiding a second pass over the test split.
nb_test_accuracy = NaiveBayes.score(x_test, y_test)
print("Precisão do teste:", nb_test_accuracy)

### XGBoost

In [None]:
import xgboost as xgb
XB = xgb.XGBClassifier()
XB.fit(x_train,y_train)

predicted_values = XB.predict(x_test)

x = metrics.accuracy_score(y_test, predicted_values);
acc.append(x)
model.append('XGBoost')
print("XGBoost precisão: ", x)

print(classification_report(y_test,predicted_values))

In [None]:
score = cross_val_score(XB,features,target,cv=5)
print('Cross validation score: ',score)

In [None]:
XB_train_accuracy = XB.score(x_train,y_train)
print("Precisão do treino:",XB.score(x_train,y_train))

XB_test_accuracy = XB.score(x_test,y_test)
print("Precisão do teste:",XB.score(x_test,y_test))

### Comparação entre os modelos

In [None]:
# Horizontal bar chart of the hold-out accuracy recorded for each model,
# also written to plot.png.
fig, ax = plt.subplots(figsize=[14, 7], dpi=100, facecolor='white')
ax.set_title('Accuracy Comparison')
ax.set_xlabel('Accuracy')
ax.set_ylabel('ML Algorithms')
sns.barplot(x=acc, y=model, palette='viridis', ax=ax)
fig.savefig('plot.png', dpi=300, bbox_inches='tight')

In [None]:
# Grouped bars comparing test and train accuracy for every model.
label = ['KNN', 'Decision Tree', 'Random Forest', 'Naive Bayes', 'XG Boost']
Test = [
    knn_test_accuracy,
    dt_test_accuracy,
    rf_test_accuracy,
    nb_test_accuracy,
    XB_test_accuracy,
]
Train = [
    knn_train_accuracy,
    dt_train_accuracy,
    rf_train_accuracy,
    nb_train_accuracy,
    XB_train_accuracy,
]

f, ax = plt.subplots(figsize=(20, 7))
X_axis = np.arange(len(label))

# Each pair of bars is centred on its tick: test to the left, train to the right.
ax.bar(X_axis - 0.2, Test, 0.4, label='Test', color='midnightblue')
ax.bar(X_axis + 0.2, Train, 0.4, label='Train', color='mediumaquamarine')

ax.set_xticks(X_axis)
ax.set_xticklabels(label)
ax.set_xlabel("Algoritmos")
ax.set_ylabel("Precisão")
ax.set_title("Teste vs Treino")
ax.legend()
plt.show()

Analisando os gráficos, fica fácil perceber que o Naive Bayes foi o que teve a melhor precisão. <br>
Porém, quando um algoritmo atinge 100% de precisão, é necessário desconfiar, <br>
pois isso é incomum e pode significar algum erro ou falta de dados para comparação. <br>

In [None]:
# Display the list of model names collected above (strings, not fitted estimators).
model

In [None]:
import pickle

# Persist the fitted KNN estimator. `model` only holds display names
# (strings), so pickling `model[0]` would store the text
# 'K Nearest Neighbours' instead of a classifier; `knn` is the estimator
# that name refers to. The context manager closes the file even if
# serialization fails.
with open('classifier_crop.pkl', 'wb') as pickle_out:
    pickle.dump(knn, pickle_out)