## Importa bibliotecas

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, accuracy_score
import numpy as np
import pandas as pd
import seaborn as sns

### Lê bases de dados

In [None]:
# Load the labelled training set and the unlabelled test set from the input directory.
train_path, test_path = "../input/train.csv", "../input/test.csv"
df = pd.read_csv(train_path)
df_test = pd.read_csv(test_path)

### Descrição da base de treinamento

Podemos ver a descrição da base de dados, a fim de analisarmos melhor os dados e escolher qual algoritmo de classificação utilizar.

In [None]:
# Summary statistics (count, mean, std, quartiles) of the training columns.
train_summary = df.describe()
print(train_summary)

### Descrição da base de teste

In [None]:
# Same summary statistics for the test set, to compare against the training set.
test_summary = df_test.describe()
print(test_summary)

### Obtém classificação da base de treinamento e remove dados categóricos



In [None]:
# Keep the target labels and the test ids before any column is removed.
y = df["type"]
indexes_test = df_test["id"]

# Drop the row identifier and the categorical "color" column from both sets.
unused_columns = ["id", "color"]
df = df.drop(columns=unused_columns)
df_test = df_test.drop(columns=unused_columns)

### Plota dados da base de treinamento

No gráfico a seguir pode-se acompanhar a distribuição dos atributos da base. Com base nesses gráficos, percebe-se que nenhum dos dados apresenta uma distinção perfeita entre as classes; porém, os atributos bone_length e hair_length possuem uma melhor distribuição entre as 3 classes.

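Mesmo assim, julga-se necessário o uso de todos os atributos durante a geração do modelo, inclusive do dado categórico de cor. (Observação: a coluna color foi removida na célula anterior; portanto, os modelos treinados a seguir não a utilizam.)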

In [None]:
%matplotlib inline
sns.set()
sns.pairplot(df,hue="type")

df = df.drop(["type"],axis=1)

### Executa train_test_split com tamanho da base de teste com 20%

In [None]:
# Hold out 20% of the labelled rows for evaluation; the fixed seed keeps the
# split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    df,
    y,
    test_size=0.2,
    random_state=0,
)

### Define os hiperparâmetros que serão analisados pelo GridSearch do KNN, a fim de buscar os hiperparâmetros ótimos

In [None]:
# Base estimator plus the hyperparameter grid explored by the search: number of
# neighbours, vote weighting, neighbour-search algorithm, tree leaf size and the
# Minkowski power parameter p.
knn = KNeighborsClassifier()
params = dict(
    n_neighbors=[1, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
    weights=('uniform', 'distance'),
    algorithm=('auto', 'ball_tree', 'kd_tree', 'brute'),
    leaf_size=[1, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
    p=[1, 2, 3],
)
# 5-fold cross-validation over every grid combination, using 8 parallel workers.
grid_search_knn = GridSearchCV(
    knn,
    param_grid=params,
    n_jobs=8,
    cv=5,
)

### Executa o GridSearch para o KNN e exibe os hiperparâmetros ótimos

In [None]:
# Run the cross-validated grid search on the training split only. The hold-out
# split is not predicted here: this cell only needs the winning hyperparameters,
# and a prediction whose result is discarded is wasted work.
grid_search_knn.fit(X_train, y_train)
print(grid_search_knn.best_params_)

### Treina o modelo e realiza classificação da base de teste utilizando o KNN

In [None]:
# Rebuild a KNN classifier from the best hyperparameters found by the grid search.
best_knn_params = grid_search_knn.best_params_
knn = KNeighborsClassifier(**best_knn_params)

# fit() returns the estimator itself, so training and hold-out prediction chain.
y_pred_knn = knn.fit(X_train, y_train).predict(X_test)

### Relatório de classificação do KNN

In [None]:
# classification_report expects (y_true, y_pred). Passing them the other way
# round swaps precision with recall for every class and counts "support" over
# the predictions instead of the true labels.
print(classification_report(y_test, y_pred_knn))

### Predição da base de classificação

In [None]:
# Classify the unlabelled test set with the tuned KNN model.
y_pred = knn.predict(X=df_test)

### Exibe a classificação dos dados

In [None]:
# Assemble the submission table: one row per test id with its predicted type,
# then write it without the index column.
Y = pd.DataFrame({"id": indexes_test, "type": y_pred})
Y.to_csv("submission.csv", index=False)

# Quick sanity check of the first rows that were written.
submission_preview = Y.head(5)
print(submission_preview)

### Teste para avaliar a precisão do Adaboost

In [None]:
from sklearn.ensemble import AdaBoostClassifier

# Tune AdaBoost with a 5-fold cross-validated grid search over the ensemble
# size, the learning rate and the boosting algorithm.
# A fixed random_state (the same seed used for the train/test split) makes the
# search and the final score reproducible across runs.
# NOTE(review): 'SAMME.R' is deprecated in recent scikit-learn releases —
# confirm the installed version still accepts it before running this grid.
ada = AdaBoostClassifier(random_state=0)
params = {
    'n_estimators': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
    'learning_rate': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
    'algorithm': ('SAMME', 'SAMME.R'),
}
grid_search_ada = GridSearchCV(ada, params, n_jobs=8, cv=5)

# Only the best hyperparameters are needed from the search itself.
grid_search_ada.fit(X_train, y_train)
print(grid_search_ada.best_params_)

# Retrain on the training split with the winning hyperparameters and evaluate
# on the hold-out split.
ada = AdaBoostClassifier(random_state=0, **grid_search_ada.best_params_)
ada.fit(X_train, y_train)
y_pred_ada = ada.predict(X_test)

# classification_report expects (y_true, y_pred); the reverse order swaps
# precision and recall.
print(classification_report(y_test, y_pred_ada))

### Teste para avaliar a precisão do SVM

In [None]:
# Tune an SVM with a 5-fold cross-validated grid search over the kernel, the
# regularisation strength C, and the gamma / coef0 kernel coefficients.
svc = SVC()
params = {
    'kernel': ('linear', 'poly', 'rbf'),
    'coef0': [0.001, 0.01, 0.05, 0.5, 1],
    'C': [1, 5, 10, 0.1, 0.01],
    'gamma': [0.001, 0.01, 0.05, 0.5, 1],
}
grid_search_svc = GridSearchCV(svc, params, n_jobs=8, cv=5)

# Only the best hyperparameters are needed from the search itself.
grid_search_svc.fit(X_train, y_train)
print(grid_search_svc.best_params_)

# Retrain on the training split with the winning hyperparameters and evaluate
# on the hold-out split.
svc = SVC(**grid_search_svc.best_params_)
svc.fit(X_train, y_train)
y_pred_svc = svc.predict(X_test)

# classification_report expects (y_true, y_pred); the reverse order swaps
# precision and recall.
print(classification_report(y_test, y_pred_svc))



### Teste para avaliar a precisão do Naive Bayes Gaussiano

In [None]:
from sklearn.naive_bayes import GaussianNB

# Gaussian Naive Bayes baseline; priors=None leaves the class priors to be
# derived from the training data.
gnb = GaussianNB(priors=None)

gnb.fit(X_train, y_train)
y_pred_gnb = gnb.predict(X_test)

# classification_report expects (y_true, y_pred); the reverse order swaps
# precision and recall.
print(classification_report(y_test, y_pred_gnb))

### Teste para avaliar a precisão da Árvore de Decisão

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Tune a decision tree with a 5-fold cross-validated grid search over the split
# criterion and the splitter strategy.
# A fixed random_state (the same seed used for the train/test split) makes the
# search and the final score reproducible across runs.
tree = DecisionTreeClassifier(random_state=0)
params = {
    'criterion': ('gini', 'entropy'),
    'splitter': ('best', 'random'),
}
grid_search_tree = GridSearchCV(tree, params, n_jobs=8, cv=5)

# Only the best hyperparameters are needed from the search itself.
grid_search_tree.fit(X_train, y_train)
print(grid_search_tree.best_params_)

# Retrain on the training split with the winning hyperparameters and evaluate
# on the hold-out split.
tree = DecisionTreeClassifier(random_state=0, **grid_search_tree.best_params_)
tree.fit(X_train, y_train)
y_pred_tree = tree.predict(X_test)

# classification_report expects (y_true, y_pred); the reverse order swaps
# precision and recall.
print(classification_report(y_test, y_pred_tree))