# CLASSIFIER ALGORITHMS

They are:
- Decision Tree
- KNN (k-nearest neighbors)
- Logistic Regression
- Naive Bayes
- SVM (Support Vector Machine)

## Imports

In [19]:
import pandas as pd
import numpy as np

# Algoritmos de classificação
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

import seaborn as sb

import matplotlib.pyplot as plt

## Manipulating data

In [20]:
local = 'vendas.csv'
origin_def = pd.read_csv(local)

# Retirando colunas inúteis a fim de evitar a maldição
df = origin_def.drop(['Cabecalho', 'Cidade', 'Pais', 'Horario Clique'], axis=1)

In [21]:
dados = df.drop('Comprou', axis=1)
target = df['Comprou']

dados_treino, dados_teste, target_treino, target_teste = train_test_split(dados, target, test_size=0.3, random_state=3)

## Algorithms
Instanciating classifier methods' classes

### K-nearest neighbors

In [33]:
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(dados_treino, target_treino)

KNeighborsClassifier()

### Decision tree

In [23]:
tree = DecisionTreeClassifier(criterion='entropy')
tree.fit(dados_treino, target_treino)

DecisionTreeClassifier(criterion='entropy')

### Naive Bayes

In [24]:
nb = GaussianNB()
nb.fit(dados_treino, target_treino)

GaussianNB()

### Lofistic Regression

In [31]:
lr = LogisticRegression()
lr.fit(dados_treino, target_treino)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


LogisticRegression()

### SVM
Utilizando o método linear

In [26]:
svm = SVC(kernel='linear', C=1.0, random_state=3, probability=True);
svm.fit(dados_treino, target_treino)

SVC(kernel='linear', probability=True, random_state=3)

## Score

In [27]:
knn_accuraty = knn.score(dados_teste, target_teste)
tree_accuraty = tree.score(dados_teste, target_teste)
nb_accuraty = nb.score(dados_teste, target_teste)
lr_accuraty = lr.score(dados_teste, target_teste)
svm_accuraty = svm.score(dados_teste, target_teste)

print(f'KNN: {knn_accuraty}')
print(f'Tree: {tree_accuraty}')
print(f'Bayes: {nb_accuraty}')
print(f'L.R.: {lr_accuraty}')
print(f'SVM: {svm_accuraty}')


KNN: 0.6766666666666666
Tree: 0.9366666666666666
Bayes: 0.9566666666666667
L.R.: 0.9466666666666667
SVM: 0.9566666666666667


## Others tests

### Predicting a new instance's class

In [28]:
# new instance
person = [[50,17,6000, 180, 0]]
p_knn = knn.predict(person)
p_tree = tree.predict(person)
p_nb = nb.predict(person)
p_lr = lr.predict(person)
p_svm = svm.predict(person)

print(f'KNN: {p_knn}')
print(f'Tree: {p_tree}')
print(f'Bayes: {p_nb}')
print(f'L.R.: {p_lr}')
print(f'SVM: {p_svm}')


KNN: [1]
Tree: [0]
Bayes: [1]
L.R.: [1]
SVM: [1]




### Classes probability

In [32]:
knn_prob = knn.predict_proba(person)
tree_prob = tree.predict_proba(person)
nb_prob = nb.predict_proba(person)
lr_prob = lr.predict_proba(person)
svm_prob = svm.predict_proba(person)


print(f'KNN: {knn_prob}')
print(f'Tree: {tree_prob}')
print(f'Bayes: {nb_prob}')
print(f'L.R.: {lr_prob}')
print(f'SVM: {svm_prob}')

KNN: [[0. 1.]]
Tree: [[1. 0.]]
Bayes: [[4.20466175e-09 9.99999996e-01]]
L.R.: [[0.08986031 0.91013969]]
SVM: [[4.47526818e-07 9.99999552e-01]]




### Confusion matrix

In [30]:
matrix_knn = confusion_matrix(target_teste, knn.predict(dados_teste))
matrix_tree = confusion_matrix(target_teste, tree.predict(dados_teste))
matrix_nb = confusion_matrix(target_teste, nb.predict(dados_teste))
matrix_lr = confusion_matrix(target_teste, lr.predict(dados_teste))
matrix_svm = confusion_matrix(target_teste, svm.predict(dados_teste))
matrix_svm

array([[142,   6],
       [  7, 145]], dtype=int64)