### Importando Libs

In [31]:
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import pandas as pd

### Carregando um conjunto de dados de exemplo

In [14]:
# Load scikit-learn's bundled wine dataset; its `.data` (feature matrix) and
# `.target` (class labels) attributes are used in the cells below.
vinhos = datasets.load_wine()

In [15]:
# Display the raw feature matrix (NumPy array, one row per sample)
vinhos.data

array([[1.423e+01, 1.710e+00, 2.430e+00, ..., 1.040e+00, 3.920e+00,
        1.065e+03],
       [1.320e+01, 1.780e+00, 2.140e+00, ..., 1.050e+00, 3.400e+00,
        1.050e+03],
       [1.316e+01, 2.360e+00, 2.670e+00, ..., 1.030e+00, 3.170e+00,
        1.185e+03],
       ...,
       [1.327e+01, 4.280e+00, 2.260e+00, ..., 5.900e-01, 1.560e+00,
        8.350e+02],
       [1.317e+01, 2.590e+00, 2.370e+00, ..., 6.000e-01, 1.620e+00,
        8.400e+02],
       [1.413e+01, 4.100e+00, 2.740e+00, ..., 6.100e-01, 1.600e+00,
        5.600e+02]], shape=(178, 13))

In [16]:
# Portuguese display names for the 13 feature columns; they are assigned
# positionally to the columns of `vinhos.data`
atributos = [
    'Alcool',
    'Acido malico',
    'Cinzas',
    'Alcalinidade das cinzas',
    'Magnesio',
    'Fenois totais',
    'Flavonoides',
    'Fenois nao flavonoides',
    'Proantocianinas',
    'Intensidade da cor',
    'Matiz',
    'OD280/OD315 de vinhos diluídos',
    'Prolina',
]

# Name of the target (class label) column
alvo = 'classe'

# Build the frame from the feature matrix and append the class labels as the
# last column, all in one expression
vinhos_df = pd.DataFrame(data=vinhos.data, columns=atributos).assign(
    **{alvo: vinhos.target}
)

### Exploração dos dados

In [17]:
# Column names, non-null counts and dtypes: a quick check for missing values
vinhos_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 178 entries, 0 to 177
Data columns (total 14 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   Alcool                          178 non-null    float64
 1   Acido malico                    178 non-null    float64
 2   Cinzas                          178 non-null    float64
 3   Alcalinidade das cinzas         178 non-null    float64
 4   Magnesio                        178 non-null    float64
 5   Fenois totais                   178 non-null    float64
 6   Flavonoides                     178 non-null    float64
 7   Fenois nao flavonoides          178 non-null    float64
 8   Proantocianinas                 178 non-null    float64
 9   Intensidade da cor              178 non-null    float64
 10  Matiz                           178 non-null    float64
 11  OD280/OD315 de vinhos diluídos  178 non-null    float64
 12  Prolina                         178 

In [18]:
# List the column labels to confirm the target column follows the features
vinhos_df.columns

Index(['Alcool', 'Acido malico', 'Cinzas', 'Alcalinidade das cinzas',
       'Magnesio', 'Fenois totais', 'Flavonoides', 'Fenois nao flavonoides',
       'Proantocianinas', 'Intensidade da cor', 'Matiz',
       'OD280/OD315 de vinhos diluídos', 'Prolina', 'classe'],
      dtype='object')

In [19]:
# Per-column summary statistics. The features sit on very different scales
# (compare 'Prolina' with 'Matiz' in the output), which is why they are
# standardised before the distance-based KNN model is trained.
vinhos_df.describe()

Unnamed: 0,Alcool,Acido malico,Cinzas,Alcalinidade das cinzas,Magnesio,Fenois totais,Flavonoides,Fenois nao flavonoides,Proantocianinas,Intensidade da cor,Matiz,OD280/OD315 de vinhos diluídos,Prolina,classe
count,178.0,178.0,178.0,178.0,178.0,178.0,178.0,178.0,178.0,178.0,178.0,178.0,178.0,178.0
mean,13.000618,2.336348,2.366517,19.494944,99.741573,2.295112,2.02927,0.361854,1.590899,5.05809,0.957449,2.611685,746.893258,0.938202
std,0.811827,1.117146,0.274344,3.339564,14.282484,0.625851,0.998859,0.124453,0.572359,2.318286,0.228572,0.70999,314.907474,0.775035
min,11.03,0.74,1.36,10.6,70.0,0.98,0.34,0.13,0.41,1.28,0.48,1.27,278.0,0.0
25%,12.3625,1.6025,2.21,17.2,88.0,1.7425,1.205,0.27,1.25,3.22,0.7825,1.9375,500.5,0.0
50%,13.05,1.865,2.36,19.5,98.0,2.355,2.135,0.34,1.555,4.69,0.965,2.78,673.5,1.0
75%,13.6775,3.0825,2.5575,21.5,107.0,2.8,2.875,0.4375,1.95,6.2,1.12,3.17,985.0,2.0
max,14.83,5.8,3.23,30.0,162.0,3.88,5.08,0.66,3.58,13.0,1.71,4.0,1680.0,2.0


### Pré-processamento


In [20]:
# Take independent copies of the feature columns and of the target column
X = vinhos_df.loc[:, atributos].copy()
y = vinhos_df.loc[:, alvo].copy()

# Fit the standardiser and apply it to the features.
# NOTE(review): the scaler is fitted on every row, i.e. before any train/test
# split, so its statistics include rows that are later held out for testing.
scaler = StandardScaler().fit(X.values)
X_scaled = scaler.transform(X.values)

# Show the first standardised sample
print(X_scaled[0])

[ 1.51861254 -0.5622498   0.23205254 -1.16959318  1.91390522  0.80899739
  1.03481896 -0.65956311  1.22488398  0.25171685  0.36217728  1.84791957
  1.01300893]


### Separação para treino e teste

In [26]:
# Split the raw (unscaled) features first so the scaler never sees test rows.
# A fixed random_state makes the split, and therefore the accuracy reported
# further down, reproducible across runs; stratify=y keeps the class
# proportions the same in the training and test partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X.values, y, train_size=0.7, random_state=42, stratify=y
)

# Fit the standardisation on the training rows only, then apply that same
# transformation to both partitions. Fitting on the full dataset would leak
# test-set statistics into training and make the evaluation optimistic.
train_scaler = StandardScaler().fit(X_train)
X_train_scaled = train_scaler.transform(X_train)
X_test_scaled = train_scaler.transform(X_test)

# Report the effective partition sizes as a percentage of all samples
print(f'Tamanho do treino: {round(len(X_train_scaled)/len(X)*100)}% \n Tamanho do teste: {round(len(X_test_scaled)/len(X)*100)}%')

Tamanho do treino: 70% 
 Tamanho do teste: 30%


### Construção do modelo

In [29]:
# Create a k-nearest-neighbours classifier that votes among the 3 closest samples
knn = KNeighborsClassifier(n_neighbors=3)

# Train on the scaled training features and their class labels; the fitted
# estimator is the cell's last expression so its parameters are displayed
knn.fit(X=X_train_scaled, y=y_train)

0,1,2
,n_neighbors,3
,weights,'uniform'
,algorithm,'auto'
,leaf_size,30
,p,2
,metric,'minkowski'
,metric_params,
,n_jobs,


### Avaliação do modelo

In [32]:
# Predict the class of every held-out sample
y_pred = knn.predict(X_test_scaled)

# Fraction of held-out samples whose predicted class matches the true class
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

# Report the accuracy as a percentage with two decimals
print(f'Taxa de acerto: {accuracy*100:.2f}%')

Taxa de acerto: 98.15%
