In [2]:
import sklearn
from sklearn.utils import shuffle
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd
import numpy as np
from sklearn import linear_model, preprocessing


In [3]:
# Loading Data
data = pd.read_csv("data/car.data")
print(data.head())  # To check if our data is loaded correctly

  buying  maint door persons lug_boot safety  class
0  vhigh  vhigh    2       2    small    low  unacc
1  vhigh  vhigh    2       2    small    med  unacc
2  vhigh  vhigh    2       2    small   high  unacc
3  vhigh  vhigh    2       2      med    low  unacc
4  vhigh  vhigh    2       2      med    med  unacc


In [4]:
# Converting Data

"""
    As you may have noticed much of our data is not numeric. 
    In order to train the K-Nearest Neighbor Classifier we must convert any string data into some kind of a number. 
    Luckily for us sklearn has a method that can do this for us.

    We will start by creating a label encoder object and then use that to encode each column of our data into 
    integers.
"""

le = preprocessing.LabelEncoder()

In [6]:
buying = le.fit_transform(list(data["buying"]))
maint = le.fit_transform(list(data["maint"]))
door = le.fit_transform(list(data["door"]))
persons = le.fit_transform(list(data["persons"]))
lug_boot = le.fit_transform(list(data["lug_boot"]))
safety = le.fit_transform(list(data["safety"]))
cls = le.fit_transform(list(data["class"]))

In [7]:
predict = "class"

In [8]:
"""
    Now we need to recombine our data into a feature list and a label list. 
    We can use the zip() function to makes things easier.
"""
X = list(zip(buying, maint, door, persons, lug_boot, safety))  # features
y = list(cls)  # labels

In [9]:
"""Finally we will split our data into training and testing data
 using the same process seen previously."""

x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, test_size = 0.1)

In [10]:
print(x_train, y_test)

[(2, 2, 2, 1, 1, 2), (1, 0, 3, 1, 1, 0), (0, 2, 3, 0, 0, 0), (3, 1, 1, 1, 0, 0), (3, 1, 1, 2, 1, 0), (0, 2, 2, 2, 1, 1), (0, 3, 1, 2, 0, 1), (2, 3, 1, 2, 2, 2), (2, 2, 3, 2, 1, 0), (0, 3, 2, 0, 0, 1), (3, 0, 2, 1, 0, 1), (1, 2, 0, 2, 1, 0), (2, 2, 2, 0, 0, 2), (2, 2, 1, 2, 0, 0), (2, 0, 0, 1, 2, 2), (2, 1, 2, 0, 2, 2), (1, 2, 1, 2, 1, 1), (1, 0, 1, 0, 0, 1), (3, 0, 2, 2, 2, 1), (2, 2, 0, 2, 2, 2), (0, 1, 3, 2, 2, 1), (3, 1, 0, 2, 1, 1), (1, 3, 0, 0, 0, 0), (0, 0, 0, 2, 2, 2), (0, 0, 3, 1, 1, 0), (1, 0, 2, 0, 1, 2), (2, 3, 0, 2, 1, 0), (1, 1, 2, 0, 1, 2), (2, 2, 0, 1, 0, 0), (0, 2, 1, 1, 0, 1), (0, 0, 1, 0, 2, 0), (2, 0, 2, 2, 0, 0), (0, 3, 1, 1, 1, 1), (0, 0, 2, 1, 2, 2), (2, 3, 3, 2, 1, 1), (0, 0, 3, 0, 1, 2), (0, 0, 1, 0, 0, 2), (1, 3, 2, 1, 0, 1), (0, 1, 1, 2, 2, 1), (2, 1, 0, 1, 2, 2), (1, 3, 2, 0, 0, 0), (2, 0, 3, 1, 2, 2), (0, 3, 0, 2, 1, 2), (3, 0, 1, 0, 0, 1), (1, 2, 2, 1, 0, 2), (2, 1, 2, 2, 2, 1), (1, 0, 0, 2, 0, 2), (3, 0, 3, 1, 1, 2), (3, 3, 0, 0, 1, 1), (3, 2, 1, 2, 1, 2),

In [21]:
# Implementation of KNN (K_Nearest Neigbors)

model = KNeighborsClassifier(9)

model.fit(x_train, y_train)
acc = model.score(x_test, y_test)
print(f'Accuracy ==> {acc}')

Accuracy ==> 0.9421965317919075


In [24]:
predicted = model.predict(x_test)
names = ["unacc","acc", "good", "vgood"]

for x in range(len(predicted)):
    print(f"Predicted : {names[predicted[x]]}   Data : {x_test[x]}   Actual: {names[y_test[x]]}")

Predicted : good   Data : (3, 0, 2, 2, 1, 0)   Actual: good
Predicted : good   Data : (0, 0, 3, 0, 0, 0)   Actual: good
Predicted : unacc   Data : (2, 3, 3, 2, 2, 0)   Actual: unacc
Predicted : good   Data : (3, 1, 3, 1, 2, 2)   Actual: good
Predicted : good   Data : (0, 0, 3, 0, 1, 0)   Actual: good
Predicted : good   Data : (2, 3, 3, 0, 0, 0)   Actual: good
Predicted : good   Data : (0, 0, 0, 0, 2, 2)   Actual: good
Predicted : good   Data : (2, 2, 3, 0, 0, 0)   Actual: good
Predicted : good   Data : (3, 2, 3, 1, 2, 1)   Actual: good
Predicted : good   Data : (3, 0, 2, 1, 2, 2)   Actual: good
Predicted : unacc   Data : (0, 1, 0, 1, 2, 0)   Actual: unacc
Predicted : good   Data : (3, 1, 0, 0, 2, 0)   Actual: good
Predicted : good   Data : (0, 3, 2, 2, 0, 2)   Actual: good
Predicted : unacc   Data : (0, 1, 3, 1, 0, 0)   Actual: unacc
Predicted : good   Data : (2, 1, 0, 2, 2, 1)   Actual: good
Predicted : good   Data : (3, 3, 0, 0, 2, 2)   Actual: good
Predicted : good   Data : (0, 0, 2