In [1]:
# K-Nearest Neighbor Classification

# A supervised classifier that memorizes observations from within a labeled training set to predict classification labels for new, unlabeled observations.
# It makes predictions based on how similar training observations are to the new, incoming observations.
# The more similar the observation values, the more likely they will be classified with the same label.

In [2]:
# Use cases of K-Nearest Neighbor

# Stock Price Prediction
# Predictive Trip Planning
# Credit Risk Analysis
# Recommendation Systems

In [3]:
# Assumptions

# Dataset has little noise
# Dataset is labeled
# Dataset only contains relevant features
# Dataset has distinguishable subgroups
# Avoid using KNN on large datasets. It will probably take a long time.

In [6]:
# Instance-based learning with K-Nearest Neighbor

import numpy as np
import pandas as pd
import scipy
import urllib
import sklearn

import matplotlib.pyplot as plt
from pylab import rcParams

from sklearn import neighbors
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn import metrics

In [7]:
from sklearn.neighbors import KNeighborsClassifier

In [8]:
np.set_printoptions(precision=4, suppress=True)
%matplotlib inline
rcParams['figure.figsize'] = 7,4
plt.style.use('seaborn-whitegrid')

  plt.style.use('seaborn-whitegrid')


In [9]:
# Load the cars table and give its twelve columns short, explicit names.

address = './Data/mtcars.csv'
cars = pd.read_csv(address)
cars.columns = [
    'car_names', 'mpg', 'cyl', 'disp', 'hp', 'drat',
    'wt', 'qsec', 'vs', 'am', 'gear', 'carb',
]

# Unscaled predictor matrix: four numeric columns, selected by name.
feature_columns = ['mpg', 'disp', 'hp', 'wt']
X_prime = cars[feature_columns].values

# Target labels: the 'am' column, which sits at position 9 after the rename.
y = cars['am'].values

In [16]:
X = preprocessing.scale(X_prime)

In [17]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=17)

In [18]:
# Fit a K-Nearest Neighbor classifier (library-default settings) on the
# training partition. `fit` returns the estimator itself, so construction and
# training can be chained into a single assignment.

clf = neighbors.KNeighborsClassifier().fit(X_train, y_train)
print(clf)

KNeighborsClassifier()


In [19]:
# Score the fitted classifier on the held-out partition and print the
# per-class precision / recall / F1 summary.

y_expect = y_test                # ground-truth labels for the test rows
y_pred = clf.predict(X_test)     # labels predicted by the classifier

print(metrics.classification_report(y_true=y_expect, y_pred=y_pred))

              precision    recall  f1-score   support

           0       0.80      1.00      0.89         4
           1       1.00      0.67      0.80         3

    accuracy                           0.86         7
   macro avg       0.90      0.83      0.84         7
weighted avg       0.89      0.86      0.85         7



In [20]:
# High Precision + Low Recall = Few results returned, but many of the label predictions that are returned are correct.