In [1]:
# import necessary libraries
from sklearn import tree, datasets
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from scipy.spatial import distance

In [2]:
# returns euclidean distance between point a and b
def euc(a,b):
    return distance.euclidean(a,b)

# A scrappy K-nearest neighbor classifier that predicts based on the closest neighbor
class ScrappyKNN():
    def fit(self, X_train, y_train):
        self.X_train = X_train
        self.y_train = y_train
    
    def predict(self, X_test):
        predictions = []
        for row in X_test:
            label = self.closest(row)
            predictions.append(label)
        return predictions
    
    def closest(self, row):
        best_dist = euc(row, self.X_train[0])
        best_index = 0
        for i in range(1, len(self.X_train)):
            dist = euc(row, self.X_train[i])
            if dist < best_dist:
                best_dist = dist
                best_index = i
        return self.y_train[best_index]

In [3]:
# load dataset of flower iris
iris = datasets.load_iris()

X = iris.data
y = iris.target

In [4]:
# split data into training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3)

In [5]:
# initiate a decision tree classifier and fit
dt_clf = tree.DecisionTreeClassifier()
dt_clf.fit(X_train, y_train)

predictions = dt_clf.predict(X_test)
print(accuracy_score(y_test, predictions))

0.9333333333333333


In [6]:
# initiate a K-Neighbors classifier and fit
kn_clf = KNeighborsClassifier()
kn_clf.fit(X_train, y_train)

predictions = kn_clf.predict(X_test)
print(accuracy_score(y_test, predictions))

0.9555555555555556


In [7]:
# initiate a scrappy KNN classifier and fit
skn_clf = ScrappyKNN()
skn_clf.fit(X_train, y_train)

predictions = skn_clf.predict(X_test)
print(accuracy_score(y_test, predictions))

0.9333333333333333
