In [20]:
# All imports live together at the top of the cell so the notebook's
# dependencies are visible at a glance.
import numpy as np
from scipy.spatial import distance
from sklearn.datasets import load_iris

# Seed NumPy's legacy global RNG. No visible cell draws random numbers, so
# this only matters if stochastic steps are added later.
np.random.seed(42)

iris = load_iris()
data = iris.data            # sample matrix handed to KNeighborsClassifier as training data
labels = iris.target        # per-sample class index, read by KNeighborsClassifier
names = iris.target_names   # class index -> printable class name
# NOTE(review): `iris.values()` is a view over *every* entry of the dataset
# bunch, not just the features, and nothing in the visible cells reads
# `features`. Presumably `iris.feature_names` was intended - confirm before
# relying on this variable.
features = iris.values()

In [21]:
class KNeighborsClassifier:
    """Brute-force k-nearest-neighbours lookup for a single query sample.

    Constructing an instance runs the whole query immediately: it measures the
    Euclidean distance from ``test_data`` to every row of ``data``, prints the
    class of each of the ``k`` closest rows and finally prints the share of
    the winning class among those neighbours.

    Attributes set by ``__init__``:
        k, data, test_data: the constructor arguments, stored unchanged.
        targets, target_names: the class indices / class names in use.
        normalize: whether min-max scaling was applied before measuring.
        neighbors: list with the class of each of the k nearest rows,
            nearest first.
        predicted_label: the majority class among ``neighbors``.
        probability: fraction (0..1) of ``neighbors`` that belong to
            ``predicted_label``.
    """

    def __init__(self, k: int, data: np.ndarray, test_data: list,
                 targets: np.ndarray = None, target_names=None,
                 normalize: bool = False):
        """Run a k-nearest-neighbours query and print its result.

        Args:
            k: number of neighbours to inspect; must be at least 1.
            data: training samples, one row per sample.
            test_data: the query sample, one value per column of ``data``.
            targets: class index of every row in ``data``. Defaults to the
                notebook-level ``labels`` so existing three-argument calls
                keep working.
            target_names: sequence mapping a class index to its printable
                name. Defaults to the notebook-level ``names``.
            normalize: when True, min-max scale ``data`` and ``test_data``
                with the column ranges of ``data`` before measuring
                distances. The default (False) measures on the raw values.

        Raises:
            ValueError: if ``k`` is smaller than 1.
        """
        if k < 1:
            # A non-positive k would slice an empty (or truncated) neighbour
            # list and only fail later with an unrelated IndexError.
            raise ValueError("k must be at least 1, got {}".format(k))

        self.k = k
        self.data = data
        self.test_data = test_data
        self.targets = labels if targets is None else targets
        self.target_names = names if target_names is None else target_names
        self.normalize = normalize

        if normalize:
            # The query must be scaled with the *training* ranges, otherwise
            # it would live in a different coordinate system than the rows.
            train = self.normalizeData()
            query = self.normalizeData(np.asarray([test_data], dtype=float))[0]
        else:
            train, query = self.data, self.test_data

        distances = self.euclideanDistance(train, query)
        sorted_distances = self.sortDistances(distances)
        nNeighbors = self.determineNeighbors(sorted_distances, self.targets)
        self.neighbors = nNeighbors

        print('The {} closest neighbors of searched iris {} are:'.format(self.k, self.test_data))
        for name in nNeighbors:
            print("iris ", self.target_names[name])

        self.predicted_label, self.probability = self.determinePropability(nNeighbors)

    def normalizeData(self, x=None):
        """Min-max scale values using the per-column range of ``self.data``.

        Args:
            x: values to scale, laid out like ``self.data``. When omitted,
                ``self.data`` itself is scaled.

        Returns:
            A float array where each column of ``self.data`` maps onto
            [0, 1]. A column whose minimum equals its maximum maps to 0
            instead of dividing by zero.
        """
        reference = np.asarray(self.data, dtype=float)
        data_min = np.min(reference, axis=0)
        data_max = np.max(reference, axis=0)
        span = data_max - data_min
        # Constant columns have a zero range; divide by 1 so they become 0.
        span = np.where(span == 0, 1.0, span)
        values = reference if x is None else np.asarray(x, dtype=float)
        return (values - data_min) / span

    def euclideanDistance(self, data=None, query=None):
        """Return the Euclidean distance from the query to every sample.

        Args:
            data: iterable of samples; defaults to ``self.data``.
            query: the point to measure from; defaults to ``self.test_data``.

        Returns:
            A list of distances in the same order as the samples.
        """
        rows = self.data if data is None else data
        point = self.test_data if query is None else query
        return [distance.euclidean(row, point) for row in rows]

    def sortDistances(self, x: list):
        """Return the indices that would sort ``x`` in ascending order."""
        return np.argsort(x)

    def determineNeighbors(self, distances: np.ndarray, y: np.ndarray):
        """Look up the class of the ``self.k`` nearest samples.

        Args:
            distances: sample indices ordered from nearest to farthest (the
                result of ``sortDistances``), despite the parameter name.
            y: class of every sample, indexable by sample index.

        Returns:
            A list with the class of each of the first ``self.k`` indices.
        """
        return [y[i] for i in distances[0:self.k]]

    def determinePropability(self, nNeighbors: list):
        """Print and return the majority class and its share of the neighbours.

        The winning class is the one that occurs most often in ``nNeighbors``.
        When several classes tie, the one that appears first in the list wins;
        for a list ordered nearest-first that is the class of the closest
        neighbour among the tied classes.

        Args:
            nNeighbors: class of each neighbour.

        Returns:
            A ``(label, fraction)`` tuple where ``fraction`` is in (0, 1].

        Raises:
            ValueError: if ``nNeighbors`` is empty.
        """
        votes = list(nNeighbors)
        if not votes:
            raise ValueError("cannot compute a class share without neighbours")

        # dict.fromkeys keeps first-seen order and max() returns the first
        # maximal element, which yields the tie-break described above.
        majority = max(dict.fromkeys(votes), key=votes.count)
        propability = votes.count(majority) / len(votes)
        print('Propability for iris {} by examine next {} neighbors is {} %'.format(self.target_names[majority], self.k, propability*100))
        return majority, propability


In [22]:
# First query sample, taken from the exercise. The classifier measures its
# distance to every row of `data`, so it presumably carries one value per
# feature column of `data`.
test_data_1 = [4.8, 2.5, 5.3, 2.4] #DATA FROM EXERCISE

In [23]:
# Query the 20 nearest rows of `data` for test_data_1. Constructing the
# classifier runs the lookup and prints the result as a side effect.
clf = KNeighborsClassifier(20, data, test_data_1)

The 20 closest neighbors of searched iris [4.8, 2.5, 5.3, 2.4] are:
iris  virginica
iris  virginica
iris  virginica
iris  virginica
iris  virginica
iris  virginica
iris  virginica
iris  versicolor
iris  versicolor
iris  virginica
iris  versicolor
iris  versicolor
iris  virginica
iris  virginica
iris  virginica
iris  virginica
iris  versicolor
iris  versicolor
iris  virginica
iris  virginica
Propability for iris virginica by examine next 20 neighbors is 70.0 %


In [24]:
# Second query sample; laid out like test_data_1.
test_data_2 = [5.2,4.1,1.5,0.1] 

In [25]:
# Same 20-neighbour query for test_data_2; rebinding `clf` discards the
# previous classifier instance.
clf = KNeighborsClassifier(20, data, test_data_2)

The 20 closest neighbors of searched iris [5.2, 4.1, 1.5, 0.1] are:
iris  setosa
iris  setosa
iris  setosa
iris  setosa
iris  setosa
iris  setosa
iris  setosa
iris  setosa
iris  setosa
iris  setosa
iris  setosa
iris  setosa
iris  setosa
iris  setosa
iris  setosa
iris  setosa
iris  setosa
iris  setosa
iris  setosa
iris  setosa
Propability for iris setosa by examine next 20 neighbors is 100.0 %


In [28]:
# Third query sample; laid out like test_data_1.
test_data_3 = [5.2,2.7,3.9,1.4]

In [29]:
# Same 20-neighbour query for test_data_3; the result is printed by the
# constructor.
clf = KNeighborsClassifier(20, data, test_data_3)

The 20 closest neighbors of searched iris [5.2, 2.7, 3.9, 1.4] are:
iris  versicolor
iris  versicolor
iris  versicolor
iris  versicolor
iris  versicolor
iris  versicolor
iris  versicolor
iris  versicolor
iris  versicolor
iris  versicolor
iris  versicolor
iris  versicolor
iris  versicolor
iris  versicolor
iris  versicolor
iris  versicolor
iris  versicolor
iris  versicolor
iris  virginica
iris  versicolor
Propability for iris versicolor by examine next 20 neighbors is 95.0 %
