In [25]:
class kNN:

    # Initialize all the attributes of the kNN class.
    def __init__(self, data_classified, data_unclassified):
        self.data_classified = data_classified
        self.data_unclassified = data_unclassified
        
        self.input_variables = []
        self.target_values = []

        self.distance_target_pairs = []
        self.sorted_pairs = []

        self.k_nearest_neighbors = []
        self.kNN_mode = 0

        self.distances_from_unclassified = []


    # Copy the classified data's input variables into a seperate array.
    def get_input_variables(self, target_value_index=-1):
      # Since datasets usually reserve the last column for labels,
      # the default value for the target value's index in a row is set to -1.
      self.input_variables = []
      self.target_values = []
      for row in self.data_classified:
        row_copy = row[:]
        target_value = row_copy.pop(target_value_index)
        self.input_variables.append(row_copy)
        self.target_values.append(target_value)
  
    # Create a function to calculate the Euclidian distance between two vectors.
    def calculate_euclidian_distance(self, vector1, vector2):
      distance_squared = 0
      for vector1_element, vector2_element in zip(vector1, vector2):
        distance_squared += (vector1_element - vector2_element)**2
      distance = distance_squared**0.5
      return distance

    # Calculate the Euclidian distance between the new data point and the
    # already-classified data points.
    def find_kNN(self):
      
      # Append the Euclidian distance values between the unclassified
      # and classified vectors into an array.
      for feature in self.input_variables:
        distance = self.calculate_euclidian_distance(self.data_unclassified, feature)
        self.distances_from_unclassified.append(distance)
      
      # Create a list of distance-target pairs for determining which class
      # is represented the most in the top k nearest neighbors.
      distance_target_pairs = []
      for distance, target in zip(self.distances_from_unclassified, self.target_values):
          pair = [distance, target]
          distance_target_pairs.append(pair)
      
      # Sort the list of distance-target pairs in descending order with
      # respect to distance.
      self.sorted_pairs = sorted(self.distance_target_pairs)
    
    
    # Predict the classes of the unclassified data points through majority voting.
    def make_prediction(self, k=3):
        
        # Append the first k values of the sorted pairs list into another list.
        limit = 0
        for feature, target in self.sorted_pairs:
            if limit >= k:
                break
            else:
                self.k_nearest_neighbors.append(target)
                limit += 1
        
        # Find the mode in the newly created list to predict the class.
        self.kNN_mode = max(set(self.k_nearest_neighbors), key = self.k_nearest_neighbors.count)


In [26]:
# Test
data_unclassified_1 = [1, 0, 82, 140]
data_classified_1 = [[1, 0, 73, 160, 0],
                    [1, 1, 16, 516, 1],
                    [1, 1, 12, 502, 1]]

dataset_1 = kNN(data_classified_1, data_unclassified_1)
dataset_1.get_input_variables()
print(dataset_1.input_variables)
dataset_1.find_kNN()
print(dataset_1.sorted_pairs)
print(dataset_1.distance_target_pairs)
dataset_1.make_prediction(3)
print(dataset_1.k_nearest_neighbors)
print(dataset_1.kNN_mode)

[[1, 0, 73, 160], [1, 1, 16, 516], [1, 1, 12, 502]]
[]
[]


ValueError: max() arg is an empty sequence