# Hello, Neighbor

The goal of this notebook is to build a nearest-neighbor classifier from scratch and then compare it against scikit-learn's implementation, to check that our custom algorithm produces the same predictions.

In [2]:
# dependencies
import pandas as pd
import numpy as np
import math

In [11]:
class NearestNeighbor:
    """A minimal 1-nearest-neighbor classifier for two numeric features.

    ``fit`` stores the training points and their labels; ``predict`` returns
    the label of the stored point with the smallest Euclidean distance to the
    query point.
    """

    def _compute_distance(self, x1, x2, y1, y2):
        """Computes the Euclidean distance between two 2-d points.

        Parameters
        ----------
        x1 : float
            First coordinate of the first point.

        x2 : float
            First coordinate of the second point.

        y1 : float
            Second coordinate of the first point.

        y2 : float
            Second coordinate of the second point.

        Returns
        -------
        float
            The computed Euclidean distance between ``(x1, y1)`` and ``(x2, y2)``.

        """
        return math.sqrt(((x2 - x1) ** 2) + ((y2 - y1) ** 2))

    def fit(self, features, target):
        """Prepares the nearest neighbor model for prediction.

        Parameters
        ----------
        features : array-like, shape=(2, n)
            The features of the model: exactly 2 rows (one per feature) and
            n columns (one per data point).

        target : array-like, shape=(n,) or (1, n)
            The target class of each data point. Its n entries must match
            the n columns of `features`.

        Raises
        ------
        ValueError
            If `features` is not 2-dimensional with exactly 2 rows, or if
            `target` does not hold exactly one label per data point.

        """
        features = np.array(features)
        # Flatten so that both (n,) and the documented (1, n) layouts can be
        # indexed by data-point position in `predict`.
        target = np.array(target).reshape(-1)

        if features.ndim != 2 or features.shape[0] != 2:
            raise ValueError(
                "features must have shape (2, n): one row per feature and "
                "one column per data point; got shape {}".format(features.shape)
            )

        if target.shape[0] != features.shape[1]:
            raise ValueError(
                "target must contain one label per data point: expected {} "
                "labels, got {}".format(features.shape[1], target.shape[0])
            )

        self.features = features
        self.target = target

    def predict(self, X):
        """Predicts the class of the point described by `X`.

        Parameters
        ----------
        X : array-like, shape=(2,)
            The query point. ``X[0]`` is compared against the first feature
            row given to `fit` and ``X[1]`` against the second, so the order
            must match the order used when fitting.

        Returns
        -------
        object
            The entry of `target` belonging to the closest training point.
            When several points are equally close, the first one wins.

        Raises
        ------
        ValueError
            If the model holds no data points.

        """
        nearest_index = None
        nearest_distance = None

        # `self.features` is laid out as (2, n), so pairing the two rows
        # element-wise walks over the n training points one at a time.
        for index, (x1, y1) in enumerate(zip(self.features[0], self.features[1])):
            distance = self._compute_distance(x1=x1, x2=X[0], y1=y1, y2=X[1])

            # Strict `<` keeps the earliest point on ties.
            if nearest_distance is None or distance < nearest_distance:
                nearest_distance = distance
                nearest_index = index

        if nearest_index is None:
            raise ValueError("Not enough data points to compute distance. Please fix")

        # Look up the label of the nearest point, not of the last point visited.
        return self.target[nearest_index]
        

In [12]:
# Toy training set: 30 songs, each described by two numeric features.

# First feature: the duration of each song.
duration = [
    184, 134, 243, 186, 122, 197, 294, 382, 102, 264,
    205, 110, 307, 110, 397, 153, 190, 192, 210, 403,
    164, 198, 204, 253, 234, 190, 182, 401, 376, 102,
]

# Second feature: the loudness of each song.
loudness = [
    18, 34, 43, 36, 22, 9, 29, 22, 10, 24,
    20, 10, 17, 51, 7, 13, 19, 12, 21, 22,
    16, 18, 4, 23, 34, 19, 14, 11, 37, 42,
]

# Known labels for the training songs: 1 means jazz, 0 means not jazz.
jazz = [
    1, 0, 0, 0, 1, 1, 0, 1, 1, 0,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 1,
    1, 1, 1, 1, 0, 0, 1, 1, 0, 0,
]

# Features are supplied in the order [duration, loudness]; queries passed to
# `predict` must use the same order.
nn_model = NearestNeighbor()
nn_model.fit([duration, loudness], jazz)

In [14]:
# Classify a new song with duration 150 and loudness 43.
# A predicted label of 0 is reported as rock; anything else as jazz.
print("Rock" if nn_model.predict([150, 43]) == 0 else "Jazz")

Rock
