In [1]:
# Importing libraries

import pandas as pd

import numpy as np

from sklearn.model_selection import train_test_split

from scipy.stats import mode

from sklearn.neighbors import RadiusNeighborsClassifier

import warnings

warnings.filterwarnings("ignore")

In [2]:
# Radius Nearest Neighbors Classification

class Radius_Nearest_Neighbors_Classifier:
    """Radius-based nearest-neighbours classifier.

    A query point receives the most frequent label among the training points
    whose Euclidean distance to it is at most ``r`` (boundary included).
    Ties are resolved in favour of the smallest label.

    Parameters
    ----------
    r : float
        Radius of the neighbourhood drawn around each query point.
    """

    def __init__(self, r):
        self.r = r

    def fit(self, X_train, Y_train):
        """Store the training set; no model is built at fit time.

        Parameters
        ----------
        X_train : array-like of shape (m, n)
            Training features.
        Y_train : array-like of shape (m,) or (m, 1)
            Training labels. If more columns are supplied, only the first
            one is used when voting.

        Raises
        ------
        ValueError
            If ``X_train`` is not 2-D or the number of labels differs from
            the number of training rows.
        """
        self.X_train = np.asarray(X_train)
        self.Y_train = np.asarray(Y_train)

        # no_of_training_examples, no_of_features
        self.m, self.n = self.X_train.shape

        if self.Y_train.shape[:1] != (self.m,):
            raise ValueError(
                "X_train has %d rows but Y_train has shape %s"
                % (self.m, self.Y_train.shape)
            )

    def predict(self, X_test):
        """Predict a label for every row of ``X_test``.

        Parameters
        ----------
        X_test : array-like of shape (m_test, n)
            Query points; must have the same number of features as the
            training set.

        Returns
        -------
        numpy.ndarray of shape (m_test,)
            Predicted labels, stored as floats.

        Raises
        ------
        ValueError
            If the feature count differs from the training set, or if a
            query point has no training example within radius ``r``.
        """
        self.X_test = np.asarray(X_test)

        # no_of_test_examples, no_of_features
        self.m_test, n_test = self.X_test.shape

        # Without this check a single-feature query would silently broadcast
        # against the training rows and produce meaningless distances.
        if n_test != self.n:
            raise ValueError(
                "X_test has %d features but the model was fitted with %d"
                % (n_test, self.n)
            )

        Y_predict = np.zeros(self.m_test)

        for i in range(self.m_test):
            # labels of the training examples inside the radius of test example i
            neighbors = self.find_neighbors(self.X_test[i])

            if neighbors.shape[0] == 0:
                raise ValueError(
                    "No training example lies within radius %r of test example %d; "
                    "increase r." % (self.r, i)
                )

            # Vote on the first label column so (m,) and (m, 1) targets behave alike.
            votes = neighbors.reshape(neighbors.shape[0], -1)[:, 0]

            # np.unique returns labels sorted ascending, so argmax picks the
            # smallest label among equally frequent ones.
            labels, counts = np.unique(votes, return_counts=True)
            Y_predict[i] = labels[np.argmax(counts)]

        return Y_predict

    def find_neighbors(self, x):
        """Return the labels of the training examples within radius ``r`` of ``x``.

        Parameters
        ----------
        x : numpy.ndarray of shape (n,)
            A single query point.

        Returns
        -------
        numpy.ndarray
            Rows of ``Y_train`` whose training point satisfies
            ``distance <= r``; empty when no point qualifies.
        """
        inside = np.array(
            [self.euclidean(x, x_train) <= self.r for x_train in self.X_train],
            dtype=bool,
        )

        return self.Y_train[inside]

    def euclidean(self, x, x_train):
        """Return the Euclidean distance between ``x`` and ``x_train``."""
        return np.sqrt(np.sum(np.square(x - x_train)))

In [3]:
def main( radius = 550, data_path = "diabetes.csv" ) :
    """Compare the hand-written radius classifier with scikit-learn's.

    Reads the CSV at ``data_path`` (all columns but the last are features,
    the last column is the label), holds out one third of the rows as a test
    set, fits both classifiers with the same ``radius`` and prints each
    model's accuracy on the test set as a percentage.

    Parameters
    ----------
    radius : float, default 550
        Neighbourhood radius passed to both classifiers.
    data_path : str, default "diabetes.csv"
        Path of the CSV file to load.
    """

    # Create dataset

    df = pd.read_csv( data_path )

    X = df.iloc[:,:-1].values

    Y = df.iloc[:,-1:].values

    # Splitting dataset into train and test set

    X_train, X_test, Y_train, Y_test = train_test_split( X, Y, test_size = 1/3, random_state = 0 )

    # Model training

    model = Radius_Nearest_Neighbors_Classifier( r = radius )

    model.fit( X_train, Y_train )

    model1 = RadiusNeighborsClassifier( radius = radius )

    # scikit-learn expects a 1-D target; flatten the (m, 1) column here

    model1.fit( X_train, Y_train.ravel() )

    # Prediction on test set

    Y_pred = model.predict( X_test )

    Y_pred1 = model1.predict( X_test )

    # measure performance: element-wise comparison against the flattened labels

    Y_true = Y_test.ravel()

    total = Y_true.size

    correctly_classified = np.count_nonzero( Y_true == np.ravel( Y_pred ) )

    correctly_classified1 = np.count_nonzero( Y_true == np.ravel( Y_pred1 ) )

    print("Accuracy on test set by our model     : ", ( correctly_classified / total ) * 100 )

    # each line reports its own model's count (correctly_classified1 for sklearn)

    print("Accuracy on test set by sklearn model : ", ( correctly_classified1 / total ) * 100 )

In [4]:
# Run the comparison only when this file is executed as the main program.
if "__main__" == __name__:
    main()

Accuracy on test set by our model     :  61.111111111111114
Accuracy on test set by sklearn model :  61.111111111111114
