<a href="https://colab.research.google.com/github/rajprakrit/ML-Library-1/blob/master/K_N_N_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
from sklearn import datasets

In [0]:
# Euclidean distance helper used by the KNN classifier
def sqr_Distance(x1, x2):
    """Return the Euclidean (L2) distance between two points.

    Despite the ``sqr`` in its name, the value returned is the square root of
    the summed squared differences, i.e. the distance itself, not its square.

    Parameters
    ----------
    x1, x2 : array-like of numbers
        Coordinates of the two points. Their shapes must be equal or
        broadcastable against each other.

    Returns
    -------
    numpy.float64
        ``sqrt(sum((x1 - x2) ** 2))`` taken over every element.
    """
    # Work in floating point: subtracting unsigned-integer arrays (for example
    # uint8 pixel values) wraps around instead of going negative, and plain
    # Python lists do not support ``-`` at all.
    diff = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
    return np.sqrt(np.sum(np.square(diff)))


#accuracy function
def _accuracy(predictions, y):
  acc = np.sum(predictions == y)/len(y)
  return acc*100


# splitting a DataFrame or array into train and test sets
def train_test_split(X, y, ratio):
    """Split ``X`` and ``y`` sequentially into a training and a test part.

    The first ``floor(n * (1 - ratio))`` rows become the training set and the
    remaining rows the test set. Nothing is shuffled, so the caller must
    shuffle beforehand if the rows are ordered (for example sorted by class).

    Parameters
    ----------
    X : sliceable sequence (NumPy array, DataFrame, list, ...)
        Samples, one per row.
    y : sliceable sequence
        Labels, with the same length as ``X``.
    ratio : float
        Fraction of the rows, between 0 and 1 inclusive, reserved for testing.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``.

    Raises
    ------
    ValueError
        If ``ratio`` lies outside ``[0, 1]`` or ``X`` and ``y`` differ in length.
    """
    if not 0 <= ratio <= 1:
        raise ValueError("ratio must be between 0 and 1, got %r" % (ratio,))

    size = len(X)
    if len(y) != size:
        raise ValueError(
            "X and y must have the same length (got %d and %d)" % (size, len(y))
        )

    # The small tolerance absorbs floating-point noise before truncating:
    # 10 * (1 - 0.9) evaluates to 0.9999999999999998, which a bare int() would
    # turn into 0 training rows instead of the intended 1.
    count = int(size * (1 - ratio) + 1e-9)

    X_train, X_test = X[:count], X[count:]
    y_train, y_test = y[:count], y[count:]
    return X_train, X_test, y_train, y_test


# k-nearest-neighbours classifier
class KNN:
    """k-nearest-neighbours classifier (Euclidean distance, majority vote).

    ``fit`` only stores the training data. Each prediction measures the
    distance from the query to every stored sample and returns the label that
    occurs most often among the ``k`` closest ones.
    """

    def __init__(self, k=3):
        """Create a classifier that votes among the ``k`` nearest samples.

        Parameters
        ----------
        k : int, default 3
            Number of neighbours that take part in the vote; must be >= 1.

        Raises
        ------
        ValueError
            If ``k`` is not a positive integer.
        """
        is_integer = isinstance(k, (int, np.integer)) and not isinstance(k, bool)
        if not is_integer or k < 1:
            raise ValueError("k must be a positive integer, got %r" % (k,))
        self.k = k

    def fit(self, X, y):
        """Store the training samples; KNN has no separate training step.

        Parameters
        ----------
        X : array-like, one sample per row
            Training samples. They are stored as a float array with each
            sample flattened to one row.
        y : array-like
            Label of each training sample.

        Raises
        ------
        ValueError
            If ``X`` is empty or ``X`` and ``y`` differ in length.
        """
        # Float storage avoids unsigned-integer wrap-around when differences
        # are taken later; np.asarray also makes DataFrame input iterate by
        # row rather than by column name.
        features = np.asarray(X, dtype=float)
        labels = np.asarray(y)

        if features.shape[0] == 0:
            raise ValueError("cannot fit KNN on an empty training set")
        if labels.shape[0] != features.shape[0]:
            raise ValueError(
                "X and y must have the same length (got %d and %d)"
                % (features.shape[0], labels.shape[0])
            )

        # One row per sample: 1-D input becomes (n, 1), images become (n, pixels).
        self.X_train = features.reshape(features.shape[0], -1)
        self.y_train = labels

    def predict(self, X):
        """Predict a label for every sample in ``X``.

        Parameters
        ----------
        X : array-like, one sample per row
            Query samples with the same per-sample size as the training data.

        Returns
        -------
        numpy.ndarray
            Predicted labels in the order of the rows of ``X``.
        """
        queries = np.asarray(X, dtype=float)
        predicted_labels = [self.predict_algorithm(sample) for sample in queries]
        return np.array(predicted_labels)

    def predict_algorithm(self, X):
        """Predict the label of one sample by majority vote of its neighbours.

        Parameters
        ----------
        X : array-like
            A single query sample.

        Returns
        -------
        The most frequent label among the ``k`` nearest training samples.
        ``Counter.most_common`` orders equal counts by first occurrence, so
        on a tie the label met first in nearest-first order is returned.

        Raises
        ------
        ValueError
            If the sample size differs from that of the training samples.
        """
        sample = np.asarray(X, dtype=float).ravel()
        if sample.size != self.X_train.shape[1]:
            raise ValueError(
                "sample has %d features but the model was fitted with %d"
                % (sample.size, self.X_train.shape[1])
            )

        # Euclidean distance to every training sample in one vectorised step.
        distances = np.sqrt(np.sum(np.square(self.X_train - sample), axis=1))

        # Indices of the k training samples with the smallest distance.
        k_indices = np.argsort(distances)[:self.k]
        k_nearest_labels = [self.y_train[i] for i in k_indices]

        # most_common(1) returns [(label, count)] for the winning label.
        most_common = Counter(k_nearest_labels).most_common(1)
        return most_common[0][0]
    
        

In [0]:
mnist = datasets.load_digits() # loads scikit-learn's bundled handwritten-digits dataset (not the full MNIST set, despite the variable name)

In [0]:
# Separate the loaded dataset into the feature matrix and the target labels.
X = mnist.data
y = mnist.target

In [0]:
X_train, X_test, y_train, y_test = train_test_split(X, y, 0.2) # hold out the trailing 20% of the rows as the test set (sequential split, no shuffling)

In [0]:
clf = KNN() # instantiate the hand-written KNN classifier with its default k = 3

In [0]:
clf.fit(X_train, y_train) # "training" only stores the samples; the neighbour search happens at prediction time

In [0]:
predictions = clf.predict(X_test) # predict a label for every held-out sample

In [0]:
print("the accuracy of our model is:") # caption for the value displayed by the next line
_accuracy(predictions, y_test) # last expression of the cell, so the notebook displays the percentage of correct predictions

the accuracy of our model is:


96.11111111111111

**USING SKLEARN LIBRARY FOR TRAINING OUR MODEL**

In [0]:
from sklearn.neighbors import KNeighborsClassifier 

In [0]:
clf1 = KNeighborsClassifier(n_neighbors= 3) # same number of neighbours as our own KNN, for a like-for-like comparison

In [0]:
clf1.fit(X_train, y_train) # fit on the same training split that was used for our own KNN

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=3, p=2,
                     weights='uniform')

In [0]:
training_score = clf1.score(X_train, y_train) # score on the training split (not used by any later cell shown here)
test_score = clf1.score(X_test, y_test) # score on the held-out split; presumably a 0-1 accuracy fraction, since it is multiplied by 100 below -- see the scikit-learn docs

In [0]:
# Express the scikit-learn test score as a percentage so it can be compared with _accuracy().
accuracy = 100 * test_score

In [0]:
accuracy # display the scikit-learn test accuracy computed in the previous cell (in percent)

96.66666666666667