<a href="https://colab.research.google.com/github/pramodcgupta/Machine-Learning-Predictions/blob/master/KNN_FromScratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Developing a KNN model (`myNearestNeighborsClassifier`) in Python from scratch**

In [1]:
import numpy as np
import warnings
# Silence every warning category for the rest of the kernel session.
# NOTE(review): this also hides deprecation warnings raised by libraries such as NumPy.
warnings.filterwarnings("ignore")

## Define KNN Model
class myNearestNeighborsClassifier():
  """k-nearest-neighbours classifier (Euclidean distance, majority vote).

  Usage: create with the number of neighbours ``k``, call ``fit`` with the
  training samples and labels, then call ``predict`` with the samples to
  classify.
  """

  def __init__(self, k=5):
    """Store the number of neighbours that take part in each vote.

    Args:
      k: number of nearest training samples consulted per prediction.

    Raises:
      ValueError: if ``k`` is not a positive integer.
    """
    # A zero or negative k would silently slice away neighbours later on,
    # so reject it up front.
    if not isinstance(k, (int, np.integer)) or k < 1:
      raise ValueError("k must be a positive integer, got %r" % (k,))
    self.k = k

  def getMaxClass(self, nearestPoints):
    """Return the most frequent label among ``nearestPoints``.

    Args:
      nearestPoints: labels of the selected neighbours, ordered from the
        nearest to the farthest.

    Returns:
      The label with the highest count. When several labels share the
      highest count, the one that occurs first in ``nearestPoints`` wins.
    """
    labels = list(nearestPoints)
    # max() keeps the first maximal element, which gives the tie-break above.
    return max(labels, key=labels.count)

  def getNearestNeighbors(self, X_NewPoint):
    """Return the labels of the ``k`` training samples closest to a point.

    Args:
      X_NewPoint: one sample with the same number of features as the
        training data.

    Returns:
      A NumPy array with at most ``k`` labels taken from ``y_train``,
      ordered from the nearest to the farthest training sample.
    """
    point = np.asarray(X_NewPoint)

    # Euclidean distance from the point to every training row in one call.
    distances = np.linalg.norm(self.X_train - point, axis=1)

    # A stable sort keeps equally distant samples in training order.
    nearest_idx = np.argsort(distances, kind="stable")[:self.k]

    # Select the labels by index. Packing (features, label, distance) tuples
    # into np.array() would build a ragged array, which NumPy >= 1.24
    # rejects with a ValueError.
    return self.y_train[nearest_idx]

  def fit(self, X_train, y_train):
    """Memorise the training data; no model parameters are estimated.

    Args:
      X_train: 2-D array-like of training samples, one row per sample.
      y_train: 1-D array-like with one label per training sample.

    Raises:
      ValueError: if the number of samples and labels differ.
    """
    samples = np.array(X_train)
    labels = np.array(y_train)
    if len(samples) != len(labels):
      raise ValueError(
        "X_train and y_train must have the same length, got %d and %d"
        % (len(samples), len(labels)))
    self.X_train = samples
    self.y_train = labels

  def predict(self, X_test):
    """Predict a label for every row of ``X_test``.

    Args:
      X_test: 2-D array-like of samples to classify (nested list, NumPy
        array or DataFrame), one row per sample.

    Returns:
      A list with one predicted label per row of ``X_test``.
    """
    self.X_test = X_test

    # Convert first so that iteration always yields rows; iterating a
    # DataFrame directly would yield its column names instead.
    rows = np.asarray(X_test)

    return [self.getMaxClass(self.getNearestNeighbors(row)) for row in rows]

**Testing the model with dummy data**

In [2]:
# Toy dataset: each row is (feature 1, feature 2, class label), five rows per class.
dataset = np.array([
    [2.7810836, 2.550537003, 0],
    [1.465489372, 2.362125076, 0],
    [3.396561688, 4.400293529, 0],
    [1.38807019, 1.850220317, 0],
    [3.06407232, 3.005305973, 0],
    [7.627531214, 2.759262235, 1],
    [5.332441248, 2.088626775, 1],
    [6.922596716, 1.77106367, 1],
    [8.675418651, -0.242068655, 1],
    [7.673756466, 3.508563011, 1],
])

# Features are every column except the last one; labels are the last column.
X = dataset[:, :-1]
y = dataset[:, -1]

In [3]:
# Build the from-scratch classifier with 5 neighbours and hand it the toy features and labels.
model = myNearestNeighborsClassifier(k=5)
model.fit(X,y)

In [4]:
# Query with the feature values of the second toy row, whose label is 0.
y_pred=model.predict([[1.465489372,2.362125076]])
# Bare expression so the notebook displays the prediction list.
y_pred

[0.0]

**Running my KNN model and scikit-learn's KNN on the Iris dataset**

In [5]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split 
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
import matplotlib.pyplot as plt

In [6]:
# Iris CSV pinned to a specific gist revision so the download does not change over time.
IRIS_CSV_URL = "https://gist.githubusercontent.com/netj/8836201/raw/6f9306ad21398ea43cba4f7d537619d0e07d5ae3/iris.csv"

df = pd.read_csv(IRIS_CSV_URL)
df.head()

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,5.1,3.5,1.4,0.2,Setosa
1,4.9,3.0,1.4,0.2,Setosa
2,4.7,3.2,1.3,0.2,Setosa
3,4.6,3.1,1.5,0.2,Setosa
4,5.0,3.6,1.4,0.2,Setosa


In [7]:
# Encode the species names in `variety` as numeric class ids in a new column.
iris_fl = LabelEncoder()
df['variety_n'] = iris_fl.fit_transform(df['variety'])

# Feature frame: everything except the raw and the encoded label columns.
X = df.drop(columns=['variety', 'variety_n'])
print(X.head())

   sepal.length  sepal.width  petal.length  petal.width
0           5.1          3.5           1.4          0.2
1           4.9          3.0           1.4          0.2
2           4.7          3.2           1.3          0.2
3           4.6          3.1           1.5          0.2
4           5.0          3.6           1.4          0.2


In [8]:
# Target vector: the label-encoded species column.
y = df['variety_n']

# Hold out 20% of the rows; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=4
)

# Neighbour counts 1..25 plus empty containers.
# NOTE(review): presumably meant for a sweep over k; none of these are used in the cells shown here.
k_range = range(1, 26)
scores = dict()
scores_list = list()

**Running my model on the Iris dataset**

In [33]:
# Fit the from-scratch classifier (5 neighbours) on the full feature frame X and labels y,
# not on the X_train/y_train split.
myModel = myNearestNeighborsClassifier(k=5)
myModel.fit(X,y)

In [34]:
# Two hand-written query samples with four values each.
# NOTE(review): assumed to follow the column order of X — confirm.
x_new = [[5,4,3,4],[5,4,4,5]]
y_predict = myModel.predict(np.array(x_new))

In [35]:
# Print each query sample next to the class id predicted for it.
print("\n\nprediction for values:",x_new[0],"is: ",y_predict[0])
print("prediction for values:",x_new[1],"is: ",y_predict[1])



prediction for values: [5, 4, 3, 4] is:  1
prediction for values: [5, 4, 4, 5] is:  2


**Running scikit-learn's KNN on the Iris dataset**

In [37]:
# Reference model: scikit-learn's KNN with 5 neighbours, fitted on the same X and y.
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X,y)

KNeighborsClassifier()

In [38]:
# Two hand-written query samples with four values each; y_predict is overwritten
# with the scikit-learn model's predictions.
x_new = [[5,4,3,4],[5,4,4,5]]
y_predict = knn.predict(np.array(x_new))

In [39]:
# Print each query sample next to the class id predicted by the scikit-learn model.
print("\n\nprediction for values:",x_new[0],"is: ",y_predict[0])
print("prediction for values:",x_new[1],"is: ",y_predict[1])



prediction for values: [5, 4, 3, 4] is:  1
prediction for values: [5, 4, 4, 5] is:  2
