# K-Nearest Neighbors (K-NN)

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [2]:
# Load the preprocessed CKD table. Every column except the last is used as a
# feature; the last column ("Chronic Kidney Disease: yes") is the 0/1 target.
dataset = pd.read_csv('CKD_Preprocessed.csv')
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

In [3]:
# Show the loaded table; pandas elides the middle rows/columns in the printout.
print(dataset)

     Age (yrs)  ...  Chronic Kidney Disease: yes
0         48.0  ...                          1.0
1          7.0  ...                          1.0
2         62.0  ...                          1.0
3         48.0  ...                          1.0
4         51.0  ...                          1.0
..         ...  ...                          ...
395       55.0  ...                          0.0
396       42.0  ...                          0.0
397       12.0  ...                          0.0
398       17.0  ...                          0.0
399       58.0  ...                          0.0

[400 rows x 25 columns]


In [4]:
# Dump the full table, the feature matrix and the label vector, one after the
# other (newline-separated, exactly as three consecutive print calls would).
print(dataset, X, y, sep='\n')

     Age (yrs)  ...  Chronic Kidney Disease: yes
0         48.0  ...                          1.0
1          7.0  ...                          1.0
2         62.0  ...                          1.0
3         48.0  ...                          1.0
4         51.0  ...                          1.0
..         ...  ...                          ...
395       55.0  ...                          0.0
396       42.0  ...                          0.0
397       12.0  ...                          0.0
398       17.0  ...                          0.0
399       58.0  ...                          0.0

[400 rows x 25 columns]
[[48.    80.     1.02  ...  0.     0.     0.   ]
 [ 7.    50.     1.02  ...  0.     0.     0.   ]
 [62.    80.     1.01  ...  1.     0.     1.   ]
 ...
 [12.    80.     1.02  ...  0.     0.     0.   ]
 [17.    60.     1.025 ...  0.     0.     0.   ]
 [58.    80.     1.025 ...  0.     0.     0.   ]]
[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1

## Splitting the dataset into the Training set and Test set

In [5]:
from sklearn.model_selection import train_test_split

# Hold out a quarter of the rows for evaluation; the fixed seed keeps the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

In [6]:
# Inspect the training features (still in raw units; scaling happens later).
print(X_train)

[[40.    80.     1.025 ...  0.     0.     0.   ]
 [46.    70.     1.015 ...  0.     0.     1.   ]
 [80.    70.     1.02  ...  0.     0.     0.   ]
 ...
 [50.    70.     1.02  ...  0.     0.     0.   ]
 [11.    80.     1.01  ...  0.     0.     0.   ]
 [62.    80.     1.01  ...  0.     0.     0.   ]]


In [7]:
# Inspect the training labels (0.0 / 1.0 values).
print(y_train)

[0. 1. 0. 1. 0. 0. 1. 0. 1. 0. 1. 1. 0. 1. 0. 1. 0. 1. 1. 0. 1. 1. 1. 1.
 0. 1. 1. 0. 0. 1. 1. 0. 1. 1. 1. 1. 0. 1. 0. 1. 1. 1. 1. 1. 0. 1. 0. 1.
 1. 1. 0. 1. 0. 1. 1. 1. 0. 1. 1. 0. 0. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 0.
 1. 0. 1. 0. 1. 1. 0. 0. 1. 1. 1. 0. 1. 1. 1. 1. 0. 0. 1. 0. 0. 1. 1. 1.
 1. 1. 0. 1. 1. 1. 1. 0. 0. 0. 1. 1. 1. 0. 1. 1. 0. 1. 1. 1. 1. 1. 1. 0.
 0. 0. 1. 1. 0. 1. 1. 0. 1. 1. 0. 1. 0. 0. 1. 1. 1. 1. 1. 1. 0. 1. 0. 1.
 1. 0. 1. 1. 1. 1. 1. 0. 1. 1. 0. 1. 0. 1. 1. 0. 1. 0. 1. 1. 0. 1. 0. 1.
 1. 1. 0. 0. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 1. 0. 1. 1. 0. 0. 0. 1.
 1. 1. 0. 1. 1. 1. 1. 1. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 1. 0. 1. 1. 0.
 1. 1. 1. 1. 1. 0. 0. 0. 0. 1. 1. 1. 0. 0. 1. 1. 0. 1. 0. 1. 0. 0. 0. 1.
 1. 0. 0. 0. 1. 1. 0. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 0. 1. 0.
 0. 1. 1. 0. 0. 1. 1. 1. 0. 1. 0. 1. 1. 0. 0. 1. 0. 1. 1. 0. 1. 1. 1. 0.
 1. 0. 1. 1. 0. 1. 0. 0. 1. 1. 1. 1.]


In [8]:
# Inspect the held-out test features (still in raw units; scaling happens later).
print(X_test)

[[50.    90.     1.02  ...  0.     1.     1.   ]
 [51.    60.     1.02  ...  0.     0.     0.   ]
 [63.    70.     1.025 ...  0.     0.     0.   ]
 ...
 [52.    80.     1.025 ...  0.     0.     0.   ]
 [42.    80.     1.02  ...  0.     0.     0.   ]
 [73.    80.     1.025 ...  0.     0.     0.   ]]


In [9]:
# Inspect the held-out test labels (0.0 / 1.0 values).
print(y_test)

[1. 0. 0. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 0. 1. 1. 0. 1. 1. 1. 1. 0. 1. 0.
 1. 0. 1. 1. 0. 1. 1. 0. 0. 1. 1. 1. 1. 1. 1. 0. 1. 0. 1. 1. 0. 1. 0. 0.
 1. 1. 1. 1. 0. 1. 0. 0. 1. 1. 1. 1. 1. 0. 1. 0. 1. 0. 1. 1. 1. 0. 1. 0.
 1. 1. 1. 1. 0. 0. 0. 0. 0. 1. 1. 1. 0. 1. 1. 0. 0. 0. 0. 0. 1. 1. 1. 1.
 1. 0. 0. 0.]


## Feature Scaling

In [10]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training rows only, then apply that same fitted
# transformation to both splits so the test set is scaled with training
# statistics rather than its own.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [11]:
# Inspect the training features after standardization.
print(X_train)

[[-0.64316848  0.29307272  1.34202869 ... -0.52075564 -0.4843221
  -0.39787078]
 [-0.29825249 -0.43355387 -0.47134613 ... -0.52075564 -0.4843221
   2.51337884]
 [ 1.65627145 -0.43355387  0.43534128 ... -0.52075564 -0.4843221
  -0.39787078]
 ...
 [-0.0683085  -0.43355387  0.43534128 ... -0.52075564 -0.4843221
  -0.39787078]
 [-2.31026244  0.29307272 -1.37803354 ... -0.52075564 -0.4843221
  -0.39787078]
 [ 0.62152348  0.29307272 -1.37803354 ... -0.52075564 -0.4843221
  -0.39787078]]


In [12]:
# Inspect the test features after applying the training-fitted scaler.
print(X_test)

[[-0.0683085   1.01969932  0.43534128 ... -0.52075564  2.0647416
   2.51337884]
 [-0.0108225  -1.16018046  0.43534128 ... -0.52075564 -0.4843221
  -0.39787078]
 [ 0.67900948 -0.43355387  1.34202869 ... -0.52075564 -0.4843221
  -0.39787078]
 ...
 [ 0.0466635   0.29307272  1.34202869 ... -0.52075564 -0.4843221
  -0.39787078]
 [-0.52819649  0.29307272  0.43534128 ... -0.52075564 -0.4843221
  -0.39787078]
 [ 1.25386946  0.29307272  1.34202869 ... -0.52075564 -0.4843221
  -0.39787078]]


## Training the K-NN model on the Training set

In [13]:
from sklearn.neighbors import KNeighborsClassifier

# 5-nearest-neighbours vote; Minkowski distance with p=2 is the Euclidean metric.
classifier = KNeighborsClassifier(
    n_neighbors=5,
    metric='minkowski',
    p=2,
)
# Kept as the trailing expression so the notebook displays the fitted estimator.
classifier.fit(X_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform')

## Predicting a new result

In [14]:
# Classify one new sample: a single row of 24 raw feature values (presumably in
# the CSV's feature-column order -- verify against the dataset). The row goes
# through the already-fitted scaler before it is handed to the classifier.
print(
    classifier.predict(
        sc.transform([[
            53, 90, 1.02, 2, 0, 70, 107, 7.2, 114, 3.7, 9.5, 29,
            12100, 3.7, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1,
        ]])
    )
)

[1.]


## Predicting the Test set results

In [15]:
# Predict labels for the held-out rows, then print them side by side with the
# true labels: column 0 is the prediction, column 1 is the ground truth.
y_pred = classifier.predict(X_test)
print(np.column_stack((y_pred, y_test)))

[[1. 1.]
 [0. 0.]
 [0. 0.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [0. 0.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [0. 0.]
 [1. 1.]
 [1. 1.]
 [0. 0.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [0. 0.]
 [1. 1.]
 [0. 0.]
 [1. 1.]
 [0. 0.]
 [1. 1.]
 [1. 1.]
 [0. 0.]
 [0. 1.]
 [1. 1.]
 [0. 0.]
 [0. 0.]
 [1. 1.]
 [0. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [0. 0.]
 [1. 1.]
 [0. 0.]
 [1. 1.]
 [1. 1.]
 [0. 0.]
 [1. 1.]
 [0. 0.]
 [0. 0.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [0. 0.]
 [1. 1.]
 [0. 0.]
 [0. 0.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [0. 0.]
 [1. 1.]
 [0. 0.]
 [1. 1.]
 [0. 0.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [0. 0.]
 [1. 1.]
 [0. 0.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [0. 0.]
 [1. 1.]
 [1. 1.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [0. 0.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [1. 1.]
 [0. 0.]
 [0. 0.]
 [0. 0.]]


## Making the Confusion Matrix

In [16]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Confusion matrix layout: rows are the true classes, columns the predicted ones.
cm = confusion_matrix(y_test, y_pred)
print(cm)
# Left as a bare trailing expression so the notebook shows the accuracy as the
# cell's output value.
accuracy_score(y_test, y_pred)

[[38  0]
 [ 2 60]]


0.98