# KNN

## import libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
import sklearn

## load dataset

In [2]:
from sklearn.datasets import load_iris

iris = load_iris()
iris.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [3]:
df = pd.DataFrame(iris["data"])
df.head()

Unnamed: 0,0,1,2,3
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [4]:
iris["feature_names"]

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [5]:
iris["target_names"]

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

## assign features and targets

In [6]:
x = df
y = iris["target"]

## train_test_split

In [8]:
from sklearn.model_selection import train_test_split
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size = 0.25, random_state = 4)
print(xtrain.shape)
print(ytrain.shape)
print(xtest.shape)
print(ytest.shape)

(112, 4)
(112,)
(38, 4)
(38,)


## create knn model

In [9]:
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors = 3)
knn

KNeighborsClassifier(n_neighbors=3)

## fit data and predict for new data

In [10]:
knn.fit(xtrain, ytrain)

KNeighborsClassifier(n_neighbors=3)

In [13]:
x_new = np.array([[5, 2.9,1, 0.2]])
prediction = knn.predict(x_new)
prediction

array([0])

In [14]:
iris["target_names"][prediction]

array(['setosa'], dtype='<U10')

## accuracy and confusion matrix

In [18]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [19]:
ypred = knn.predict(xtest)

In [20]:
matrix = confusion_matrix(ytest, ypred)
matrix

array([[18,  0,  0],
       [ 0,  7,  1],
       [ 0,  0, 12]], dtype=int64)

In [21]:
accuracy = accuracy_score(ytest, ypred)
accuracy

0.9736842105263158

In [22]:
classification = classification_report(ytest, ypred)
classification

'              precision    recall  f1-score   support\n\n           0       1.00      1.00      1.00        18\n           1       1.00      0.88      0.93         8\n           2       0.92      1.00      0.96        12\n\n    accuracy                           0.97        38\n   macro avg       0.97      0.96      0.96        38\nweighted avg       0.98      0.97      0.97        38\n'

## kfold cross validation

In [23]:
x = iris.data
y = iris.target

In [25]:
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

### Logistic Regression

In [28]:
lr = cross_val_score(LogisticRegression(), x, y)
lr

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


array([0.96666667, 1.        , 0.93333333, 0.96666667, 1.        ])

In [31]:
np.average(lr)

0.9733333333333334

### Decision Tree

In [29]:
dtc = cross_val_score(DecisionTreeClassifier(), x, y)
dtc

array([0.96666667, 0.96666667, 0.9       , 1.        , 1.        ])

In [32]:
np.average(dtc)

0.9666666666666668

### Support Vector Machine

In [30]:
svc = cross_val_score(SVC(), x, y)
svc

array([0.96666667, 0.96666667, 0.96666667, 0.93333333, 1.        ])

In [33]:
np.average(svc)

0.9666666666666666

In [58]:
lst = []
lst.append(np.average(lr))
lst.append(np.average(dtc))
lst.append(np.average(svc))
lst

[0.9733333333333334, 0.9666666666666668, 0.9666666666666666]

In [59]:
max(lst)

0.9733333333333334