# KNN Classifier - Simple Train and Test

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn import datasets, neighbors, metrics

%matplotlib inline

## Data Prep

In [None]:
# Load the iris dataset object shipped with scikit-learn.
iris = datasets.load_iris()

# Build the working frame in one expression: one column per feature name,
# plus a 'species' column holding the class codes from iris.target.
iris_df = pd.DataFrame(
    data=iris.data,
    columns=iris.feature_names,
).assign(species=iris.target)

# Preview the first ten rows.
iris_df.head(10)

In [None]:
# Build the class-code -> species-name lookup directly from the dataset's
# class names. enumerate() numbers the names 0, 1, 2, ... in order, so the
# mapping no longer relies on a hand-written index list that zip() would
# silently truncate against if the number of classes ever differed.
map_species = dict(enumerate(iris.target_names))

print(map_species)

In [None]:
def _species_name(code):
    """Return the species name stored in map_species for a class code."""
    return map_species[code]


# Replace every class code in the 'species' column with its name.
# An unknown code still raises KeyError, exactly like a direct dict lookup.
iris_df['species'] = iris_df['species'].apply(_species_name)

# Preview the first ten rows after the replacement.
iris_df.head(10)

## EDA

In [None]:
# Summarise the dataset with DataFrame.describe(); the returned summary is
# the last expression of the cell, so it is what the notebook shows.
iris_df.describe()

In [None]:
# Inspect the frame with DataFrame.info() to look at the column data types.
iris_df.info()

In [None]:
# Visual overview: seaborn pair plot of the frame, coloured by 'species'.
sns.pairplot(data=iris_df, hue='species')

## Training

In [None]:
from sklearn.model_selection import train_test_split

# Feature set: every column except the 'species' label.
X = iris_df.drop('species', axis=1)

# Target: the 'species' label column.
y = iris_df['species']

# Split into training and testing sets, holding out 33% of the rows for
# testing, with a fixed random_state.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

# The classifier to train: k-nearest neighbours, k=5, uniform weights.
knn = neighbors.KNeighborsClassifier(n_neighbors=5, weights='uniform')

In [None]:
# Row counts of the training and test splits, one per line.
print(X_train.shape[0], X_test.shape[0], sep='\n')

In [None]:
# Train the classifier on the training split (features, then labels).
knn.fit(X=X_train, y=y_train)

## Testing

In [None]:
# Predictions of the fitted classifier for the held-out features.
predicted = knn.predict(X_test)

# Side-by-side comparison frame: the true test labels become the 'actual'
# column (keeping y_test's index) and the predictions are attached as
# 'predicted'.
validate = y_test.to_frame(name='actual').assign(predicted=predicted)

# Preview the first ten comparisons.
validate.head(10)

In [None]:
# Overall accuracy... be careful!!
# Count the rows where the prediction equals the true label, then express
# that count as a percentage of all test rows.
n_correct = (validate['actual'] == validate['predicted']).sum()
accuracy_pct = (n_correct / len(validate)) * 100
print('Accuracy = ', accuracy_pct, '%')

## Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of the test results: the true labels are passed as y_true
# and the classifier's predictions as y_pred.
confusion_matrix(
    y_true=validate['actual'],
    y_pred=validate['predicted'],
)

In [None]:
# Cross-tabulate actual vs. predicted labels with named axes and margins
# enabled.
pd.crosstab(
    index=validate['actual'],
    columns=validate['predicted'],
    rownames=['Actual'],
    colnames=['Predicted'],
    margins=True,
)