## K Nearest Neighbors (KNN) Practice

### Step 1 - Import Libraries

#### Import Data Exploration Libraries

In [1]:
import pandas as pd
import numpy as np

#### Import Data Visualization Libraries

In [2]:
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

### Step 2 - Read in dataset


In [3]:
# Load the classified dataset from disk with pandas.
# index_col=0 -> use the file's first column as the DataFrame's row index
# instead of keeping it as a regular data column.

df = pd.read_csv(
    'Classified Data',
    index_col=0,
)

In [4]:
# Preview the first five rows (the default for .head()) to sanity-check the load
df.head(n=5)

Unnamed: 0,WTT,PTI,EQW,SBI,LQE,QWG,FDJ,PJF,HQE,NXJ,TARGET CLASS
0,0.913917,1.162073,0.567946,0.755464,0.780862,0.352608,0.759697,0.643798,0.879422,1.231409,1
1,0.635632,1.003722,0.535342,0.825645,0.924109,0.64845,0.675334,1.013546,0.621552,1.492702,0
2,0.72136,1.201493,0.92199,0.855595,1.526629,0.720781,1.626351,1.154483,0.957877,1.285597,0
3,1.234204,1.386726,0.653046,0.825624,1.142504,0.875128,1.409708,1.380003,1.522692,1.153093,1
4,1.279491,0.94975,0.62728,0.668976,1.232537,0.703727,1.115596,0.646691,1.463812,1.419167,1


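### Step 3 - Standardize the feature scales to prepare for the KNN algorithm

KNN classifies a point by its distance to neighboring points, so features measured on larger scales would otherwise dominate the distance calculation.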


In [5]:
# Import module to standardize the scale

from sklearn.preprocessing import StandardScaler

In [6]:
# Instantiate the scaler object that will standardize the features.
# The arguments below are the defaults, written out for clarity.

scaler = StandardScaler(copy=True, with_mean=True, with_std=True)

In [8]:
# Fit the scaler on every feature column, i.e. everything except the label.
# .drop() returns a copy of the frame without 'TARGET CLASS';
# axis='columns' (same as axis=1) says the label names a column, not a row.
# .fit() returns the scaler itself, which is why the cell echoes its repr.
# NOTE(review): the scaler is fit on the full dataset here; if a train/test
# split follows, consider fitting on the training portion only — confirm.

scaler.fit(df.drop(labels='TARGET CLASS', axis='columns'))

StandardScaler(copy=True, with_mean=True, with_std=True)

In [9]:
# Apply the fitted scaler to the same feature columns (label excluded)
# to obtain the standardized values.

scaled_features = scaler.transform(
    df.drop(labels='TARGET CLASS', axis='columns')
)

In [10]:
# Inspect the scaled values returned by scaler.transform().
# The result is a NumPy array without column labels, so the notebook shows
# NumPy's abbreviated array repr rather than a table.
scaled_features

array([[-0.12354188,  0.18590747, -0.91343069, ..., -1.48236813,
        -0.9497194 , -0.64331425],
       [-1.08483602, -0.43034845, -1.02531333, ..., -0.20224031,
        -1.82805088,  0.63675862],
       [-0.78870217,  0.33931821,  0.30151137, ...,  0.28570652,
        -0.68249379, -0.37784986],
       ...,
       [ 0.64177714, -0.51308341, -0.17920486, ..., -2.36249443,
        -0.81426092,  0.11159651],
       [ 0.46707241, -0.98278576, -1.46519359, ..., -0.03677699,
         0.40602453, -0.85567   ],
       [-0.38765353, -0.59589427, -1.4313981 , ..., -0.56778932,
         0.3369971 ,  0.01034996]])

In [11]:
# Wrap the scaled NumPy array back into a DataFrame so the features regain
# their column labels.
# The column names are derived by removing 'TARGET CLASS' by name (rather than
# slicing off the last column), so they match the columns that were scaled
# even if the label is not the final column of the file.
# The original row index is carried over so df_feat stays aligned with df.

df_feat = pd.DataFrame(
    scaled_features,
    index=df.index,
    columns=df.columns.drop('TARGET CLASS'),
)

In [12]:
# Preview the first five rows of the standardized feature frame

df_feat.head(n=5)

Unnamed: 0,WTT,PTI,EQW,SBI,LQE,QWG,FDJ,PJF,HQE,NXJ
0,-0.123542,0.185907,-0.913431,0.319629,-1.033637,-2.308375,-0.798951,-1.482368,-0.949719,-0.643314
1,-1.084836,-0.430348,-1.025313,0.625388,-0.444847,-1.152706,-1.129797,-0.20224,-1.828051,0.636759
2,-0.788702,0.339318,0.301511,0.755873,2.031693,-0.870156,2.599818,0.285707,-0.682494,-0.37785
3,0.982841,1.060193,-0.621399,0.625299,0.45282,-0.26722,1.750208,1.066491,1.241325,-1.026987
4,1.139275,-0.640392,-0.709819,-0.057175,0.822886,-0.936773,0.596782,-1.472352,1.040772,0.27651
