In [None]:
# https://towardsdatascience.com/hands-on-end-to-end-automated-machine-learning-a50e6bce6512

In [None]:
import pandas as pd
import numpy as np

from sklearn import datasets
# Load the iris dataset through scikit-learn. The returned object's `data`,
# `target` and `feature_names` attributes are read when the DataFrame is built.
data = datasets.load_iris()

In [None]:
# Build a single table: the measurement columns from `data.data` followed by
# the class label from `data.target`, stored under the name 'Species'.
df = pd.DataFrame(
    data=np.column_stack((data.data, data.target)),
    columns=[*data.feature_names, 'Species'],
)
df.head()

In [None]:
import seaborn as sns
# Pairwise scatter plots of every column, coloured by class label.
# `height` (per-facet height in inches) replaces the deprecated `size`
# keyword, which seaborn renamed in version 0.9.
sns.pairplot(df, hue='Species', height=3)

In [None]:
# encoding
from sklearn.preprocessing import LabelEncoder
# Replace the Species column with the integer codes produced by LabelEncoder.
species_codes = LabelEncoder().fit_transform(df['Species'])
df['Species'] = species_codes
df.head()

In [None]:
# normalize
from sklearn.preprocessing import StandardScaler
# Standardize the first four columns (the measurements) in place; the Species
# column is left untouched.
# NOTE(review): the scaler is fitted on every row before the train/test split
# that follows, so held-out rows influence the scaling statistics. Consider
# fitting on the training rows only.
scaled_measurements = StandardScaler().fit_transform(df.iloc[:, :4])
df.iloc[:, :4] = scaled_measurements
df.head()

In [None]:
# train test split
from sklearn.model_selection import train_test_split
# Features: every column except the last, as a NumPy array.
# Target: the Species column. 40% of the rows are held out for testing and
# the shuffle is seeded so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    df.iloc[:, :-1].values,
    df['Species'],
    test_size=0.4,
    random_state=123,
)

In [None]:
# create the model
from sklearn.neighbors import KNeighborsClassifier
# Fit a 3-nearest-neighbours classifier on the training rows; `fit` returns
# the estimator itself, so `knn` is the fitted model.
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)
# Predicted class for each held-out row.
predictions = knn.predict(X_test)
predictions

In [None]:
# show a confusion matrix
from sklearn.metrics import confusion_matrix
# Tabulate true test labels against the classifier's predictions.
cm = confusion_matrix(y_true=y_test, y_pred=predictions)
print(cm)

In [None]:
# accuracy score
from sklearn.metrics import accuracy_score
# Fraction of held-out rows whose predicted label matches the true label.
score = accuracy_score(y_true=y_test, y_pred=predictions)
print(score)

In [None]:
# optimize the model using a grid search
from sklearn.model_selection import GridSearchCV
# Candidate values for n_neighbors: every integer from 1 through 30.
k_range = [*range(1, 31)]
print(k_range)
param_grid = {'n_neighbors': k_range}
print(param_grid)
# Search the candidates on the training rows, scoring each by accuracy.
grid = GridSearchCV(estimator=knn, param_grid=param_grid, scoring='accuracy')
grid.fit(X_train, y_train)

# Report the best-scoring number of neighbours.
print(grid.best_params_['n_neighbors'])

In [None]:
# pipelining
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
# Two named stages: 'normalizer' standardizes the features, then 'clf'
# classifies them with a 3-nearest-neighbours model.
pipeline = Pipeline(steps=[
    ('normalizer', StandardScaler()),
    ('clf', KNeighborsClassifier(n_neighbors=3)),
])
print(pipeline.steps)

In [None]:
# cross validate the pipeline
from sklearn.model_selection import cross_validate
# The `<step>__<param>` syntax targets a parameter of a named pipeline step:
# here it sets n_neighbors on the 'clf' step to 5.
pipeline.set_params(clf__n_neighbors=5)
# Evaluate the whole pipeline with cross-validation on the training rows.
scores = cross_validate(estimator=pipeline, X=X_train, y=y_train)
print(scores)