## Import scikit-learn

In [1]:
import sklearn

## Import dataset

In [7]:
from sklearn.datasets import load_iris

In [None]:
# Preview only the first five entries of each returned array; echoing the
# whole dataset would flood the cell output without adding information.
tuple(part[:5] for part in load_iris(return_X_y=True))

## Store features and labels as variables

In [9]:
# Unpack the pair returned by the loader: X is later used as the model
# inputs and y as the targets.
X, y = load_iris(return_X_y=True)

## Import Classification Models

In [30]:
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier

In [31]:
# Two candidate classifiers. The decision tree is given a fixed seed (the same
# value used for the train/test split) so that fitting it produces the same
# tree on every run instead of depending on the global random state.
model1 = GaussianNB()
model2 = DecisionTreeClassifier(random_state=13)

## Split into Training and Testing

In [12]:
from sklearn.model_selection import train_test_split

In [13]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=13,
)

## Data Preprocessing

In [14]:
from sklearn.preprocessing import StandardScaler

In [18]:
# Learn the scaling statistics from the training split only, then apply that
# same fitted scaler to both splits so the test data never influences the fit.
sc = StandardScaler()
sc.fit(x_train)
x_train = sc.transform(x_train)
x_test = sc.transform(x_test)

## Training the Model

In [34]:
# Fit the Gaussian naive Bayes classifier on the training split.
model1.fit(x_train, y_train)

## Testing the Model

In [35]:
# Predict a label for every sample in the held-out split and print them;
# `prediction` is reused by the evaluation cells below.
prediction = model1.predict(x_test)
print(prediction)

[1 1 0 2 2 0 2 2 0 1 2 1 1 0 2 0 2 2 1 0 1 0 0 2 1 2 0 2 1 2]


## Evaluating the Prediction

In [36]:
# Import the metric explicitly: `import sklearn` on its own does not guarantee
# that the `sklearn.metrics` submodule is loaded, so reaching it as an
# attribute of the package only works if some other import pulled it in.
from sklearn.metrics import accuracy_score

# Fraction of held-out samples whose predicted label equals the true label.
accuracy_score(y_test, prediction)

0.9666666666666667

In [26]:
# Import the metric explicitly rather than relying on `sklearn.metrics` having
# been loaded as a side effect of another import.
from sklearn.metrics import precision_score

# Macro averaging: precision is computed per class and averaged with equal
# weight for each class.
precision_score(y_test, prediction, average='macro')

0.9629629629629629

In [29]:
# Import the metric explicitly rather than relying on `sklearn.metrics` having
# been loaded as a side effect of another import.
from sklearn.metrics import recall_score

# Macro averaging: recall is computed per class and averaged with equal
# weight for each class.
recall_score(y_test, prediction, average='macro')

0.9743589743589745

## Using Pipelines to preprocess and train in one go

In [41]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline

# Chain standardisation and a k-nearest-neighbours classifier into a single
# estimator. The step names ("scale", "model") are the prefixes used when
# addressing step parameters, e.g. "model__n_neighbors".
pipe = Pipeline(
    steps=[
        ("scale", StandardScaler()),
        ("model", KNeighborsClassifier()),
    ]
)
pipe.fit(X, y)

## Tuning hyperparameters with grid search and cross-validation

In [43]:
from sklearn.model_selection import GridSearchCV

# Evaluate every neighbour count from 1 to 10 for the pipeline's "model" step,
# scoring each candidate with 3-fold cross-validation.
mod = GridSearchCV(
    estimator=pipe,
    param_grid={"model__n_neighbors": list(range(1, 11))},
    cv=3,
)
mod.fit(X, y)

In [47]:
# Inspect the dictionary of per-candidate results recorded by the fitted
# grid search (fit/score timings, the parameter settings tried, ...).
mod.cv_results_

{'mean_fit_time': array([0.00208227, 0.00109498, 0.00082334, 0.0006899 , 0.00071828,
        0.00067504, 0.00069928, 0.00067639, 0.000664  , 0.00067266]),
 'std_fit_time': array([1.47345009e-03, 7.45352899e-05, 4.79177190e-05, 1.90906977e-05,
        2.62840189e-05, 3.66437972e-06, 3.48129227e-05, 8.09219491e-06,
        6.51192679e-06, 8.88746806e-06]),
 'mean_score_time': array([0.00483799, 0.00307639, 0.00215467, 0.00198229, 0.00201615,
        0.00406639, 0.00195018, 0.00191728, 0.00190417, 0.00190171]),
 'std_score_time': array([3.06807444e-03, 3.52814728e-04, 9.37114091e-05, 4.18985559e-05,
        6.99024228e-05, 3.02672747e-03, 2.83068448e-05, 2.65272739e-05,
        1.31243358e-05, 1.10023279e-05]),
 'param_model__n_neighbors': masked_array(data=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
              mask=[False, False, False, False, False, False, False, False,
                    False, False],
        fill_value='?',
             dtype=object),
 'params': [{'model__n_neighbors': 1},
