# Model Training with scikit-learn

### Load iris Data

In [1]:
import numpy as np
from sklearn.datasets import load_iris

# Load the bundled iris dataset, then split it into the feature matrix (X)
# and the response vector (y)
iris = load_iris()
X, y = iris.data, iris.target

In [2]:
# Show the dimensions of the features and the response, one per line
print(X.shape, y.shape, sep="\n")

(150, 4)
(150,)


# 4-Step Modeling Pattern in scikit-learn

## K-Nearest Neighbors (KNN) Classification
- Pick a value of K
- Search for the K observations in the training data that are nearest to the unknown observation
- Use the most popular response among those K neighbors as the predicted response for the unknown observation

### 1. Import the Model Class from Module

In [3]:
from sklearn.neighbors import KNeighborsClassifier

### 2. Instantiate the Model
- Can specify tuning parameters (hyperparameters)

In [4]:
# K=1: each prediction is decided by the single nearest training observation
knn = KNeighborsClassifier(n_neighbors=1)

In [5]:
# Echo the estimator to inspect its hyperparameters; the repr shown below lists
# both the assigned n_neighbors and the defaults.
# NOTE(review): recent scikit-learn releases reportedly print only non-default
# parameters; knn.get_params() lists everything — confirm for your version.
knn

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=1, p=2,
           weights='uniform')

### 3. Fit the Model with Data (Training)
- Fitting occurs in place; there is no need to assign the result to a new object

In [6]:
# Train on the full dataset. fit() updates knn in place and returns the
# estimator itself, which is why its repr is echoed as the cell output.
knn.fit(X, y)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=1, p=2,
           weights='uniform')

### 4. Predict on new observation
- Returns a NumPy array
- 0 = setosa, 1 = versicolor, 2 = virginica

In [7]:
# predict() needs a 2D array-like (one row per observation), so a single
# observation is wrapped in an outer list
knn.predict([[3, 5, 4, 2]])

array([2])

In [8]:
# Two unseen observations, one row per observation
X_pred = [
    [3, 5, 4, 2],
    [5, 4, 3, 2],
]
knn.predict(X_pred)

array([2, 1])

## Tune the Model
- Tune the KNN model with a different value of K

In [9]:
# Instantiate with K=5 and fit in one expression; fit() returns the estimator
# itself, so knn ends up bound to the fitted model
knn = KNeighborsClassifier(n_neighbors=5).fit(X, y)

# Predict on the same new observations as before
knn.predict(X_pred)

array([1, 1])

## Use a Different Model

In [10]:
# Import the model class from its module
from sklearn.linear_model import LogisticRegression

# Instantiate the model with its default hyperparameters
logreg = LogisticRegression()

# Fit on the full dataset, then predict on the new observations; fit() returns
# the estimator itself, so the two calls can be chained
logreg.fit(X, y).predict(X_pred)

array([2, 0])