# Objective

* Overview of ML Model Build Process
* Logistic Regression Introduction
* Model Evaluations

In [None]:
from __future__ import print_function  # Python 2/3 compatibility

from IPython.display import Image

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

## Model Building Process

In [None]:
# Display the diagram stored at images/model-pipeline.png (path is relative to the notebook).
Image("images/model-pipeline.png")

## Dataset

In [None]:
# Two blobs of 100 points each: class 0 is centred on (0, 0), class 1 on (1, 1).
centers = np.repeat([[0, 0], [1, 1]], 100, axis=0)
y = np.repeat([0, 1], 100)

# Seed the global NumPy RNG so the sampled noise is identical on every run.
np.random.seed(42)
X = np.random.normal(0, 0.2, (200, 2)) + centers

# Keep a handle on the scatter so the colorbar is tied to it explicitly
# instead of relying on pyplot's "current mappable" state.
points = plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.RdYlBu)
plt.xlabel("feature 0")
plt.ylabel("feature 1")
plt.title("Two-class dataset (noise std = 0.2)")
plt.colorbar(points, label="class");

In [None]:
# Peek at the first five rows of the feature matrix.
X[:5]

In [None]:
# First five and last five labels, to show both ends of the label array.
y[:5], y[-5:]

### Logistic Regression - Model

Take a weighted sum of the features and add a bias term to get the logit.
Squash this weighted sum into the range 0–1 via a sigmoid function.

* Sigmoid Function

<img src="images/sigmoid.png" width="500">


$$f(x) = \frac{e^x}{1+e^x}$$

In [None]:
Image("images/logistic-regression.png")

In [None]:
## Build the Model

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
## Step 1 - Instantiate the Model with Hyper Parameters (none are passed here, so the library defaults apply)
model = LogisticRegression()

In [None]:
## Step 2 - Fit the Model
# Train on the full dataset: X holds the two features per point, y the class labels.
model.fit(X, y)

In [None]:
## Step 3 - Evaluate the Model
# NOTE: this scores the model on the same X, y it was fitted on, so it is a
# training-set score, not an estimate of performance on unseen data.
model.score(X, y)

In [None]:
def plot_decision_boundaries(model, X, y):
    """Scatter the labelled points and overlay the model's linear decision boundary.

    The boundary is the line on which the logit is zero,
    ``w0 * x0 + w1 * x1 + bias = 0``. It is drawn in black over the fixed
    viewport ``[-1, 2] x [-1, 2]``.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``coef_`` and ``intercept_``. Only the first
        row of ``coef_`` (unpacked as exactly two weights) and the first
        intercept value are used.
    X : array
        Indexed as ``X[:, 0]`` (horizontal axis) and ``X[:, 1]`` (vertical axis).
    y : array-like
        Per-point values used for colouring; the colour scale is pinned to [0, 1].

    Returns
    -------
    None
        The figure is drawn on the current pyplot axes.
    """
    # Keep a handle on the scatter so the colorbar is attached to it explicitly.
    points = plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.RdYlBu,
                         vmin=0.0, vmax=1)

    w0, w1 = model.coef_[0]
    # intercept_ may be a length-1 array or a plain scalar; reduce it to a scalar
    # so the line equation below yields a 1-D array in both cases.
    bias = np.ravel(model.intercept_)[0]

    if w1 != 0:
        # Solve w0*x0 + w1*x1 + bias = 0 for x1 across the viewport.
        xx = np.linspace(-1, 2, 100)
        yy = -w0 / w1 * xx - bias / w1
        plt.plot(xx, yy, 'k')
    elif w0 != 0:
        # The second feature has zero weight: the boundary is the vertical
        # line x0 = -bias / w0, which cannot be expressed as x1 = f(x0).
        plt.axvline(-bias / w0, color='k')
    # If both weights are zero there is no boundary to draw.

    plt.axis((-1, 2, -1, 2))
    plt.colorbar(points)

In [None]:
# Overlay the fitted model's decision boundary on the data it was trained on.
plot_decision_boundaries(model, X, y)

## Dataset - Take 2

In [None]:
# Same two blob centres as before, but with a larger noise spread (0.5 instead
# of 0.2) so the classes overlap.
centers = np.repeat([[0, 0], [1, 1]], 100, axis=0)
y = np.repeat([0, 1], 100)

# Seed the global NumPy RNG so the sampled noise is identical on every run.
np.random.seed(42)
X = np.random.normal(0, 0.5, (200, 2)) + centers

# Keep a handle on the scatter so the colorbar is tied to it explicitly
# instead of relying on pyplot's "current mappable" state.
points = plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.RdYlBu)
plt.xlabel("feature 0")
plt.ylabel("feature 1")
plt.title("Two-class dataset (noise std = 0.5)")
plt.colorbar(points, label="class");

In [None]:
# Instantiate, Fit, Evaluate
# The score is computed on the same X, y used for fitting (training-set score).
model = LogisticRegression()
model.fit(X, y)
print(model.score(X, y))

In [None]:
# Predicted class label for every training point; reused by the evaluation cells below.
y_pred = model.predict(X)

In [None]:
# Overlay the refitted model's decision boundary on the noisier dataset.
plot_decision_boundaries(model, X, y)

### Other Evaluation Methods

* Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# Tabulate the true labels (y) against the model's predictions (y_pred).
cm = confusion_matrix(y, y_pred)
cm

In [None]:
# Same comparison as a labelled table: rows are named 'Actual', columns 'Predicted',
# and margins=True adds row/column totals.
pd.crosstab(y, y_pred, rownames=['Actual'], colnames=['Predicted'], margins=True)