# Classification
### SLM003 06/08/2018

References:

`ISL04` James G., Witten D., Hastie T., Tibshirani R. (2013) **Classification**. In: _An Introduction to Statistical Learning_. Springer Texts in Statistics, vol 103. Springer, New York, NY. doi: https://doi.org/10.1007/978-1-4614-7138-7_4

`ESL04` Hastie T., Tibshirani R., Friedman J. (2009) **Linear Methods for Classification**. In: _The Elements of Statistical Learning_ (2nd ed.). Springer Series in Statistics. Springer, New York, NY. doi: https://doi.org/10.1007/978-0-387-84858-7_4

# Outline
1. Logistic regression
2. Discriminant analysis
  1. Linear discriminant analysis (LDA)
  2. Quadratic discriminant analysis (QDA)

# What is "classification"?

- **Supervised learning**: use inputs to predict output
- Classification predicts _**qualitative**_ (a.k.a. _categorical_, _discrete_) outputs
- Input: _**predictors**_ (a.k.a. _features_, _independent variables_, $X$) -- quantitative and/or qualitative
- Output: _**response**_ (a.k.a. _target_, _dependent variable_, $y$)
  - whose possible values may be referred to as _response levels_, _targets_, _**classes**_, or _categories_

# Logistic regression

Goal: describe predictor-response relationship using the **logistic model**

The _logistic function_ is defined as:
$$
p(X) = \frac{\exp(\beta_0 + \beta_1 X_1 + ... + \beta_p X_p)}{1 + \exp(\beta_0 + \beta_1 X_1 + ... + \beta_p X_p)}\tag{4.6}
$$

- $p(X)$: predicted probability of the response, i.e. $\Pr(Y = 1 \mid X)$
- $X_i$: predictors
- $\beta_i$: parameters of the model
  - $\beta_0$: _intercept_
  - other $\beta_i$: _coefficients_

In [78]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from mpl_toolkits.mplot3d import Axes3D
import mpl_toolkits.mplot3d.art3d as art3d
from matplotlib.patches import Rectangle
from ipywidgets import interactive, Button
from sklearn import datasets
from slm import viz

# Fitting a logistic model to binary response

In [81]:
# Load the iris dataset bundled with scikit-learn.
data = datasets.load_iris()

# Single predictor: keep only the first feature column (the slice keeps the array 2-D).
dataX = data.data[:, 0:1]
# Binary response: 0 where the original target is 0, 1 for every other class.
dataY = np.greater(data.target, 0).astype(int)

# Summarize the problem size: n observations, p predictors, k classes.
print("n_observations (n): {}".format(len(dataX)))
print("n_predictors (p): {}".format(dataX.shape[1]))
print("n_class (k): {}".format(np.unique(dataY).size))

n_observations (n): 150
n_predictors (p): 1
n_class (k): 2


In [80]:
def logistic(x: float) -> float:
    '''Map a linear predictor (logit) to a probability in [0, 1].

    Mathematically this is exp(x) / (1 + exp(x)).  It is evaluated in the
    equivalent form exp(-log(1 + exp(-x))) so that large |x| saturates to
    0 or 1 instead of overflowing to inf / inf = nan.

    Accepts a scalar or an array-like; arrays are evaluated element-wise.
    '''
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x)),
    # computed without ever forming exp() of a large positive number.
    return np.exp(-np.logaddexp(0, np.negative(x)))

def logreg_predict(x, coeff, intercept):
    '''Return the logistic-model output for the observations in x.

    The linear predictor is np.dot(x, coeff.T) with length-1 axes squeezed
    away, plus intercept; the result is passed through logistic().
    '''
    weighted_sum = np.dot(x, coeff.T)
    logit = weighted_sum.squeeze() + intercept
    return logistic(logit)

In [101]:
def plot_logistic(beta0=-15, beta1=3):
    '''Plot a one-predictor logistic model against the observed data.

    Left panel: the logit line beta0 + beta1 * x on a grid of 50 points in
    [0, 10].  Right panel: the corresponding logistic curve, with the
    observations from the module-level dataX / dataY overlaid.  Each
    observation is colored by whether the predicted probability, rounded to
    0 or 1, equals its label; the resulting error rate is printed on the plot.

    Parameters
    ----------
    beta0 : float
        Intercept of the logit.
    beta1 : float
        Coefficient of the predictor.

    Returns
    -------
    X, Y : ndarray
        The grid points and the logit evaluated at them.
    '''
    X = np.linspace(0, 10, 50)
    Y = beta0 + beta1*X

    fig, ax = plt.subplots(1, 2, figsize=(15, 6))
    logit_ax, prob_ax = ax

    # Left panel: the linear predictor (log-odds).
    logit_ax.plot(X, Y, c='grey')
    logit_ax.set_xlim(0, 10)
    logit_ax.set_ylim(-40, 40)
    logit_ax.set_xlabel('x')
    logit_ax.set_title('Logit', fontsize=15)
    logit_ax.set_ylabel(r'$\log\frac{p(X)}{1-p(X)}=\beta_0+\beta_1X$', fontsize=15)

    # Right panel: the logistic curve plus the observations.
    prob_ax.plot(X, logistic(Y), c='grey')

    # Read only the first predictor column, so this callback keeps working
    # even if dataX has been rebound to an array with more columns.
    x_obs = dataX[:, 0]
    # True where the rounded predicted probability equals the observed label.
    correct = dataY == np.round(logistic(beta0 + beta1*x_obs))

    # plt.get_cmap accepts the same (name, lut) arguments as cm.get_cmap,
    # which was removed from matplotlib.cm in Matplotlib 3.9.
    cim = prob_ax.scatter(x_obs, dataY, s=80, marker='o',
                          c=correct,
                          cmap=plt.get_cmap('seismic_r', 2))
    cim.set_facecolor('none')  # hollow markers: only the edge carries the color
    prob_ax.set_xlim(3, 9)
    prob_ax.set_ylim(-0.5, 1.5)
    prob_ax.set_yticks([0, 1])
    prob_ax.set_xlabel('x', fontsize=15)
    prob_ax.set_ylabel(r'p(x)', fontsize=15)
    prob_ax.set_title('Logistic model', fontsize=15)

    error_rate = 1 - np.count_nonzero(correct)/len(correct)
    prob_ax.text(3.2, 1.3, 'Error rate: {:.3}'.format(error_rate), fontsize=15)

    # Two-color bar; ticks sit at the middle of each color band.
    cax = plt.colorbar(cim)
    cax.set_ticks([0.25, 0.75])
    cax.ax.set_yticklabels(['incorrect', 'correct'])
    plt.show()

    return X, Y

# Wrap plot_logistic in an ipywidgets control; each (min, max) tuple sets the
# range offered for that parameter.
fit_logreg = interactive(
    plot_logistic,
    beta0=(-40., 40.),
    beta1=(-10., 10.),
)

In [102]:
# Show the interactive widget built from plot_logistic.
display(fit_logreg)

interactive(children=(FloatSlider(value=-15.0, description='beta0', max=40.0, min=-40.0), FloatSlider(value=3.…

# Fitting a logistic function to binary response, with 2 predictors ($p = 2$)

In [103]:
# Two predictors: keep the first two feature columns of the iris data.
dataX = data.data[:, 0:2]
# Binary response: 0 where the original target is 0, 1 for every other class.
dataY = np.greater(data.target, 0).astype(int)

# Summarize the problem size: n observations, p predictors, k classes.
print("n_observations (n): {}".format(len(dataX)))
print("n_predictors (p): {}".format(dataX.shape[1]))
print("n_class (k): {}".format(np.unique(dataY).size))

n_observations (n): 150
n_predictors (p): 2
n_class (k): 2


In [120]:
def plot_logistic_2d(beta0=-1, beta1=3, beta2=-5, azim=-123, elev=16):
    '''Plot a two-predictor logistic model against the observed data.

    Two small panels on the left show one-dimensional slices of the logit;
    the large 3-D panel shows the logistic surface as a wireframe with the
    observations from the module-level dataX / dataY overlaid.  Each
    observation is colored by whether the predicted probability, rounded to
    0 or 1, equals its label; the error rate is shown in the title.

    Parameters
    ----------
    beta0 : float
        Intercept of the logit.
    beta1, beta2 : float
        Coefficients of the first and second predictor.
    azim, elev : float
        Viewing angles passed to the 3-D axes' view_init.

    Returns
    -------
    X, Y : ndarray
        The (100, 2) array of axis samples used to build the mesh, and the
        logit evaluated on the mesh.
    '''
    X = np.vstack((np.linspace(0, 10, 100), np.linspace(0, 10, 100))).T
    # Presumably a 2-D grid whose last axis holds (x_0, x_1); that is how it
    # is indexed below -- the exact layout is defined in slm.viz.
    X_mesh = viz.make_mesh(X, 0.1)
    coeff = np.array([beta1, beta2])
    Y = np.dot(X_mesh, coeff.T).squeeze() + beta0

    fig = plt.figure(figsize=(15, 6))

    # NOTE(review): which mesh axis corresponds to x_0 and which to x_1
    # depends on viz.make_mesh, which is not visible here -- confirm that the
    # axis labels and "where ..." captions of the two logit panels match the
    # slices actually plotted.
    ax = fig.add_subplot(2, 3, 1)
    ax.plot(X_mesh[:, 1, 1], Y[:, 0], lw=1, c='grey')
    ax.set_title('Logit', fontsize=15)
    ax.set_xlabel('$x_0$', fontsize=15)
    ax.set_xlim(0, 10)
    ax.set_ylim(-40, 40)
    ax.set_ylabel(r'$\log\frac{p(X)}{1-p(X)}$', fontsize=15)
    # Raw string: "\ " is mathtext spacing, not a Python escape sequence.
    ax.text(0, 40, r'$where\ x_1=0$', fontsize=15, verticalalignment='top')

    ax = fig.add_subplot(2, 3, 4)
    ax.plot(X_mesh[0, :, 0], Y[0, :], lw=1, c='grey')
    ax.set_xlim(0, 10)
    ax.set_ylim(-40, 40)
    ax.set_xlabel('$x_1$', fontsize=15)
    ax.set_ylabel(r'$\log\frac{p(X)}{1-p(X)}$', fontsize=15)
    ax.text(0, 40, r'$where\ x_0=0$', fontsize=15, verticalalignment='top')

    ax = fig.add_subplot(1, 3, (2, 3), projection='3d')
    logy = logistic(Y)
    # Blank out the surface outside the displayed x/y limits so the
    # wireframe does not spill past the axes box.
    x0_grid = X_mesh[..., 0]
    x1_grid = X_mesh[..., 1]
    outside = (x0_grid < 3) | (x0_grid > 9) | (x1_grid < 2) | (x1_grid > 5)
    logy[outside] = np.nan
    ax.plot_wireframe(x0_grid, x1_grid,
                      logy,
                      color="grey", alpha=0.5, lw=1, rcount=40, ccount=40)

    # True where the rounded predicted probability equals the observed label.
    correct = dataY == np.round(logreg_predict(dataX, coeff, beta0).squeeze())
    # plt.get_cmap accepts the same (name, lut) arguments as cm.get_cmap,
    # which was removed from matplotlib.cm in Matplotlib 3.9.
    cim = ax.scatter(dataX[:, 0], dataX[:, 1], dataY, s=20, marker='o',
                     c=correct,
                     cmap=plt.get_cmap('seismic_r', 2))
    cim.set_facecolor('none')  # hollow markers: only the edge carries the color
    ax.set_xlim(3, 9)
    ax.set_ylim(2, 5)
    ax.set_zlim(0, 1)

    # Two-color bar; ticks sit at the middle of each color band.
    cax = plt.colorbar(cim)
    cax.set_ticks([0.25, 0.75])
    cax.ax.set_yticklabels(['incorrect', 'correct'])

    ax.view_init(elev, azim)
    error_rate = 1 - np.count_nonzero(correct)/len(correct)
    ax.set_title('Logistic model\nError rate: {:.3}'.format(error_rate),
                 fontsize=15)
    ax.grid(False)
    ax.set_xlabel('$x_0$', fontsize=15)
    ax.set_ylabel('$x_1$', fontsize=15)
    ax.set_zlabel('p(x)', fontsize=15)
    plt.show()

    return X, Y

# Wrap plot_logistic_2d in an ipywidgets control; (min, max) or
# (min, max, step) tuples set the range offered for each parameter.
fit_logreg_2d = interactive(
    plot_logistic_2d,
    beta0=(-40., 40.),
    beta1=(-10., 10.),
    beta2=(-10., 10.),
    azim=(-180, 180, 22.5),
    elev=(-180, 180, 22.5),
)

In [121]:
# Show the interactive widget built from plot_logistic_2d.
display(fit_logreg_2d)

interactive(children=(FloatSlider(value=-1.0, description='beta0', max=40.0, min=-40.0), FloatSlider(value=3.0…

# Fitting the logistic model using "maximum likelihood"

_TODO:_ The interactive demo is currently too slow; first test whether Binder is fast enough.
The slowness could also be a RISE / Jupyter Notebook issue -- try JupyterLab?

# Discriminant analysis